iPXE
Data Structures | Defines | Functions | Variables
ipoib.c File Reference

IP over Infiniband. More...

#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include <byteswap.h>
#include <errno.h>
#include <ipxe/errortab.h>
#include <ipxe/malloc.h>
#include <ipxe/if_arp.h>
#include <ipxe/arp.h>
#include <ipxe/if_ether.h>
#include <ipxe/ethernet.h>
#include <ipxe/ip.h>
#include <ipxe/iobuf.h>
#include <ipxe/netdevice.h>
#include <ipxe/infiniband.h>
#include <ipxe/ib_pathrec.h>
#include <ipxe/ib_mcast.h>
#include <ipxe/retry.h>
#include <ipxe/ipoib.h>

Go to the source code of this file.

Data Structures

struct  ipoib_broadcast
 An IPoIB broadcast address. More...
struct  ipoib_device
 An IPoIB device. More...
struct  ipoib_peer
 An IPoIB REMAC cache entry. More...

Defines

#define ENXIO_ARP_REPLY   __einfo_error ( EINFO_ENXIO_ARP_REPLY )
#define EINFO_ENXIO_ARP_REPLY
#define ENXIO_NON_IPV4   __einfo_error ( EINFO_ENXIO_NON_IPV4 )
#define EINFO_ENXIO_NON_IPV4
#define ENXIO_ARP_SENT   __einfo_error ( EINFO_ENXIO_ARP_SENT )
#define EINFO_ENXIO_ARP_SENT
#define IPOIB_NUM_SEND_WQES   8
 Number of IPoIB send work queue entries.
#define IPOIB_NUM_RECV_WQES   4
 Number of IPoIB receive work queue entries.
#define IPOIB_NUM_CQES   16
 Number of IPoIB completion entries.
#define EINPROGRESS_JOINING   __einfo_error ( EINFO_EINPROGRESS_JOINING )
 Link status for "broadcast join in progress".
#define EINFO_EINPROGRESS_JOINING

Functions

 FILE_LICENCE (GPL2_OR_LATER_OR_UBDL)
static LIST_HEAD (ipoib_devices)
 List of all IPoIB devices.
static struct ipoib_mac * ipoib_find_remac (struct ipoib_device *ipoib, const struct ipoib_remac *remac)
 Find IPoIB MAC from REMAC.
static int ipoib_map_remac (struct ipoib_device *ipoib, const struct ipoib_remac *remac, const struct ipoib_mac *mac)
 Add IPoIB MAC to REMAC cache.
static void ipoib_flush_remac (struct ipoib_device *ipoib)
 Flush REMAC cache.
static unsigned int ipoib_discard_remac (void)
 Discard some entries from the REMAC cache.
struct cache_discarder
ipoib_discarder 
__cache_discarder (CACHE_EXPENSIVE)
 IPoIB cache discarder.
static void ipoib_init_addr (const void *hw_addr, void *ll_addr)
 Initialise IPoIB link-layer address.
struct net_device * alloc_ipoibdev (size_t priv_size)
 Allocate IPoIB device.
static int ipoib_translate_tx_arp (struct net_device *netdev, struct io_buffer *iobuf)
 Translate transmitted ARP packet.
static int ipoib_translate_tx (struct net_device *netdev, struct io_buffer *iobuf, uint16_t net_proto)
 Translate transmitted packet.
static int ipoib_translate_rx_arp (struct net_device *netdev, struct io_buffer *iobuf, struct ipoib_remac *remac)
 Translate received ARP packet.
static int ipoib_translate_rx (struct net_device *netdev, struct io_buffer *iobuf, struct ipoib_remac *remac, uint16_t net_proto)
 Translate received packet.
static int ipoib_transmit (struct net_device *netdev, struct io_buffer *iobuf)
 Transmit packet via IPoIB network device.
static void ipoib_complete_send (struct ib_device *ibdev __unused, struct ib_queue_pair *qp, struct io_buffer *iobuf, int rc)
 Handle IPoIB send completion.
static void ipoib_complete_recv (struct ib_device *ibdev __unused, struct ib_queue_pair *qp, struct ib_address_vector *dest, struct ib_address_vector *source, struct io_buffer *iobuf, int rc)
 Handle IPoIB receive completion.
static struct io_buffer * ipoib_alloc_iob (size_t len)
 Allocate IPoIB receive I/O buffer.
static void ipoib_poll (struct net_device *netdev)
 Poll IPoIB network device.
void ipoib_join_complete (struct ib_mc_membership *membership, int rc)
 Handle IPv4 broadcast multicast group join completion.
static int ipoib_join_broadcast_group (struct ipoib_device *ipoib)
 Join IPv4 broadcast multicast group.
static void ipoib_leave_broadcast_group (struct ipoib_device *ipoib)
 Leave IPv4 broadcast multicast group.
static void ipoib_link_state_changed (struct ipoib_device *ipoib)
 Handle link status change.
static int ipoib_open (struct net_device *netdev)
 Open IPoIB network device.
static void ipoib_close (struct net_device *netdev)
 Close IPoIB network device.
static int ipoib_probe (struct ib_device *ibdev)
 Probe IPoIB device.
static void ipoib_notify (struct ib_device *ibdev)
 Handle device or link status change.
static void ipoib_remove (struct ib_device *ibdev)
 Remove IPoIB device.
struct net_device * ipoib_netdev (struct ib_device *ibdev)
 Find IPoIB network device.

Variables

struct ipoib_broadcast __attribute__
static struct ipoib_mac ipoib_broadcast
 Broadcast IPoIB address.
struct errortab ipoib_errors[] __errortab
 Human-readable message for the link status.
static struct net_device_operations ipoib_operations
 IPoIB network device operations.
struct ll_protocol ipoib_protocol __ll_protocol
 IPoIB protocol.
static struct
ib_completion_queue_operations 
ipoib_cq_op
 IPoIB completion operations.
static struct
ib_queue_pair_operations 
ipoib_qp_op
 IPoIB queue pair operations.
struct ib_driver ipoib_driver __ib_driver
 IPoIB driver.

Detailed Description

IP over Infiniband.

Definition in file ipoib.c.


Define Documentation

#define ENXIO_ARP_REPLY   __einfo_error ( EINFO_ENXIO_ARP_REPLY )

Definition at line 54 of file ipoib.c.

Referenced by ipoib_translate_tx_arp().

#define EINFO_ENXIO_ARP_REPLY

Value:
__einfo_uniqify ( EINFO_ENXIO, 0x01,                            \
                          "Missing REMAC for ARP reply target address" )

Definition at line 55 of file ipoib.c.

#define ENXIO_NON_IPV4   __einfo_error ( EINFO_ENXIO_NON_IPV4 )

Definition at line 58 of file ipoib.c.

Referenced by ipoib_transmit().

#define EINFO_ENXIO_NON_IPV4

Value:
__einfo_uniqify ( EINFO_ENXIO, 0x02,                            \
                          "Missing REMAC for non-IPv4 packet" )

Definition at line 59 of file ipoib.c.

#define ENXIO_ARP_SENT   __einfo_error ( EINFO_ENXIO_ARP_SENT )

Definition at line 62 of file ipoib.c.

Referenced by ipoib_transmit().

#define EINFO_ENXIO_ARP_SENT

Value:
__einfo_uniqify ( EINFO_ENXIO, 0x03,                            \
                          "Missing REMAC for IPv4 packet (ARP sent)" )

Definition at line 63 of file ipoib.c.

#define IPOIB_NUM_SEND_WQES   8

Number of IPoIB send work queue entries.

Definition at line 68 of file ipoib.c.

Referenced by ipoib_open().

#define IPOIB_NUM_RECV_WQES   4

Number of IPoIB receive work queue entries.

Definition at line 71 of file ipoib.c.

Referenced by ipoib_open().

#define IPOIB_NUM_CQES   16

Number of IPoIB completion entries.

Definition at line 74 of file ipoib.c.

Referenced by ipoib_open().

#define EINPROGRESS_JOINING   __einfo_error ( EINFO_EINPROGRESS_JOINING )

Link status for "broadcast join in progress".

Definition at line 114 of file ipoib.c.

Referenced by ipoib_link_state_changed().

#define EINFO_EINPROGRESS_JOINING

Value:
__einfo_uniqify \
        ( EINFO_EINPROGRESS, 0x01, "Joining" )

Definition at line 115 of file ipoib.c.


Function Documentation

FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL  )
static LIST_HEAD ( ipoib_devices  ) [static]

List of all IPoIB devices.

static struct ipoib_mac* ipoib_find_remac ( struct ipoib_device *  ipoib,
const struct ipoib_remac *  remac 
) [static, read]

Find IPoIB MAC from REMAC.

Parameters:
ipoib - IPoIB device
remac - Remote Ethernet MAC
Return values:
mac - IPoIB MAC (or NULL if not found)

Definition at line 152 of file ipoib.c.

References ipoib_device::broadcast, DBGC, eth_ntoa(), is_multicast_ether_addr(), ipoib_peer::list, list_add, list_del, list_for_each_entry, ipoib_broadcast::mac, ipoib_peer::mac, memcmp(), NULL, ipoib_device::peers, and ipoib_peer::remac.

Referenced by ipoib_translate_tx_arp(), and ipoib_transmit().

                                                                               {
        struct ipoib_peer *entry;

        /* Broadcast and multicast REMACs never go through the cache:
         * multicasts are transmitted as broadcasts for simplicity, so
         * the device's broadcast MAC is always the answer.
         */
        if ( is_multicast_ether_addr ( remac ) )
                return &ipoib->broadcast.mac;

        /* Walk the REMAC cache looking for an exact match */
        list_for_each_entry ( entry, &ipoib->peers, list ) {

                /* Not this one: keep looking */
                if ( memcmp ( remac, &entry->remac,
                              sizeof ( entry->remac ) ) != 0 )
                        continue;

                /* Match: move peer to start of list and hand back
                 * its IPoIB MAC.
                 */
                list_del ( &entry->list );
                list_add ( &entry->list, &ipoib->peers );
                return &entry->mac;
        }

        /* Nothing cached for this REMAC */
        DBGC ( ipoib, "IPoIB %p unknown REMAC %s\n",
               ipoib, eth_ntoa ( remac ) );
        return NULL;
}
static int ipoib_map_remac ( struct ipoib_device *  ipoib,
const struct ipoib_remac *  remac,
const struct ipoib_mac *  mac 
) [static]

Add IPoIB MAC to REMAC cache.

Parameters:
ipoib - IPoIB device
remac - Remote Ethernet MAC
mac - IPoIB MAC
Return values:
rc - Return status code

Definition at line 186 of file ipoib.c.

References ENOMEM, ipoib_peer::list, list_add, list_del, list_for_each_entry, ipoib_peer::mac, malloc(), memcmp(), memcpy(), ipoib_device::peers, and ipoib_peer::remac.

Referenced by ipoib_translate_rx_arp().

                                                           {
        struct ipoib_peer *entry;

        /* If this REMAC is already cached, refresh that entry */
        list_for_each_entry ( entry, &ipoib->peers, list ) {

                /* Different REMAC: keep looking */
                if ( memcmp ( remac, &entry->remac,
                              sizeof ( entry->remac ) ) != 0 )
                        continue;

                /* Move peer to start of list and overwrite the
                 * stored IPoIB MAC with the one supplied.
                 */
                list_del ( &entry->list );
                list_add ( &entry->list, &ipoib->peers );
                memcpy ( &entry->mac, mac, sizeof ( entry->mac ) );
                return 0;
        }

        /* Not cached yet: allocate and fill in a new entry */
        entry = malloc ( sizeof ( *entry ) );
        if ( ! entry )
                return -ENOMEM;
        memcpy ( &entry->remac, remac, sizeof ( entry->remac ) );
        memcpy ( &entry->mac, mac, sizeof ( entry->mac ) );
        list_add ( &entry->list, &ipoib->peers );

        return 0;
}
static void ipoib_flush_remac ( struct ipoib_device *  ipoib) [static]

Flush REMAC cache.

Parameters:
ipoib - IPoIB device

Definition at line 220 of file ipoib.c.

References free, ipoib_peer::list, list_del, list_for_each_entry_safe, and ipoib_device::peers.

Referenced by ipoib_close().

                                                             {
        struct ipoib_peer *entry;
        struct ipoib_peer *next;

        /* Unlink and free every cached peer.  The _safe iterator
         * variant is used because entries are removed during the walk.
         */
        list_for_each_entry_safe ( entry, next, &ipoib->peers, list ) {
                list_del ( &entry->list );
                free ( entry );
        }
}
static unsigned int ipoib_discard_remac ( void  ) [static]

Discard some entries from the REMAC cache.

Return values:
discarded - Number of cached items discarded

Definition at line 235 of file ipoib.c.

References for_each_netdev, free, ipoib_operations, ipoib_peer::list, list_del, list_for_each_entry_reverse, netdev, net_device::op, ipoib_device::peers, and net_device::priv.

                                                 {
        struct net_device *netdev;
        struct ipoib_device *ipoib;
        struct ipoib_peer *victim;
        unsigned int count = 0;

        /* Visit every network device, discarding at most one cache
         * entry from each IPoIB device.
         */
        for_each_netdev ( netdev ) {

                /* Only devices using the IPoIB operations table have
                 * a REMAC cache to trim.
                 */
                if ( netdev->op == &ipoib_operations ) {
                        ipoib = netdev->priv;

                        /* Iterate from the end of the list and stop
                         * after the first entry, i.e. drop the least
                         * recently used peer (if there is one).
                         */
                        list_for_each_entry_reverse ( victim, &ipoib->peers,
                                                      list ) {
                                list_del ( &victim->list );
                                free ( victim );
                                count++;
                                break;
                        }
                }
        }

        return count;
}
struct cache_discarder ipoib_discarder __cache_discarder ( CACHE_EXPENSIVE  ) [read]

IPoIB cache discarder.

static void ipoib_init_addr ( const void *  hw_addr,
void *  ll_addr 
) [static]

Initialise IPoIB link-layer address.

Parameters:
hw_addr - Hardware address
ll_addr - Link-layer address

Definition at line 279 of file ipoib.c.

References guid, and IPOIB_GUID_MASK.

                                                                   {
        const uint8_t *guid_bytes = hw_addr;
        uint8_t *out = ll_addr;
        uint8_t mask = IPOIB_GUID_MASK;
        unsigned int idx;

        /* Walk the eight GUID bytes, most significant mask bit first,
         * and copy across each byte whose mask bit is set.
         */
        for ( idx = 0 ; idx < 8 ; idx++ ) {
                if ( mask & ( 0x80 >> idx ) )
                        *(out++) = guid_bytes[idx];
        }
}
struct net_device* alloc_ipoibdev ( size_t  priv_size) [read]

Allocate IPoIB device.

Parameters:
priv_size - Size of driver private data
Return values:
netdev - Network device, or NULL

Definition at line 315 of file ipoib.c.

References alloc_netdev(), eth_broadcast, IB_MAX_PAYLOAD_SIZE, net_device::ll_broadcast, net_device::ll_protocol, net_device::max_pkt_len, and netdev.

Referenced by ipoib_probe().

                                                        {
        struct net_device *netdev = alloc_netdev ( priv_size );

        /* Allocation failed: pass the NULL straight back */
        if ( ! netdev )
                return netdev;

        /* Present the device as eIPoIB: IPoIB link-layer protocol,
         * Ethernet broadcast address, Infiniband payload limit.
         */
        netdev->ll_protocol = &ipoib_protocol;
        netdev->ll_broadcast = eth_broadcast;
        netdev->max_pkt_len = IB_MAX_PAYLOAD_SIZE;

        return netdev;
}
static int ipoib_translate_tx_arp ( struct net_device *  netdev,
struct io_buffer *  iobuf 
) [static]

Translate transmitted ARP packet.

Parameters:
netdev - Network device
iobuf - Packet to be transmitted (with no link-layer headers)
Return values:
rc - Return status code

Definition at line 341 of file ipoib.c.

References arphdr::ar_hln, arphdr::ar_hrd, arphdr::ar_op, arphdr::ar_pln, arp_sender_ha(), arp_sender_pa(), arp_target_ha(), arp_target_pa(), ARPHRD_INFINIBAND, ARPOP_REPLY, io_buffer::data, DBGC, ENOBUFS, ENXIO_ARP_REPLY, ETH_ALEN, eth_ntoa(), htons, iob_put, iob_tailroom(), ipoib_find_remac(), ipoib_device::mac, memcpy(), memset(), NULL, and net_device::priv.

Referenced by ipoib_translate_tx().

                                                              {
        struct ipoib_device *ipoib = netdev->priv;
        struct arphdr *arphdr = iobuf->data;
        struct ipoib_mac *target_ha = NULL;
        void *sender_pa;
        void *target_pa;

        /* Do nothing unless ARP contains eIPoIB link-layer addresses */
        if ( arphdr->ar_hln != ETH_ALEN )
                return 0;

        /* Fail unless we have room to expand packet: both hardware
         * address fields grow from ETH_ALEN to the IPoIB MAC length.
         */
        if ( iob_tailroom ( iobuf ) < ( 2 * ( sizeof ( ipoib->mac ) -
                                              ETH_ALEN ) ) ) {
                DBGC ( ipoib, "IPoIB %p insufficient space in TX ARP\n",
                       ipoib );
                return -ENOBUFS;
        }

        /* Look up REMAC, if applicable.
         *
         * ARP header fields are in network byte order, so the opcode
         * must be compared against htons ( ARPOP_REPLY ).  The REMAC
         * of the peer is carried in the target hardware address field
         * (the target protocol address is an IP address, not a REMAC).
         */
        if ( arphdr->ar_op == htons ( ARPOP_REPLY ) ) {
                target_ha = ipoib_find_remac ( ipoib, arp_target_ha ( arphdr ));
                if ( ! target_ha ) {
                        DBGC ( ipoib, "IPoIB %p no REMAC for %s ARP reply\n",
                               ipoib, eth_ntoa ( arp_target_ha ( arphdr ) ) );
                        return -ENXIO_ARP_REPLY;
                }
        }

        /* Construct new packet.
         *
         * The old protocol address locations are captured before
         * ar_hln is changed; afterwards the arp_*() accessors yield
         * the new (expanded) locations.  The target protocol address
         * is moved first since it moves furthest towards the tail.
         */
        iob_put ( iobuf, ( 2 * ( sizeof ( ipoib->mac ) - ETH_ALEN ) ) );
        sender_pa = arp_sender_pa ( arphdr );
        target_pa = arp_target_pa ( arphdr );
        arphdr->ar_hrd = htons ( ARPHRD_INFINIBAND );
        arphdr->ar_hln = sizeof ( ipoib->mac );
        memcpy ( arp_target_pa ( arphdr ), target_pa, arphdr->ar_pln );
        memcpy ( arp_sender_pa ( arphdr ), sender_pa, arphdr->ar_pln );
        memcpy ( arp_sender_ha ( arphdr ), &ipoib->mac, sizeof ( ipoib->mac ) );
        memset ( arp_target_ha ( arphdr ), 0, sizeof ( ipoib->mac ) );
        if ( target_ha ) {
                /* ARP reply: fill in the peer's real IPoIB MAC */
                memcpy ( arp_target_ha ( arphdr ), target_ha,
                         sizeof ( *target_ha ) );
        }

        return 0;
}
static int ipoib_translate_tx ( struct net_device *  netdev,
struct io_buffer *  iobuf,
uint16_t  net_proto 
) [static]

Translate transmitted packet.

Parameters:
netdev - Network device
iobuf - Packet to be transmitted (with no link-layer headers)
net_proto - Network-layer protocol (in network byte order)
Return values:
rc - Return status code

Definition at line 397 of file ipoib.c.

References ENOTSUP, ETH_P_ARP, ETH_P_IP, htons, and ipoib_translate_tx_arp().

Referenced by ipoib_transmit().

                                                                              {

        /* ARP payloads go through the ARP translator */
        if ( net_proto == htons ( ETH_P_ARP ) )
                return ipoib_translate_tx_arp ( netdev, iobuf );

        /* IPv4 needs no translation */
        if ( net_proto == htons ( ETH_P_IP ) )
                return 0;

        /* Cannot handle other traffic via eIPoIB */
        return -ENOTSUP;
}
static int ipoib_translate_rx_arp ( struct net_device *  netdev,
struct io_buffer *  iobuf,
struct ipoib_remac *  remac 
) [static]

Translate received ARP packet.

Parameters:
netdev - Network device
iobuf - Received packet (with no link-layer headers)
remac - Constructed Remote Ethernet MAC
Return values:
rc - Return status code

Definition at line 420 of file ipoib.c.

References arphdr::ar_hln, arphdr::ar_hrd, arphdr::ar_op, arphdr::ar_pln, arp_sender_ha(), arp_sender_pa(), arp_target_ha(), arp_target_pa(), ARPHRD_ETHER, ARPOP_REPLY, io_buffer::data, DBGC, ETH_ALEN, htons, iob_unput, ipoib_map_remac(), net_device::ll_addr, ipoib_device::mac, memcpy(), memset(), net_device::priv, rc, and strerror().

Referenced by ipoib_translate_rx().

                                                                {
        struct ipoib_device *ipoib = netdev->priv;
        struct arphdr *arphdr = iobuf->data;
        void *sender_pa;
        void *target_pa;
        int rc;

        /* Sanity check: the fixed ARP header must be present before
         * any of its fields are read.
         */
        if ( iob_len ( iobuf ) < sizeof ( *arphdr ) ) {
                DBGC ( ipoib, "IPoIB %p RX ARP too short for header\n",
                       ipoib );
                return -EINVAL;
        }

        /* Do nothing unless ARP contains IPoIB link-layer addresses */
        if ( arphdr->ar_hln != sizeof ( ipoib->mac ) )
                return 0;

        /* Sanity check: both hardware/protocol address pairs must be
         * present, since they are read, moved and then trimmed below.
         */
        if ( iob_len ( iobuf ) <
             ( sizeof ( *arphdr ) +
               ( 2 * ( sizeof ( ipoib->mac ) + arphdr->ar_pln ) ) ) ) {
                DBGC ( ipoib, "IPoIB %p RX ARP too short for addresses\n",
                       ipoib );
                return -EINVAL;
        }

        /* Create REMAC cache entry mapping the constructed REMAC to
         * the sender's real IPoIB MAC.
         */
        if ( ( rc = ipoib_map_remac ( ipoib, remac,
                                      arp_sender_ha ( arphdr ) ) ) != 0 ) {
                DBGC ( ipoib, "IPoIB %p could not map REMAC: %s\n",
                       ipoib, strerror ( rc ) );
                return rc;
        }

        /* Construct new packet.
         *
         * The old protocol address locations are captured before
         * ar_hln is changed; afterwards the arp_*() accessors yield
         * the new (shrunken) locations.  The fields move towards the
         * start of the packet and, for a large ar_pln, source and
         * destination may overlap, hence memmove().
         */
        sender_pa = arp_sender_pa ( arphdr );
        target_pa = arp_target_pa ( arphdr );
        arphdr->ar_hrd = htons ( ARPHRD_ETHER );
        arphdr->ar_hln = ETH_ALEN;
        memmove ( arp_sender_pa ( arphdr ), sender_pa, arphdr->ar_pln );
        memmove ( arp_target_pa ( arphdr ), target_pa, arphdr->ar_pln );
        memcpy ( arp_sender_ha ( arphdr ), remac, ETH_ALEN );
        memset ( arp_target_ha ( arphdr ), 0, ETH_ALEN );
        /* ARP header fields are in network byte order */
        if ( arphdr->ar_op == htons ( ARPOP_REPLY ) ) {
                /* Assume received replies were directed to us */
                memcpy ( arp_target_ha ( arphdr ), netdev->ll_addr, ETH_ALEN );
        }
        iob_unput ( iobuf, ( 2 * ( sizeof ( ipoib->mac ) - ETH_ALEN ) ) );

        return 0;
}
static int ipoib_translate_rx ( struct net_device *  netdev,
struct io_buffer *  iobuf,
struct ipoib_remac *  remac,
uint16_t  net_proto 
) [static]

Translate received packet.

Parameters:
netdev - Network device
iobuf - Received packet (with no link-layer headers)
remac - Constructed Remote Ethernet MAC
net_proto - Network-layer protocol (in network byte order)
Return values:
rc - Return status code

Definition at line 468 of file ipoib.c.

References ENOTSUP, ETH_P_ARP, ETH_P_IP, htons, and ipoib_translate_rx_arp().

Referenced by ipoib_complete_recv().

                                                     {

        /* ARP payloads go through the ARP translator */
        if ( net_proto == htons ( ETH_P_ARP ) )
                return ipoib_translate_rx_arp ( netdev, iobuf, remac );

        /* IPv4 needs no translation */
        if ( net_proto == htons ( ETH_P_IP ) )
                return 0;

        /* Cannot handle other traffic via eIPoIB */
        return -ENOTSUP;
}
static int ipoib_transmit ( struct net_device *  netdev,
struct io_buffer *  iobuf 
) [static]

Transmit packet via IPoIB network device.

Parameters:
netdev - Network device
iobuf - I/O buffer
Return values:
rc - Return status code

Definition at line 499 of file ipoib.c.

References arp_tx_request(), ipoib_broadcast::av, ipoib_device::broadcast, io_buffer::data, DBGC, iphdr::dest, dest, EINVAL, ENETUNREACH, ENXIO_ARP_SENT, ENXIO_NON_IPV4, eth_ntoa(), ETH_P_IP, ipoib_mac::flags__qpn, ipoib_mac::gid, ib_address_vector::gid, ib_address_vector::gid_present, ethhdr::h_dest, ethhdr::h_protocol, htons, ib_link_ok(), ib_post_send(), IB_QPN_MASK, ib_resolve_path(), ipoib_device::ibdev, inet_ntoa(), iob_len(), iob_pull, iob_push, ipoib_find_remac(), ipoib_translate_tx(), is_multicast_ether_addr(), mac, memcpy(), memset(), net_proto, ntohl, ntohs, net_device::priv, ipoib_hdr::proto, ib_address_vector::qkey, ipoib_device::qp, ib_address_vector::qpn, rc, ipoib_hdr::reserved, iphdr::src, and strerror().

                                                      {
        /* Overview: strip the eIPoIB (Ethernet-style) pseudo-header,
         * resolve the destination REMAC to an Infiniband address
         * vector, translate the payload if required, prepend the real
         * IPoIB header and post the buffer to the queue pair.
         *
         * On every error path the function simply returns a negative
         * status code; the I/O buffer is not freed here.
         */
        struct ipoib_device *ipoib = netdev->priv;
        struct ib_device *ibdev = ipoib->ibdev;
        struct ethhdr *ethhdr;
        struct iphdr *iphdr;
        struct ipoib_hdr *ipoib_hdr;
        struct ipoib_remac *remac;
        struct ipoib_mac *mac;
        struct ib_address_vector *dest;
        struct ib_address_vector av;
        uint16_t net_proto;
        int rc;

        /* Sanity check */
        if ( iob_len ( iobuf ) < sizeof ( *ethhdr ) ) {
                DBGC ( ipoib, "IPoIB %p buffer too short\n", ipoib );
                return -EINVAL;
        }

        /* Attempting transmission while link is down will put the
         * queue pair into an error state, so don't try it.
         */
        if ( ! ib_link_ok ( ibdev ) )
                return -ENETUNREACH;

        /* Strip eIPoIB header */
        /* NOTE(review): ethhdr and remac keep pointing at the stripped
         * header bytes and are still read below (REMAC lookup and
         * debug messages); this presumably relies on iob_pull() only
         * advancing the data pointer - confirm.
         */
        ethhdr = iobuf->data;
        remac = ( ( struct ipoib_remac * ) ethhdr->h_dest );
        net_proto = ethhdr->h_protocol;
        iob_pull ( iobuf, sizeof ( *ethhdr ) );

        /* Identify destination address */
        if ( is_multicast_ether_addr ( remac ) ) {

                /* Transmit multicasts as broadcasts, for simplicity */
                dest = &ipoib->broadcast.av;

        } else if ( ( mac = ipoib_find_remac ( ipoib, remac ) ) ) {

                /* Construct address vector from IPoIB MAC */
                /* The queue key is copied from the broadcast address
                 * vector; the QPN and GID come from the cached MAC.
                 */
                dest = &av;
                memset ( dest, 0, sizeof ( *dest ) );
                dest->qpn = ( ntohl ( mac->flags__qpn ) & IB_QPN_MASK );
                dest->qkey = ipoib->broadcast.av.qkey;
                dest->gid_present = 1;
                memcpy ( &dest->gid, &mac->gid, sizeof ( dest->gid ) );
                if ( ( rc = ib_resolve_path ( ibdev, dest ) ) != 0 ) {
                        /* Path not resolved yet */
                        return rc;
                }

        } else {

                /* Generate a new ARP request (if possible) to trigger
                 * population of the REMAC cache entry.
                 */
                /* Only possible for IPv4 packets long enough to hold
                 * an IP header, since the ARP request is built from
                 * the packet's IP source and destination addresses.
                 */
                if ( ( net_proto != htons ( ETH_P_IP ) ) ||
                     ( iob_len ( iobuf ) < sizeof ( *iphdr ) ) ) {
                        DBGC ( ipoib, "IPoIB %p no REMAC for %s non-IPv4 "
                               "packet type %04x\n", ipoib,
                               eth_ntoa ( ethhdr->h_dest ),
                               ntohs ( net_proto ) );
                        return -ENXIO_NON_IPV4;
                }
                iphdr = iobuf->data;
                if ( ( rc = arp_tx_request ( netdev, &ipv4_protocol,
                                             &iphdr->dest, &iphdr->src ) ) !=0){
                        DBGC ( ipoib, "IPoIB %p could not ARP for %s/%s/",
                               ipoib, eth_ntoa ( ethhdr->h_dest ),
                               inet_ntoa ( iphdr->dest ) );
                        DBGC ( ipoib, "%s: %s\n", inet_ntoa ( iphdr->src ),
                               strerror ( rc ) );
                        return rc;
                }
                /* ARP request was sent, but this packet is still
                 * reported as failed (with a distinct error code).
                 */
                DBGC ( ipoib, "IPoIB %p no REMAC for %s/%s/", ipoib,
                       eth_ntoa ( ethhdr->h_dest ), inet_ntoa ( iphdr->dest ) );
                DBGC  ( ipoib, "%s\n", inet_ntoa ( iphdr->src ) );
                return -ENXIO_ARP_SENT;
        }

        /* Translate packet if applicable */
        if ( ( rc = ipoib_translate_tx ( netdev, iobuf, net_proto ) ) != 0 )
                return rc;

        /* Prepend real IPoIB header */
        ipoib_hdr = iob_push ( iobuf, sizeof ( *ipoib_hdr ) );
        ipoib_hdr->proto = net_proto;
        ipoib_hdr->reserved = 0;

        /* Transmit packet */
        return ib_post_send ( ibdev, ipoib->qp, dest, iobuf );
}
static void ipoib_complete_send ( struct ib_device *ibdev  __unused,
struct ib_queue_pair *  qp,
struct io_buffer *  iobuf,
int  rc 
) [static]

Handle IPoIB send completion.

Parameters:
ibdev - Infiniband device
qp - Queue pair
iobuf - I/O buffer
rc - Completion status code

Definition at line 601 of file ipoib.c.

References ib_qp_get_ownerdata(), ipoib_device::netdev, and netdev_tx_complete_err().

                                                                    {
        /* The queue pair's owner data is the IPoIB device */
        struct ipoib_device *owner = ib_qp_get_ownerdata ( qp );

        /* Report the completion, with its status, to the network device */
        netdev_tx_complete_err ( owner->netdev, iobuf, rc );
}
static void ipoib_complete_recv ( struct ib_device *ibdev  __unused,
struct ib_queue_pair *  qp,
struct ib_address_vector *  dest,
struct ib_address_vector *  source,
struct io_buffer *  iobuf,
int  rc 
) [static]

Handle IPoIB receive completion.

Parameters:
ibdev - Infiniband device
qp - Queue pair
dest - Destination address vector, or NULL
source - Source address vector, or NULL
iobuf - I/O buffer
rc - Completion status code

Definition at line 619 of file ipoib.c.

References io_buffer::data, DBGC, DBGC_HD, EIO, EIPOIB_QPN_LA, ENOTTY, eth_broadcast, ib_address_vector::gid, ib_address_vector::gid_present, ethhdr::h_dest, ethhdr::h_protocol, ethhdr::h_source, htonl, htons, IB_GID_MULTICAST, ib_qp_get_ownerdata(), iob_len(), iob_pull, iob_push, ipoib_translate_rx(), ipoib_remac::lid, ib_address_vector::lid, net_device::ll_addr, memcpy(), net_proto, netdev, ipoib_device::netdev, netdev_rx(), netdev_rx_err(), ipoib_hdr::proto, ipoib_remac::qpn, and ib_address_vector::qpn.

                                                                    {
        struct ipoib_device *ipoib = ib_qp_get_ownerdata ( qp );
        struct net_device *netdev = ipoib->netdev;
        struct ipoib_hdr *ipoib_hdr;
        struct ethhdr *ethhdr;
        struct ipoib_remac remac;
        uint16_t net_proto;

        /* Record errors: a failed completion is handed to the network
         * device together with its buffer and nothing else is done.
         */
        if ( rc != 0 ) {
                netdev_rx_err ( netdev, iobuf, rc );
                return;
        }

        /* Sanity check */
        if ( iob_len ( iobuf ) < sizeof ( struct ipoib_hdr ) ) {
                DBGC ( ipoib, "IPoIB %p received packet too short to "
                       "contain IPoIB header\n", ipoib );
                DBGC_HD ( ipoib, iobuf->data, iob_len ( iobuf ) );
                netdev_rx_err ( netdev, iobuf, -EIO );
                return;
        }
        if ( ! source ) {
                /* The source address vector is required in order to
                 * construct the REMAC below.
                 */
                DBGC ( ipoib, "IPoIB %p received packet without address "
                       "vector\n", ipoib );
                netdev_rx_err ( netdev, iobuf, -ENOTTY );
                return;
        }

        /* Strip real IPoIB header */
        ipoib_hdr = iobuf->data;
        net_proto = ipoib_hdr->proto;
        iob_pull ( iobuf, sizeof ( *ipoib_hdr ) );

        /* Construct source address from remote QPN and LID */
        remac.qpn = htonl ( source->qpn | EIPOIB_QPN_LA );
        remac.lid = htons ( source->lid );

        /* Translate packet if applicable */
        if ( ( rc = ipoib_translate_rx ( netdev, iobuf, &remac,
                                         net_proto ) ) != 0 ) {
                netdev_rx_err ( netdev, iobuf, rc );
                return;
        }

        /* Prepend eIPoIB header */
        ethhdr = iob_push ( iobuf, sizeof ( *ethhdr ) );
        memcpy ( &ethhdr->h_source, &remac, sizeof ( ethhdr->h_source ) );
        ethhdr->h_protocol = net_proto;

        /* Construct destination address.
         *
         * The destination address vector may be NULL (as may the
         * source, which is checked above), so guard the dereference;
         * with no destination information the packet is treated as
         * unicast to the local MAC.
         */
        if ( dest && dest->gid_present && IB_GID_MULTICAST ( &dest->gid ) ) {
                /* Multicast GID: use the Ethernet broadcast address */
                memcpy ( &ethhdr->h_dest, eth_broadcast,
                         sizeof ( ethhdr->h_dest ) );
        } else {
                /* Assume destination address is local Ethernet MAC */
                memcpy ( &ethhdr->h_dest, netdev->ll_addr,
                         sizeof ( ethhdr->h_dest ) );
        }

        /* Hand off to network layer */
        netdev_rx ( netdev, iobuf );
}
static struct io_buffer* ipoib_alloc_iob ( size_t  len) [static, read]

Allocate IPoIB receive I/O buffer.

Parameters:
len - Length of buffer
Return values:
iobuf - I/O buffer, or NULL

Some Infiniband hardware requires 2kB alignment of receive buffers and provides no way to disable header separation. The result is that there are only four bytes of link-layer header (the real IPoIB header) before the payload. This is not sufficient space to insert an eIPoIB link-layer pseudo-header.

We therefore allocate I/O buffers offset to start slightly before the natural alignment boundary, in order to allow sufficient space.

Definition at line 709 of file ipoib.c.

References alloc_iob_raw(), and iob_reserve.

                                                         {
        /* Additional length required at start of buffer: the amount
         * by which the eIPoIB pseudo-header exceeds the real IPoIB
         * header.
         */
        size_t extra = ( sizeof ( struct ethhdr ) -
                         sizeof ( struct ipoib_hdr ) );
        struct io_buffer *iobuf;

        /* Allocate buffer, offset so that it starts slightly before
         * the alignment boundary.
         */
        iobuf = alloc_iob_raw ( ( len + extra ), len, -extra );
        if ( ! iobuf )
                return iobuf;

        /* Reserve the extra space ahead of the data */
        iob_reserve ( iobuf, extra );
        return iobuf;
}
static void ipoib_poll ( struct net_device *  netdev) [static]

Poll IPoIB network device.

Parameters:
netdev - Network device

Definition at line 735 of file ipoib.c.

References ib_poll_eq(), ipoib_device::ibdev, net_device::priv, and retry_poll().

                                                     {
        struct ipoib_device *ipoib = netdev->priv;

        /* Poll the underlying Infiniband device's event queue */
        ib_poll_eq ( ipoib->ibdev );

        /* Poll the retry timers (required for IPoIB multicast join) */
        retry_poll();
}
void ipoib_join_complete ( struct ib_mc_membership *  membership,
int  rc 
)

Handle IPv4 broadcast multicast group join completion.

Parameters:
membership - Multicast group membership
rc - Status code

Definition at line 752 of file ipoib.c.

References ipoib_device::broadcast, container_of, ipoib_broadcast::membership, ipoib_device::netdev, and netdev_link_err().

Referenced by ipoib_join_broadcast_group().

                                                                         {
        struct ipoib_device *ipoib;

        /* The membership is embedded in the device's broadcast
         * structure, so the owning device is recovered from it.
         */
        ipoib = container_of ( membership, struct ipoib_device,
                               broadcast.membership );

        /* Record join status as link status */
        netdev_link_err ( ipoib->netdev, rc );
}
static int ipoib_join_broadcast_group ( struct ipoib_device *  ipoib) [static]

Join IPv4 broadcast multicast group.

Parameters:
ipoib - IPoIB device
Return values:
rc - Return status code

Definition at line 767 of file ipoib.c.

References ipoib_broadcast::av, ipoib_device::broadcast, DBGC, ib_mcast_join(), ipoib_device::ibdev, ipoib_join_complete(), ipoib_broadcast::membership, ipoib_device::qp, rc, and strerror().

Referenced by ipoib_link_state_changed().

                                                                     {
        int rc;

        /* Start the join; ipoib_join_complete() is passed as the
         * completion handler.
         */
        rc = ib_mcast_join ( ipoib->ibdev, ipoib->qp,
                             &ipoib->broadcast.membership,
                             &ipoib->broadcast.av, 0,
                             ipoib_join_complete );
        if ( rc != 0 ) {
                DBGC ( ipoib, "IPoIB %p could not join broadcast group: %s\n",
                       ipoib, strerror ( rc ) );
        }

        /* Zero on success, otherwise the join error */
        return rc;
}
static void ipoib_leave_broadcast_group ( struct ipoib_device *ipoib ) [static]

Leave IPv4 broadcast multicast group.

Parameters:
ipoib: IPoIB device

Definition at line 788 of file ipoib.c.

References ipoib_device::broadcast, ib_mcast_leave(), ipoib_device::ibdev, ipoib_broadcast::membership, and ipoib_device::qp.

Referenced by ipoib_close(), and ipoib_link_state_changed().

                                                                       {
        struct ib_device *ibdev = ipoib->ibdev;

        /* Drop our membership of the broadcast multicast group */
        ib_mcast_leave ( ibdev, ipoib->qp, &ipoib->broadcast.membership );
}
static void ipoib_link_state_changed ( struct ipoib_device *ipoib ) [static]

Handle link status change.

Parameters:
ipoib: IPoIB device

Definition at line 800 of file ipoib.c.

References ipoib_broadcast::av, ipoib_device::broadcast, DBGC, EINPROGRESS_JOINING, ipoib_mac::gid, ib_address_vector::gid, ib_device::gid, ib_address_vector::gid_present, htons, ib_is_open(), ib_link_ok(), ib_link_rc(), IB_PKEY_FULL, IB_QPN_BROADCAST, ipoib_device::ibdev, ipoib_join_broadcast_group(), ipoib_leave_broadcast_group(), ipoib_broadcast::mac, ipoib_device::mac, memcpy(), memset(), netdev, ipoib_device::netdev, netdev_link_err(), ib_device::pkey, ib_gid::prefix, ipoib_device::qp, ib_address_vector::qpn, rc, ib_gid::s, strerror(), and ib_gid::words.

Referenced by ipoib_notify(), and ipoib_open().

                                                                    {
        struct ib_device *ibdev = ipoib->ibdev;
        struct net_device *netdev = ipoib->netdev;
        int rc;

        /* Leave existing broadcast group
         *
         * A queue pair exists only while the network device is open:
         * ipoib_open() creates it and ipoib_close() resets the
         * pointer to NULL.
         */
        if ( ipoib->qp )
                ipoib_leave_broadcast_group ( ipoib );

        /* Update MAC address based on potentially-new GID prefix */
        memcpy ( &ipoib->mac.gid.s.prefix, &ibdev->gid.s.prefix,
                 sizeof ( ipoib->mac.gid.s.prefix ) );

        /* Update broadcast MAC GID based on potentially-new partition key
         *
         * The partition key, ORed with IB_PKEY_FULL, is stored in
         * network byte order in word 2 of the broadcast GID.
         */
        ipoib->broadcast.mac.gid.words[2] =
                htons ( ibdev->pkey | IB_PKEY_FULL );

        /* Construct broadcast address vector from broadcast MAC address
         *
         * The vector is rebuilt from scratch: zeroed, then given the
         * broadcast QPN and the broadcast GID updated just above.
         */
        memset ( &ipoib->broadcast.av, 0, sizeof ( ipoib->broadcast.av ) );
        ipoib->broadcast.av.qpn = IB_QPN_BROADCAST;
        ipoib->broadcast.av.gid_present = 1;
        memcpy ( &ipoib->broadcast.av.gid, &ipoib->broadcast.mac.gid,
                 sizeof ( ipoib->broadcast.av.gid ) );

        /* Set net device link state to reflect Infiniband link state
         *
         * When ib_link_rc() reports no error, the link status is set
         * to "broadcast join in progress" rather than to success.
         */
        rc = ib_link_rc ( ibdev );
        netdev_link_err ( netdev, ( rc ? rc : -EINPROGRESS_JOINING ) );

        /* Join new broadcast group
         *
         * Attempted only when ib_is_open() and ib_link_ok() both hold
         * and a queue pair exists.  If the join cannot be started,
         * its error replaces the link status recorded above.
         */
        if ( ib_is_open ( ibdev ) && ib_link_ok ( ibdev ) && ipoib->qp &&
             ( ( rc = ipoib_join_broadcast_group ( ipoib ) ) != 0 ) ) {
                DBGC ( ipoib, "IPoIB %p could not rejoin broadcast group: "
                       "%s\n", ipoib, strerror ( rc ) );
                netdev_link_err ( netdev, rc );
                return;
        }
}
static int ipoib_open ( struct net_device *netdev ) [static]

Open IPoIB network device.

Parameters:
netdev: Network device
Return values:
rc: Return status code
Definition at line 844 of file ipoib.c.

References ipoib_device::cq, DBGC, ipoib_mac::flags__qpn, htonl, ib_close(), ib_create_cq(), ib_create_qp(), ib_destroy_cq(), ib_destroy_qp(), ib_open(), ib_qp_set_ownerdata(), IB_QPT_UD, ib_refill_recv(), ipoib_device::ibdev, ipoib_link_state_changed(), IPOIB_NUM_CQES, IPOIB_NUM_RECV_WQES, IPOIB_NUM_SEND_WQES, ipoib_device::mac, net_device::name, net_device::priv, ipoib_device::qp, ib_queue_pair::qpn, rc, and strerror().

                                                    {
        struct ipoib_device *ipoib = netdev->priv;
        struct ib_device *ibdev = ipoib->ibdev;
        int rc;

        /* Open IB device */
        if ( ( rc = ib_open ( ibdev ) ) != 0 ) {
                DBGC ( ipoib, "IPoIB %p could not open device: %s\n",
                       ipoib, strerror ( rc ) );
                goto err_ib_open;
        }

        /* Allocate completion queue */
        if ( ( rc = ib_create_cq ( ibdev, IPOIB_NUM_CQES, &ipoib_cq_op,
                                   &ipoib->cq ) ) != 0 ) {
                DBGC ( ipoib, "IPoIB %p could not create completion queue: "
                       "%s\n", ipoib, strerror ( rc ) );
                goto err_create_cq;
        }

        /* Allocate queue pair
         *
         * The same completion queue is passed for both the send and
         * the receive side of the queue pair.
         */
        if ( ( rc = ib_create_qp ( ibdev, IB_QPT_UD, IPOIB_NUM_SEND_WQES,
                                   ipoib->cq, IPOIB_NUM_RECV_WQES, ipoib->cq,
                                   &ipoib_qp_op, netdev->name,
                                   &ipoib->qp ) ) != 0 ) {
                DBGC ( ipoib, "IPoIB %p could not create queue pair: %s\n",
                       ipoib, strerror ( rc ) );
                goto err_create_qp;
        }
        /* Attach the IPoIB device to the queue pair as its owner data */
        ib_qp_set_ownerdata ( ipoib->qp, ipoib );

        /* Update MAC address with QPN (stored in network byte order) */
        ipoib->mac.flags__qpn = htonl ( ipoib->qp->qpn );

        /* Fill receive rings */
        ib_refill_recv ( ibdev, ipoib->qp );

        /* Fake a link status change to join the broadcast group */
        ipoib_link_state_changed ( ipoib );

        return 0;

        /* Error unwind: each label releases what was acquired before
         * the step that failed, in reverse order of acquisition.  The
         * ib_destroy_qp() call below is not the target of any goto in
         * this function and follows "return 0", so it is never
         * executed.
         */
        ib_destroy_qp ( ibdev, ipoib->qp );
 err_create_qp:
        ib_destroy_cq ( ibdev, ipoib->cq );
 err_create_cq:
        ib_close ( ibdev );
 err_ib_open:
        return rc;
}
static void ipoib_close ( struct net_device *netdev ) [static]

Close IPoIB network device.

Parameters:
netdev: Network device

Definition at line 900 of file ipoib.c.

References ipoib_device::cq, ipoib_mac::flags__qpn, ib_close(), ib_destroy_cq(), ib_destroy_qp(), ipoib_device::ibdev, ipoib_flush_remac(), ipoib_leave_broadcast_group(), ipoib_device::mac, NULL, net_device::priv, and ipoib_device::qp.

                                                      {
        struct ipoib_device *ipoib;
        struct ib_device *ibdev;

        ipoib = netdev->priv;
        ibdev = ipoib->ibdev;

        /* Discard all cached REMAC entries */
        ipoib_flush_remac ( ipoib );

        /* Give up membership of the broadcast group */
        ipoib_leave_broadcast_group ( ipoib );

        /* The MAC address no longer carries a queue pair number */
        ipoib->mac.flags__qpn = 0;

        /* Destroy the queue pair and then the completion queue,
         * clearing each pointer once its object has been destroyed
         */
        ib_destroy_qp ( ibdev, ipoib->qp );
        ipoib->qp = NULL;
        ib_destroy_cq ( ibdev, ipoib->cq );
        ipoib->cq = NULL;

        /* Finally close the Infiniband device */
        ib_close ( ibdev );
}
static int ipoib_probe ( struct ib_device *ibdev ) [static]

Probe IPoIB device.

Parameters:
ibdev: Infiniband device
Return values:
rc: Return status code
Definition at line 937 of file ipoib.c.

References alloc_ipoibdev(), ipoib_device::broadcast, net_device::dev, ib_device::dev, ENOMEM, ETH_ALEN, ipoib_mac::gid, ib_device::gid, ib_gid::guid, net_device::hw_addr, ipoib_device::ibdev, INIT_LIST_HEAD, ib_device::lemac, ipoib_device::list, list_add_tail, list_del, net_device::ll_addr, ipoib_broadcast::mac, ipoib_device::mac, memcpy(), memset(), netdev, ipoib_device::netdev, netdev_init(), netdev_nullify(), netdev_put(), ipoib_device::peers, net_device::priv, rc, register_netdev(), ib_gid::s, and unregister_netdev().

                                                   {
        struct net_device *netdev;
        struct ipoib_device *ipoib;
        int rc;

        /* Allocate network device
         *
         * The IPoIB device structure is the network device's private
         * data; it is zeroed before use, so the queue pair and
         * completion queue pointers start out as NULL.
         */
        netdev = alloc_ipoibdev ( sizeof ( *ipoib ) );
        if ( ! netdev )
                return -ENOMEM;
        netdev_init ( netdev, &ipoib_operations );
        ipoib = netdev->priv;
        netdev->dev = ibdev->dev;
        memset ( ipoib, 0, sizeof ( *ipoib ) );
        ipoib->netdev = netdev;
        ipoib->ibdev = ibdev;
        INIT_LIST_HEAD ( &ipoib->peers );

        /* Extract hardware address
         *
         * The hardware address is the GUID part of the Infiniband
         * device's GID; the link-layer address is the ETH_ALEN-byte
         * ibdev->lemac value.
         */
        memcpy ( netdev->hw_addr, &ibdev->gid.s.guid,
                 sizeof ( ibdev->gid.s.guid ) );
        memcpy ( netdev->ll_addr, ibdev->lemac, ETH_ALEN );

        /* Set local MAC address (GUID part only at this point) */
        memcpy ( &ipoib->mac.gid.s.guid, &ibdev->gid.s.guid,
                 sizeof ( ipoib->mac.gid.s.guid ) );

        /* Set default broadcast MAC address */
        memcpy ( &ipoib->broadcast.mac, &ipoib_broadcast,
                 sizeof ( ipoib->broadcast.mac ) );

        /* Add to list of IPoIB devices
         *
         * NOTE(review): done before register_netdev(), presumably so
         * the device can already be found during registration --
         * confirm.
         */
        list_add_tail ( &ipoib->list, &ipoib_devices );

        /* Register network device */
        if ( ( rc = register_netdev ( netdev ) ) != 0 )
                goto err_register_netdev;

        return 0;

        /* Error unwind.  The unregister_netdev() call below follows
         * "return 0" and is not the target of any goto in this
         * function, so it is never executed.
         */
        unregister_netdev ( netdev );
 err_register_netdev:
        list_del ( &ipoib->list );
        netdev_nullify ( netdev );
        netdev_put ( netdev );
        return rc;
}
static void ipoib_notify ( struct ib_device *ibdev ) [static]

Handle device or link status change.

Parameters:
ibdev: Infiniband device

Definition at line 989 of file ipoib.c.

References ipoib_device::ibdev, ipoib_link_state_changed(), ipoib_device::list, and list_for_each_entry.

                                                     {
        struct ipoib_device *ipoib;

        /* Pass the change on to every IPoIB device that sits on
         * this Infiniband device
         */
        list_for_each_entry ( ipoib, &ipoib_devices, list ) {
                if ( ipoib->ibdev == ibdev )
                        ipoib_link_state_changed ( ipoib );
        }
}
static void ipoib_remove ( struct ib_device *ibdev ) [static]

Remove IPoIB device.

Parameters:
ibdev: Infiniband device

Definition at line 1005 of file ipoib.c.

References ipoib_device::ibdev, ipoib_device::list, net_device::list, list_del, list_for_each_entry_safe, netdev, ipoib_device::netdev, netdev_nullify(), netdev_put(), and unregister_netdev().

                                                     {
        struct ipoib_device *ipoib;
        struct ipoib_device *next;
        struct net_device *netdev;

        /* Tear down every IPoIB device that sits on this Infiniband
         * device; the "safe" iterator is used because entries are
         * deleted from the list while it is being walked
         */
        list_for_each_entry_safe ( ipoib, next, &ipoib_devices, list ) {
                if ( ipoib->ibdev == ibdev ) {
                        netdev = ipoib->netdev;
                        unregister_netdev ( netdev );
                        list_del ( &ipoib->list );
                        netdev_nullify ( netdev );
                        netdev_put ( netdev );
                }
        }
}
struct net_device* ipoib_netdev ( struct ib_device *ibdev ) [read]

Find IPoIB network device.

Parameters:
ibdev: Infiniband device
Return values:
netdev: IPoIB network device, or NULL if not found

Definition at line 1036 of file ipoib.c.

References ipoib_device::ibdev, ipoib_device::list, list_for_each_entry, ipoib_device::netdev, and NULL.

Referenced by golan_register_ibdev().

                                                             {
        struct ipoib_device *ipoib;

        /* Hand back the network device of the first IPoIB device
         * found on this Infiniband device
         */
        list_for_each_entry ( ipoib, &ipoib_devices, list ) {
                if ( ipoib->ibdev == ibdev )
                        return ipoib->netdev;
        }

        /* No IPoIB device is attached to this Infiniband device */
        return NULL;
}

Variable Documentation

struct ipoib_mac ipoib_broadcast [static]
Initial value:
 {
        .flags__qpn = htonl ( IB_QPN_BROADCAST ),
        .gid.bytes = { 0xff, 0x12, 0x40, 0x1b, 0x00, 0x00, 0x00, 0x00,
                       0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff },
}

Broadcast IPoIB address.

Definition at line 107 of file ipoib.c.

struct errortab ipoib_errors [] __errortab
Initial value:

Human-readable message for the link status.

Definition at line 119 of file ipoib.c.

static struct net_device_operations ipoib_operations [static]
Initial value:
 {
        .open           = ipoib_open,
        .close          = ipoib_close,
        .transmit       = ipoib_transmit,
        .poll           = ipoib_poll,
}

IPoIB network device operations.

Definition at line 126 of file ipoib.c.

Referenced by ipoib_discard_remac().

struct ll_protocol ipoib_protocol __ll_protocol
Initial value:
 {
        .name           = "IPoIB",
        .ll_proto       = htons ( ARPHRD_ETHER ),
        .hw_addr_len    = sizeof ( union ib_guid ),
        .ll_addr_len    = ETH_ALEN,
        .ll_header_len  = ETH_HLEN,
        .push           = eth_push,
        .pull           = eth_pull,
        .init_addr      = ipoib_init_addr,
        .ntoa           = eth_ntoa,
        .mc_hash        = eth_mc_hash,
        .eth_addr       = eth_eth_addr,
        .eui64          = eth_eui64,
        .flags          = LL_NAME_ONLY,
}

IPoIB protocol.

Ethernet protocol.

Definition at line 293 of file ipoib.c.

Initial value:
 {
        .complete_send = ipoib_complete_send,
        .complete_recv = ipoib_complete_recv,
}

IPoIB completion operations.

Definition at line 689 of file ipoib.c.

Initial value:
 {
        .alloc_iob = ipoib_alloc_iob,
}

IPoIB queue pair operations.

Definition at line 726 of file ipoib.c.

struct ib_driver ipoib_driver __ib_driver
Initial value:
 {
        .name = "IPoIB",
        .probe = ipoib_probe,
        .notify = ipoib_notify,
        .remove = ipoib_remove,
}

IPoIB driver.

Definition at line 1023 of file ipoib.c.