iPXE
eoib.c
Go to the documentation of this file.
00001 /*
00002  * Copyright (C) 2016 Michael Brown <mbrown@fensystems.co.uk>.
00003  *
00004  * This program is free software; you can redistribute it and/or
00005  * modify it under the terms of the GNU General Public License as
00006  * published by the Free Software Foundation; either version 2 of the
00007  * License, or any later version.
00008  *
00009  * This program is distributed in the hope that it will be useful, but
00010  * WITHOUT ANY WARRANTY; without even the implied warranty of
00011  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00012  * General Public License for more details.
00013  *
00014  * You should have received a copy of the GNU General Public License
00015  * along with this program; if not, write to the Free Software
00016  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
00017  * 02110-1301, USA.
00018  *
00019  * You can also choose to distribute this program under the terms of
00020  * the Unmodified Binary Distribution Licence (as given in the file
00021  * COPYING.UBDL), provided that you have satisfied its requirements.
00022  */
00023 
00024 FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
00025 
00026 #include <stdio.h>
00027 #include <string.h>
00028 #include <errno.h>
00029 #include <ipxe/errortab.h>
00030 #include <ipxe/malloc.h>
00031 #include <ipxe/iobuf.h>
00032 #include <ipxe/if_ether.h>
00033 #include <ipxe/netdevice.h>
00034 #include <ipxe/ethernet.h>
00035 #include <ipxe/infiniband.h>
00036 #include <ipxe/ib_mcast.h>
00037 #include <ipxe/ib_pathrec.h>
00038 #include <ipxe/eoib.h>
00039 
00040 /** @file
00041  *
00042  * Ethernet over Infiniband
00043  *
00044  */
00045 
00046 /** Number of EoIB send work queue entries */
00047 #define EOIB_NUM_SEND_WQES 8
00048 
00049 /** Number of EoIB receive work queue entries */
00050 #define EOIB_NUM_RECV_WQES 4
00051 
00052 /** Number of EoIB completion queue entries */
00053 #define EOIB_NUM_CQES 16
00054 
00055 /** Link status for "broadcast join in progress" */
00056 #define EINPROGRESS_JOINING __einfo_error ( EINFO_EINPROGRESS_JOINING )
00057 #define EINFO_EINPROGRESS_JOINING __einfo_uniqify \
00058         ( EINFO_EINPROGRESS, 0x01, "Joining" )
00059 
00060 /** Human-readable message for the link status */
00061 struct errortab eoib_errors[] __errortab = {
00062         __einfo_errortab ( EINFO_EINPROGRESS_JOINING ),
00063 };
00064 
00065 /** List of EoIB devices */
00066 static LIST_HEAD ( eoib_devices );
00067 
00068 static struct net_device_operations eoib_operations;
00069 
00070 /****************************************************************************
00071  *
00072  * EoIB peer cache
00073  *
00074  ****************************************************************************
00075  */
00076 
00077 /** An EoIB peer cache entry */
00078 struct eoib_peer {
00079         /** List of EoIB peer cache entries */
00080         struct list_head list;
00081         /** Ethernet MAC */
00082         uint8_t mac[ETH_ALEN];
00083         /** Infiniband address vector */
00084         struct ib_address_vector av;
00085 };
00086 
00087 /**
00088  * Find EoIB peer cache entry
00089  *
00090  * @v eoib              EoIB device
00091  * @v mac               Ethernet MAC
00092  * @ret peer            EoIB peer, or NULL if not found
00093  */
00094 static struct eoib_peer * eoib_find_peer ( struct eoib_device *eoib,
00095                                            const uint8_t *mac ) {
00096         struct eoib_peer *peer;
00097 
00098         /* Find peer cache entry */
00099         list_for_each_entry ( peer, &eoib->peers, list ) {
00100                 if ( memcmp ( mac, peer->mac, sizeof ( peer->mac ) ) == 0 ) {
00101                         /* Move peer to start of list */
00102                         list_del ( &peer->list );
00103                         list_add ( &peer->list, &eoib->peers );
00104                         return peer;
00105                 }
00106         }
00107 
00108         return NULL;
00109 }
00110 
00111 /**
00112  * Create EoIB peer cache entry
00113  *
00114  * @v eoib              EoIB device
00115  * @v mac               Ethernet MAC
00116  * @ret peer            EoIB peer, or NULL on error
00117  */
00118 static struct eoib_peer * eoib_create_peer ( struct eoib_device *eoib,
00119                                              const uint8_t *mac ) {
00120         struct eoib_peer *peer;
00121 
00122         /* Allocate and initialise peer cache entry */
00123         peer = zalloc ( sizeof ( *peer ) );
00124         if ( peer ) {
00125                 memcpy ( peer->mac, mac, sizeof ( peer->mac ) );
00126                 list_add ( &peer->list, &eoib->peers );
00127         }
00128         return peer;
00129 }
00130 
00131 /**
00132  * Flush EoIB peer cache
00133  *
00134  * @v eoib              EoIB device
00135  */
00136 static void eoib_flush_peers ( struct eoib_device *eoib ) {
00137         struct eoib_peer *peer;
00138         struct eoib_peer *tmp;
00139 
00140         list_for_each_entry_safe ( peer, tmp, &eoib->peers, list ) {
00141                 list_del ( &peer->list );
00142                 free ( peer );
00143         }
00144 }
00145 
00146 /**
00147  * Discard some entries from the peer cache
00148  *
00149  * @ret discarded       Number of cached items discarded
00150  */
00151 static unsigned int eoib_discard ( void ) {
00152         struct net_device *netdev;
00153         struct eoib_device *eoib;
00154         struct eoib_peer *peer;
00155         unsigned int discarded = 0;
00156 
00157         /* Try to discard one cache entry for each EoIB device */
00158         for_each_netdev ( netdev ) {
00159 
00160                 /* Skip non-EoIB devices */
00161                 if ( netdev->op != &eoib_operations )
00162                         continue;
00163                 eoib = netdev->priv;
00164 
00165                 /* Discard least recently used cache entry (if any) */
00166                 list_for_each_entry_reverse ( peer, &eoib->peers, list ) {
00167                         list_del ( &peer->list );
00168                         free ( peer );
00169                         discarded++;
00170                         break;
00171                 }
00172         }
00173 
00174         return discarded;
00175 }
00176 
00177 /** EoIB cache discarder */
00178 struct cache_discarder eoib_discarder __cache_discarder ( CACHE_EXPENSIVE ) = {
00179         .discard = eoib_discard,
00180 };
00181 
00182 /**
00183  * Find destination address vector
00184  *
00185  * @v eoib              EoIB device
00186  * @v mac               Ethernet MAC
00187  * @ret av              Address vector, or NULL to send as broadcast
00188  */
00189 static struct ib_address_vector * eoib_tx_av ( struct eoib_device *eoib,
00190                                                const uint8_t *mac ) {
00191         struct ib_device *ibdev = eoib->ibdev;
00192         struct eoib_peer *peer;
00193         int rc;
00194 
00195         /* If this is a broadcast or multicast MAC address, then send
00196          * this packet as a broadcast.
00197          */
00198         if ( is_multicast_ether_addr ( mac ) ) {
00199                 DBGCP ( eoib, "EoIB %s %s TX multicast\n",
00200                         eoib->name, eth_ntoa ( mac ) );
00201                 return NULL;
00202         }
00203 
00204         /* If we have no peer cache entry, then create one and send
00205          * this packet as a broadcast.
00206          */
00207         peer = eoib_find_peer ( eoib, mac );
00208         if ( ! peer ) {
00209                 DBGC ( eoib, "EoIB %s %s TX unknown\n",
00210                        eoib->name, eth_ntoa ( mac ) );
00211                 eoib_create_peer ( eoib, mac );
00212                 return NULL;
00213         }
00214 
00215         /* If we have not yet recorded a received GID and QPN for this
00216          * peer cache entry, then send this packet as a broadcast.
00217          */
00218         if ( ! peer->av.gid_present ) {
00219                 DBGCP ( eoib, "EoIB %s %s TX not yet recorded\n",
00220                         eoib->name, eth_ntoa ( mac ) );
00221                 return NULL;
00222         }
00223 
00224         /* If we have not yet resolved a path to this peer, then send
00225          * this packet as a broadcast.
00226          */
00227         if ( ( rc = ib_resolve_path ( ibdev, &peer->av ) ) != 0 ) {
00228                 DBGCP ( eoib, "EoIB %s %s TX not yet resolved\n",
00229                         eoib->name, eth_ntoa ( mac ) );
00230                 return NULL;
00231         }
00232 
00233         /* Force use of GRH even for local destinations */
00234         peer->av.gid_present = 1;
00235 
00236         /* We have a fully resolved peer: send this packet as a
00237          * unicast.
00238          */
00239         DBGCP ( eoib, "EoIB %s %s TX " IB_GID_FMT " QPN %#lx\n", eoib->name,
00240                 eth_ntoa ( mac ), IB_GID_ARGS ( &peer->av.gid ), peer->av.qpn );
00241         return &peer->av;
00242 }
00243 
00244 /**
00245  * Record source address vector
00246  *
00247  * @v eoib              EoIB device
00248  * @v mac               Ethernet MAC
00249  * @v lid               Infiniband LID
00250  */
00251 static void eoib_rx_av ( struct eoib_device *eoib, const uint8_t *mac,
00252                          const struct ib_address_vector *av ) {
00253         const union ib_gid *gid = &av->gid;
00254         unsigned long qpn = av->qpn;
00255         struct eoib_peer *peer;
00256 
00257         /* Sanity checks */
00258         if ( ! av->gid_present ) {
00259                 DBGC ( eoib, "EoIB %s %s RX with no GID\n",
00260                        eoib->name, eth_ntoa ( mac ) );
00261                 return;
00262         }
00263 
00264         /* Find peer cache entry (if any) */
00265         peer = eoib_find_peer ( eoib, mac );
00266         if ( ! peer ) {
00267                 DBGCP ( eoib, "EoIB %s %s RX " IB_GID_FMT " (ignored)\n",
00268                         eoib->name, eth_ntoa ( mac ), IB_GID_ARGS ( gid ) );
00269                 return;
00270         }
00271 
00272         /* Some dubious EoIB implementations utilise an Ethernet-to-
00273          * EoIB gateway that will send packets from the wrong QPN.
00274          */
00275         if ( eoib_has_gateway ( eoib ) &&
00276              ( memcmp ( gid, &eoib->gateway.gid, sizeof ( *gid ) ) == 0 ) ) {
00277                 qpn = eoib->gateway.qpn;
00278         }
00279 
00280         /* Do nothing if peer cache entry is complete and correct */
00281         if ( ( peer->av.lid == av->lid ) && ( peer->av.qpn == qpn ) ) {
00282                 DBGCP ( eoib, "EoIB %s %s RX unchanged\n",
00283                         eoib->name, eth_ntoa ( mac ) );
00284                 return;
00285         }
00286 
00287         /* Update peer cache entry */
00288         peer->av.qpn = qpn;
00289         peer->av.qkey = eoib->broadcast.qkey;
00290         peer->av.gid_present = 1;
00291         memcpy ( &peer->av.gid, gid, sizeof ( peer->av.gid ) );
00292         DBGC ( eoib, "EoIB %s %s RX " IB_GID_FMT " QPN %#lx\n", eoib->name,
00293                eth_ntoa ( mac ), IB_GID_ARGS ( &peer->av.gid ), peer->av.qpn );
00294 }
00295 
00296 /****************************************************************************
00297  *
00298  * EoIB network device
00299  *
00300  ****************************************************************************
00301  */
00302 
00303 /**
00304  * Transmit packet via EoIB network device
00305  *
00306  * @v netdev            Network device
00307  * @v iobuf             I/O buffer
00308  * @ret rc              Return status code
00309  */
00310 static int eoib_transmit ( struct net_device *netdev,
00311                            struct io_buffer *iobuf ) {
00312         struct eoib_device *eoib = netdev->priv;
00313         struct eoib_header *eoib_hdr;
00314         struct ethhdr *ethhdr;
00315         struct ib_address_vector *av;
00316         size_t zlen;
00317 
00318         /* Sanity checks */
00319         assert ( iob_len ( iobuf ) >= sizeof ( *ethhdr ) );
00320         assert ( iob_headroom ( iobuf ) >= sizeof ( *eoib_hdr ) );
00321 
00322         /* Look up destination address vector */
00323         ethhdr = iobuf->data;
00324         av = eoib_tx_av ( eoib, ethhdr->h_dest );
00325 
00326         /* Prepend EoIB header */
00327         eoib_hdr = iob_push ( iobuf, sizeof ( *eoib_hdr ) );
00328         eoib_hdr->magic = htons ( EOIB_MAGIC );
00329         eoib_hdr->reserved = 0;
00330 
00331         /* Pad buffer to minimum Ethernet frame size */
00332         zlen = ( sizeof ( *eoib_hdr ) + ETH_ZLEN );
00333         assert ( zlen <= IOB_ZLEN );
00334         if ( iob_len ( iobuf ) < zlen )
00335                 iob_pad ( iobuf, zlen );
00336 
00337         /* If we have no unicast address then send as a broadcast,
00338          * with a duplicate sent to the gateway if applicable.
00339          */
00340         if ( ! av ) {
00341                 av = &eoib->broadcast;
00342                 if ( eoib_has_gateway ( eoib ) )
00343                         eoib->duplicate ( eoib, iobuf );
00344         }
00345 
00346         /* Post send work queue entry */
00347         return ib_post_send ( eoib->ibdev, eoib->qp, av, iobuf );
00348 }
00349 
00350 /**
00351  * Handle EoIB send completion
00352  *
00353  * @v ibdev             Infiniband device
00354  * @v qp                Queue pair
00355  * @v iobuf             I/O buffer
00356  * @v rc                Completion status code
00357  */
00358 static void eoib_complete_send ( struct ib_device *ibdev __unused,
00359                                  struct ib_queue_pair *qp,
00360                                  struct io_buffer *iobuf, int rc ) {
00361         struct eoib_device *eoib = ib_qp_get_ownerdata ( qp );
00362 
00363         netdev_tx_complete_err ( eoib->netdev, iobuf, rc );
00364 }
00365 
00366 /**
00367  * Handle EoIB receive completion
00368  *
00369  * @v ibdev             Infiniband device
00370  * @v qp                Queue pair
00371  * @v dest              Destination address vector, or NULL
00372  * @v source            Source address vector, or NULL
00373  * @v iobuf             I/O buffer
00374  * @v rc                Completion status code
00375  */
00376 static void eoib_complete_recv ( struct ib_device *ibdev __unused,
00377                                  struct ib_queue_pair *qp,
00378                                  struct ib_address_vector *dest __unused,
00379                                  struct ib_address_vector *source,
00380                                  struct io_buffer *iobuf, int rc ) {
00381         struct eoib_device *eoib = ib_qp_get_ownerdata ( qp );
00382         struct net_device *netdev = eoib->netdev;
00383         struct eoib_header *eoib_hdr;
00384         struct ethhdr *ethhdr;
00385 
00386         /* Record errors */
00387         if ( rc != 0 ) {
00388                 netdev_rx_err ( netdev, iobuf, rc );
00389                 return;
00390         }
00391 
00392         /* Sanity check */
00393         if ( iob_len ( iobuf ) < ( sizeof ( *eoib_hdr ) + sizeof ( *ethhdr ) )){
00394                 DBGC ( eoib, "EoIB %s received packet too short to "
00395                        "contain EoIB and Ethernet headers\n", eoib->name );
00396                 DBGC_HD ( eoib, iobuf->data, iob_len ( iobuf ) );
00397                 netdev_rx_err ( netdev, iobuf, -EIO );
00398                 return;
00399         }
00400         if ( ! source ) {
00401                 DBGC ( eoib, "EoIB %s received packet without address "
00402                        "vector\n", eoib->name );
00403                 netdev_rx_err ( netdev, iobuf, -ENOTTY );
00404                 return;
00405         }
00406 
00407         /* Strip EoIB header */
00408         iob_pull ( iobuf, sizeof ( *eoib_hdr ) );
00409 
00410         /* Update neighbour cache entry, if any */
00411         ethhdr = iobuf->data;
00412         eoib_rx_av ( eoib, ethhdr->h_source, source );
00413 
00414         /* Hand off to network layer */
00415         netdev_rx ( netdev, iobuf );
00416 }
00417 
00418 /** EoIB completion operations */
00419 static struct ib_completion_queue_operations eoib_cq_op = {
00420         .complete_send = eoib_complete_send,
00421         .complete_recv = eoib_complete_recv,
00422 };
00423 
00424 /** EoIB queue pair operations */
00425 static struct ib_queue_pair_operations eoib_qp_op = {
00426         .alloc_iob = alloc_iob,
00427 };
00428 
00429 /**
00430  * Poll EoIB network device
00431  *
00432  * @v netdev            Network device
00433  */
00434 static void eoib_poll ( struct net_device *netdev ) {
00435         struct eoib_device *eoib = netdev->priv;
00436         struct ib_device *ibdev = eoib->ibdev;
00437 
00438         /* Poll Infiniband device */
00439         ib_poll_eq ( ibdev );
00440 
00441         /* Poll the retry timers (required for EoIB multicast join) */
00442         retry_poll();
00443 }
00444 
00445 /**
00446  * Handle EoIB broadcast multicast group join completion
00447  *
00448  * @v membership        Multicast group membership
00449  * @v rc                Status code
00450  */
00451 static void eoib_join_complete ( struct ib_mc_membership *membership, int rc ) {
00452         struct eoib_device *eoib =
00453                 container_of ( membership, struct eoib_device, membership );
00454 
00455         /* Record join status as link status */
00456         netdev_link_err ( eoib->netdev, rc );
00457 }
00458 
00459 /**
00460  * Join EoIB broadcast multicast group
00461  *
00462  * @v eoib              EoIB device
00463  * @ret rc              Return status code
00464  */
00465 static int eoib_join_broadcast_group ( struct eoib_device *eoib ) {
00466         int rc;
00467 
00468         /* Join multicast group */
00469         if ( ( rc = ib_mcast_join ( eoib->ibdev, eoib->qp,
00470                                     &eoib->membership, &eoib->broadcast,
00471                                     eoib->mask, eoib_join_complete ) ) != 0 ) {
00472                 DBGC ( eoib, "EoIB %s could not join broadcast group: %s\n",
00473                        eoib->name, strerror ( rc ) );
00474                 return rc;
00475         }
00476 
00477         return 0;
00478 }
00479 
00480 /**
00481  * Leave EoIB broadcast multicast group
00482  *
00483  * @v eoib              EoIB device
00484  */
00485 static void eoib_leave_broadcast_group ( struct eoib_device *eoib ) {
00486 
00487         /* Leave multicast group */
00488         ib_mcast_leave ( eoib->ibdev, eoib->qp, &eoib->membership );
00489 }
00490 
00491 /**
00492  * Handle link status change
00493  *
00494  * @v eoib              EoIB device
00495  */
00496 static void eoib_link_state_changed ( struct eoib_device *eoib ) {
00497         struct net_device *netdev = eoib->netdev;
00498         struct ib_device *ibdev = eoib->ibdev;
00499         int rc;
00500 
00501         /* Leave existing broadcast group */
00502         if ( eoib->qp )
00503                 eoib_leave_broadcast_group ( eoib );
00504 
00505         /* Update broadcast GID based on potentially-new partition key */
00506         eoib->broadcast.gid.words[2] = htons ( ibdev->pkey | IB_PKEY_FULL );
00507 
00508         /* Set net device link state to reflect Infiniband link state */
00509         rc = ib_link_rc ( ibdev );
00510         netdev_link_err ( netdev, ( rc ? rc : -EINPROGRESS_JOINING ) );
00511 
00512         /* Join new broadcast group */
00513         if ( ib_is_open ( ibdev ) && ib_link_ok ( ibdev ) && eoib->qp &&
00514              ( ( rc = eoib_join_broadcast_group ( eoib ) ) != 0 ) ) {
00515                 DBGC ( eoib, "EoIB %s could not rejoin broadcast group: "
00516                        "%s\n", eoib->name, strerror ( rc ) );
00517                 netdev_link_err ( netdev, rc );
00518                 return;
00519         }
00520 }
00521 
00522 /**
00523  * Open EoIB network device
00524  *
00525  * @v netdev            Network device
00526  * @ret rc              Return status code
00527  */
00528 static int eoib_open ( struct net_device *netdev ) {
00529         struct eoib_device *eoib = netdev->priv;
00530         struct ib_device *ibdev = eoib->ibdev;
00531         int rc;
00532 
00533         /* Open IB device */
00534         if ( ( rc = ib_open ( ibdev ) ) != 0 ) {
00535                 DBGC ( eoib, "EoIB %s could not open %s: %s\n",
00536                        eoib->name, ibdev->name, strerror ( rc ) );
00537                 goto err_ib_open;
00538         }
00539 
00540         /* Allocate completion queue */
00541         if ( ( rc = ib_create_cq ( ibdev, EOIB_NUM_CQES, &eoib_cq_op,
00542                                    &eoib->cq ) ) != 0 ) {
00543                 DBGC ( eoib, "EoIB %s could not create completion queue: %s\n",
00544                        eoib->name, strerror ( rc ) );
00545                 goto err_create_cq;
00546         }
00547 
00548         /* Allocate queue pair */
00549         if ( ( rc = ib_create_qp ( ibdev, IB_QPT_UD, EOIB_NUM_SEND_WQES,
00550                                    eoib->cq, EOIB_NUM_RECV_WQES, eoib->cq,
00551                                    &eoib_qp_op, netdev->name, &eoib->qp ) )!=0){
00552                 DBGC ( eoib, "EoIB %s could not create queue pair: %s\n",
00553                        eoib->name, strerror ( rc ) );
00554                 goto err_create_qp;
00555         }
00556         ib_qp_set_ownerdata ( eoib->qp, eoib );
00557 
00558         /* Fill receive rings */
00559         ib_refill_recv ( ibdev, eoib->qp );
00560 
00561         /* Fake a link status change to join the broadcast group */
00562         eoib_link_state_changed ( eoib );
00563 
00564         return 0;
00565 
00566         ib_destroy_qp ( ibdev, eoib->qp );
00567         eoib->qp = NULL;
00568  err_create_qp:
00569         ib_destroy_cq ( ibdev, eoib->cq );
00570         eoib->cq = NULL;
00571  err_create_cq:
00572         ib_close ( ibdev );
00573  err_ib_open:
00574         return rc;
00575 }
00576 
00577 /**
00578  * Close EoIB network device
00579  *
00580  * @v netdev            Network device
00581  */
00582 static void eoib_close ( struct net_device *netdev ) {
00583         struct eoib_device *eoib = netdev->priv;
00584         struct ib_device *ibdev = eoib->ibdev;
00585 
00586         /* Flush peer cache */
00587         eoib_flush_peers ( eoib );
00588 
00589         /* Leave broadcast group */
00590         eoib_leave_broadcast_group ( eoib );
00591 
00592         /* Tear down the queues */
00593         ib_destroy_qp ( ibdev, eoib->qp );
00594         eoib->qp = NULL;
00595         ib_destroy_cq ( ibdev, eoib->cq );
00596         eoib->cq = NULL;
00597 
00598         /* Close IB device */
00599         ib_close ( ibdev );
00600 }
00601 
00602 /** EoIB network device operations */
00603 static struct net_device_operations eoib_operations = {
00604         .open           = eoib_open,
00605         .close          = eoib_close,
00606         .transmit       = eoib_transmit,
00607         .poll           = eoib_poll,
00608 };
00609 
00610 /**
00611  * Create EoIB device
00612  *
00613  * @v ibdev             Infiniband device
00614  * @v hw_addr           Ethernet MAC
00615  * @v broadcast         Broadcast address vector
00616  * @v name              Interface name (or NULL to use default)
00617  * @ret rc              Return status code
00618  */
00619 int eoib_create ( struct ib_device *ibdev, const uint8_t *hw_addr,
00620                   struct ib_address_vector *broadcast, const char *name ) {
00621         struct net_device *netdev;
00622         struct eoib_device *eoib;
00623         int rc;
00624 
00625         /* Allocate network device */
00626         netdev = alloc_etherdev ( sizeof ( *eoib ) );
00627         if ( ! netdev ) {
00628                 rc = -ENOMEM;
00629                 goto err_alloc;
00630         }
00631         netdev_init ( netdev, &eoib_operations );
00632         eoib = netdev->priv;
00633         netdev->dev = ibdev->dev;
00634         eoib->netdev = netdev;
00635         eoib->ibdev = ibdev_get ( ibdev );
00636         memcpy ( &eoib->broadcast, broadcast, sizeof ( eoib->broadcast ) );
00637         INIT_LIST_HEAD ( &eoib->peers );
00638 
00639         /* Set MAC address */
00640         memcpy ( netdev->hw_addr, hw_addr, ETH_ALEN );
00641 
00642         /* Set interface name, if applicable */
00643         if ( name )
00644                 snprintf ( netdev->name, sizeof ( netdev->name ), "%s", name );
00645         eoib->name = netdev->name;
00646 
00647         /* Add to list of EoIB devices */
00648         list_add_tail ( &eoib->list, &eoib_devices );
00649 
00650         /* Register network device */
00651         if ( ( rc = register_netdev ( netdev ) ) != 0 )
00652                 goto err_register;
00653 
00654         DBGC ( eoib, "EoIB %s created for %s MAC %s\n",
00655                eoib->name, ibdev->name, eth_ntoa ( hw_addr ) );
00656         DBGC ( eoib, "EoIB %s broadcast GID " IB_GID_FMT "\n",
00657                eoib->name, IB_GID_ARGS ( &broadcast->gid ) );
00658         return 0;
00659 
00660         unregister_netdev ( netdev );
00661  err_register:
00662         list_del ( &eoib->list );
00663         ibdev_put ( ibdev );
00664         netdev_nullify ( netdev );
00665         netdev_put ( netdev );
00666  err_alloc:
00667         return rc;
00668 }
00669 
00670 /**
00671  * Find EoIB device
00672  *
00673  * @v ibdev             Infiniband device
00674  * @v hw_addr           Original Ethernet MAC
00675  * @ret eoib            EoIB device
00676  */
00677 struct eoib_device * eoib_find ( struct ib_device *ibdev,
00678                                  const uint8_t *hw_addr ) {
00679         struct eoib_device *eoib;
00680 
00681         list_for_each_entry ( eoib, &eoib_devices, list ) {
00682                 if ( ( eoib->ibdev == ibdev ) &&
00683                      ( memcmp ( eoib->netdev->hw_addr, hw_addr,
00684                                 ETH_ALEN ) == 0 ) )
00685                         return eoib;
00686         }
00687         return NULL;
00688 }
00689 
00690 /**
00691  * Remove EoIB device
00692  *
00693  * @v eoib              EoIB device
00694  */
00695 void eoib_destroy ( struct eoib_device *eoib ) {
00696         struct net_device *netdev = eoib->netdev;
00697 
00698         /* Unregister network device */
00699         unregister_netdev ( netdev );
00700 
00701         /* Remove from list of network devices */
00702         list_del ( &eoib->list );
00703 
00704         /* Drop reference to Infiniband device */
00705         ibdev_put ( eoib->ibdev );
00706 
00707         /* Free network device */
00708         DBGC ( eoib, "EoIB %s destroyed\n", eoib->name );
00709         netdev_nullify ( netdev );
00710         netdev_put ( netdev );
00711 }
00712 
00713 /**
00714  * Probe EoIB device
00715  *
00716  * @v ibdev             Infiniband device
00717  * @ret rc              Return status code
00718  */
00719 static int eoib_probe ( struct ib_device *ibdev __unused ) {
00720 
00721         /* EoIB devices are not created automatically */
00722         return 0;
00723 }
00724 
00725 /**
00726  * Handle device or link status change
00727  *
00728  * @v ibdev             Infiniband device
00729  */
00730 static void eoib_notify ( struct ib_device *ibdev ) {
00731         struct eoib_device *eoib;
00732 
00733         /* Handle link status change for any attached EoIB devices */
00734         list_for_each_entry ( eoib, &eoib_devices, list ) {
00735                 if ( eoib->ibdev != ibdev )
00736                         continue;
00737                 eoib_link_state_changed ( eoib );
00738         }
00739 }
00740 
00741 /**
00742  * Remove EoIB device
00743  *
00744  * @v ibdev             Infiniband device
00745  */
00746 static void eoib_remove ( struct ib_device *ibdev ) {
00747         struct eoib_device *eoib;
00748         struct eoib_device *tmp;
00749 
00750         /* Remove any attached EoIB devices */
00751         list_for_each_entry_safe ( eoib, tmp, &eoib_devices, list ) {
00752                 if ( eoib->ibdev != ibdev )
00753                         continue;
00754                 eoib_destroy ( eoib );
00755         }
00756 }
00757 
00758 /** EoIB driver */
00759 struct ib_driver eoib_driver __ib_driver = {
00760         .name = "EoIB",
00761         .probe = eoib_probe,
00762         .notify = eoib_notify,
00763         .remove = eoib_remove,
00764 };
00765 
00766 /****************************************************************************
00767  *
00768  * EoIB heartbeat packets
00769  *
00770  ****************************************************************************
00771  */
00772 
00773 /**
00774  * Silently ignore incoming EoIB heartbeat packets
00775  *
00776  * @v iobuf             I/O buffer
00777  * @v netdev            Network device
00778  * @v ll_source         Link-layer source address
00779  * @v flags             Packet flags
00780  * @ret rc              Return status code
00781  */
00782 static int eoib_heartbeat_rx ( struct io_buffer *iobuf,
00783                                struct net_device *netdev __unused,
00784                                const void *ll_dest __unused,
00785                                const void *ll_source __unused,
00786                                unsigned int flags __unused ) {
00787         free_iob ( iobuf );
00788         return 0;
00789 }
00790 
00791 /**
00792  * Transcribe EoIB heartbeat address
00793  *
00794  * @v net_addr          EoIB heartbeat address
00795  * @ret string          "<EoIB>"
00796  *
00797  * This operation is meaningless for the EoIB heartbeat protocol.
00798  */
00799 static const char * eoib_heartbeat_ntoa ( const void *net_addr __unused ) {
00800         return "<EoIB>";
00801 }
00802 
00803 /** EoIB heartbeat network protocol */
00804 struct net_protocol eoib_heartbeat_protocol __net_protocol = {
00805         .name = "EoIB",
00806         .net_proto = htons ( EOIB_MAGIC ),
00807         .rx = eoib_heartbeat_rx,
00808         .ntoa = eoib_heartbeat_ntoa,
00809 };
00810 
00811 /****************************************************************************
00812  *
00813  * EoIB gateway
00814  *
00815  ****************************************************************************
00816  *
00817  * Some dubious EoIB implementations require all broadcast traffic to
00818  * be sent twice: once to the actual broadcast group, and once as a
00819  * unicast to the EoIB-to-Ethernet gateway.  This somewhat curious
00820  * design arises since the EoIB-to-Ethernet gateway hardware lacks the
00821  * ability to attach a queue pair to a multicast GID (or LID), and so
00822  * cannot receive traffic sent to the broadcast group.
00823  *
00824  */
00825 
00826 /**
00827  * Transmit duplicate packet to the EoIB gateway
00828  *
00829  * @v eoib              EoIB device
00830  * @v original          Original I/O buffer
00831  */
00832 static void eoib_duplicate ( struct eoib_device *eoib,
00833                              struct io_buffer *original ) {
00834         struct net_device *netdev = eoib->netdev;
00835         struct ib_device *ibdev = eoib->ibdev;
00836         struct ib_address_vector *av = &eoib->gateway;
00837         size_t len = iob_len ( original );
00838         struct io_buffer *copy;
00839         int rc;
00840 
00841         /* Create copy of I/O buffer */
00842         copy = alloc_iob ( len );
00843         if ( ! copy ) {
00844                 rc = -ENOMEM;
00845                 goto err_alloc;
00846         }
00847         memcpy ( iob_put ( copy, len ), original->data, len );
00848 
00849         /* Append to network device's transmit queue */
00850         list_add_tail ( &copy->list, &original->list );
00851 
00852         /* Resolve path to gateway */
00853         if ( ( rc = ib_resolve_path ( ibdev, av ) ) != 0 ) {
00854                 DBGC ( eoib, "EoIB %s no path to gateway: %s\n",
00855                        eoib->name, strerror ( rc ) );
00856                 goto err_path;
00857         }
00858 
00859         /* Force use of GRH even for local destinations */
00860         av->gid_present = 1;
00861 
00862         /* Post send work queue entry */
00863         if ( ( rc = ib_post_send ( eoib->ibdev, eoib->qp, av, copy ) ) != 0 )
00864                 goto err_post_send;
00865 
00866         return;
00867 
00868  err_post_send:
00869  err_path:
00870         list_del ( &copy->list );
00871  err_alloc:
00872         netdev_tx_err ( netdev, copy, rc );
00873 }
00874 
00875 /**
00876  * Set EoIB gateway
00877  *
00878  * @v eoib              EoIB device
00879  * @v av                Address vector, or NULL to clear gateway
00880  */
00881 void eoib_set_gateway ( struct eoib_device *eoib,
00882                         struct ib_address_vector *av ) {
00883 
00884         if ( av ) {
00885                 DBGC ( eoib, "EoIB %s using gateway " IB_GID_FMT "\n",
00886                        eoib->name, IB_GID_ARGS ( &av->gid ) );
00887                 memcpy ( &eoib->gateway, av, sizeof ( eoib->gateway ) );
00888                 eoib->duplicate = eoib_duplicate;
00889         } else {
00890                 DBGC ( eoib, "EoIB %s not using gateway\n", eoib->name );
00891                 eoib->duplicate = NULL;
00892         }
00893 }