iPXE
eoib.c
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2016 Michael Brown <mbrown@fensystems.co.uk>.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License as
6  * published by the Free Software Foundation; either version 2 of the
7  * License, or any later version.
8  *
9  * This program is distributed in the hope that it will be useful, but
10  * WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12  * General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17  * 02110-1301, USA.
18  *
19  * You can also choose to distribute this program under the terms of
20  * the Unmodified Binary Distribution Licence (as given in the file
21  * COPYING.UBDL), provided that you have satisfied its requirements.
22  */
23 
24 FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
25 
26 #include <stdio.h>
27 #include <string.h>
28 #include <errno.h>
29 #include <ipxe/errortab.h>
30 #include <ipxe/malloc.h>
31 #include <ipxe/iobuf.h>
32 #include <ipxe/if_ether.h>
33 #include <ipxe/netdevice.h>
34 #include <ipxe/ethernet.h>
35 #include <ipxe/infiniband.h>
36 #include <ipxe/ib_mcast.h>
37 #include <ipxe/ib_pathrec.h>
38 #include <ipxe/eoib.h>
39 
40 /** @file
41  *
42  * Ethernet over Infiniband
43  *
44  */
45 
46 /** Number of EoIB send work queue entries (passed to ib_create_qp() on open) */
47 #define EOIB_NUM_SEND_WQES 8
48 
49 /** Number of EoIB receive work queue entries (passed to ib_create_qp() on open) */
50 #define EOIB_NUM_RECV_WQES 4
51 
52 /** Number of EoIB completion queue entries
    *
    * A single completion queue is used for both the send and the
    * receive work queues of the EoIB queue pair.
    */
53 #define EOIB_NUM_CQES 16
54 
55 /** Link status for "broadcast join in progress"
    *
    * Derived from EINPROGRESS, with its own description ("Joining").
    */
56 #define EINPROGRESS_JOINING __einfo_error ( EINFO_EINPROGRESS_JOINING )
57 #define EINFO_EINPROGRESS_JOINING __einfo_uniqify \
58  ( EINFO_EINPROGRESS, 0x01, "Joining" )
59 
60 /** Human-readable message for the link status */
61 struct errortab eoib_errors[] __errortab = {
63 };
64 
65 /** List of EoIB devices */
66 static LIST_HEAD ( eoib_devices );
67 
69 
70 /****************************************************************************
71  *
72  * EoIB peer cache
73  *
74  ****************************************************************************
75  */
76 
77 /** An EoIB peer cache entry */
78 struct eoib_peer {
79  /** List of EoIB peer cache entries */
80  struct list_head list;
81  /** Ethernet MAC */
83  /** Infiniband address vector */
85 };
86 
87 /**
88  * Find EoIB peer cache entry
89  *
90  * @v eoib EoIB device
91  * @v mac Ethernet MAC
92  * @ret peer EoIB peer, or NULL if not found
93  */
94 static struct eoib_peer * eoib_find_peer ( struct eoib_device *eoib,
95  const uint8_t *mac ) {
96  struct eoib_peer *peer;
97 
98  /* Find peer cache entry */
99  list_for_each_entry ( peer, &eoib->peers, list ) {
100  if ( memcmp ( mac, peer->mac, sizeof ( peer->mac ) ) == 0 ) {
101  /* Move peer to start of list */
102  list_del ( &peer->list );
103  list_add ( &peer->list, &eoib->peers );
104  return peer;
105  }
106  }
107 
108  return NULL;
109 }
110 
111 /**
112  * Create EoIB peer cache entry
113  *
114  * @v eoib EoIB device
115  * @v mac Ethernet MAC
116  * @ret peer EoIB peer, or NULL on error
117  */
118 static struct eoib_peer * eoib_create_peer ( struct eoib_device *eoib,
119  const uint8_t *mac ) {
120  struct eoib_peer *peer;
121 
122  /* Allocate and initialise peer cache entry */
123  peer = zalloc ( sizeof ( *peer ) );
124  if ( peer ) {
125  memcpy ( peer->mac, mac, sizeof ( peer->mac ) );
126  list_add ( &peer->list, &eoib->peers );
127  }
128  return peer;
129 }
130 
131 /**
132  * Flush EoIB peer cache
133  *
134  * @v eoib EoIB device
135  */
136 static void eoib_flush_peers ( struct eoib_device *eoib ) {
137  struct eoib_peer *peer;
138  struct eoib_peer *tmp;
139 
140  list_for_each_entry_safe ( peer, tmp, &eoib->peers, list ) {
141  list_del ( &peer->list );
142  free ( peer );
143  }
144 }
145 
146 /**
147  * Discard some entries from the peer cache
148  *
149  * @ret discarded Number of cached items discarded
150  */
151 static unsigned int eoib_discard ( void ) {
152  struct net_device *netdev;
153  struct eoib_device *eoib;
154  struct eoib_peer *peer;
155  unsigned int discarded = 0;
156 
157  /* Try to discard one cache entry for each EoIB device */
158  for_each_netdev ( netdev ) {
159 
160  /* Skip non-EoIB devices */
161  if ( netdev->op != &eoib_operations )
162  continue;
163  eoib = netdev->priv;
164 
165  /* Discard least recently used cache entry (if any) */
167  list_del ( &peer->list );
168  free ( peer );
169  discarded++;
170  break;
171  }
172  }
173 
174  return discarded;
175 }
176 
177 /** EoIB cache discarder */
178 struct cache_discarder eoib_discarder __cache_discarder ( CACHE_EXPENSIVE ) = {
180 };
181 
182 /**
183  * Find destination address vector
184  *
185  * @v eoib EoIB device
186  * @v mac Ethernet MAC
187  * @ret av Address vector, or NULL to send as broadcast
188  */
189 static struct ib_address_vector * eoib_tx_av ( struct eoib_device *eoib,
190  const uint8_t *mac ) {
191  struct ib_device *ibdev = eoib->ibdev;
192  struct eoib_peer *peer;
193  int rc;
194 
195  /* If this is a broadcast or multicast MAC address, then send
196  * this packet as a broadcast.
197  */
198  if ( is_multicast_ether_addr ( mac ) ) {
199  DBGCP ( eoib, "EoIB %s %s TX multicast\n",
200  eoib->name, eth_ntoa ( mac ) );
201  return NULL;
202  }
203 
204  /* If we have no peer cache entry, then create one and send
205  * this packet as a broadcast.
206  */
207  peer = eoib_find_peer ( eoib, mac );
208  if ( ! peer ) {
209  DBGC ( eoib, "EoIB %s %s TX unknown\n",
210  eoib->name, eth_ntoa ( mac ) );
211  eoib_create_peer ( eoib, mac );
212  return NULL;
213  }
214 
215  /* If we have not yet recorded a received GID and QPN for this
216  * peer cache entry, then send this packet as a broadcast.
217  */
218  if ( ! peer->av.gid_present ) {
219  DBGCP ( eoib, "EoIB %s %s TX not yet recorded\n",
220  eoib->name, eth_ntoa ( mac ) );
221  return NULL;
222  }
223 
224  /* If we have not yet resolved a path to this peer, then send
225  * this packet as a broadcast.
226  */
227  if ( ( rc = ib_resolve_path ( ibdev, &peer->av ) ) != 0 ) {
228  DBGCP ( eoib, "EoIB %s %s TX not yet resolved\n",
229  eoib->name, eth_ntoa ( mac ) );
230  return NULL;
231  }
232 
233  /* Force use of GRH even for local destinations */
234  peer->av.gid_present = 1;
235 
236  /* We have a fully resolved peer: send this packet as a
237  * unicast.
238  */
239  DBGCP ( eoib, "EoIB %s %s TX " IB_GID_FMT " QPN %#lx\n", eoib->name,
240  eth_ntoa ( mac ), IB_GID_ARGS ( &peer->av.gid ), peer->av.qpn );
241  return &peer->av;
242 }
243 
244 /**
245  * Record source address vector
246  *
247  * @v eoib EoIB device
248  * @v mac Ethernet MAC
249  * @v lid Infiniband LID
250  */
251 static void eoib_rx_av ( struct eoib_device *eoib, const uint8_t *mac,
252  const struct ib_address_vector *av ) {
253  const union ib_gid *gid = &av->gid;
254  unsigned long qpn = av->qpn;
255  struct eoib_peer *peer;
256 
257  /* Sanity checks */
258  if ( ! av->gid_present ) {
259  DBGC ( eoib, "EoIB %s %s RX with no GID\n",
260  eoib->name, eth_ntoa ( mac ) );
261  return;
262  }
263 
264  /* Find peer cache entry (if any) */
265  peer = eoib_find_peer ( eoib, mac );
266  if ( ! peer ) {
267  DBGCP ( eoib, "EoIB %s %s RX " IB_GID_FMT " (ignored)\n",
268  eoib->name, eth_ntoa ( mac ), IB_GID_ARGS ( gid ) );
269  return;
270  }
271 
272  /* Some dubious EoIB implementations utilise an Ethernet-to-
273  * EoIB gateway that will send packets from the wrong QPN.
274  */
275  if ( eoib_has_gateway ( eoib ) &&
276  ( memcmp ( gid, &eoib->gateway.gid, sizeof ( *gid ) ) == 0 ) ) {
277  qpn = eoib->gateway.qpn;
278  }
279 
280  /* Do nothing if peer cache entry is complete and correct */
281  if ( ( peer->av.lid == av->lid ) && ( peer->av.qpn == qpn ) ) {
282  DBGCP ( eoib, "EoIB %s %s RX unchanged\n",
283  eoib->name, eth_ntoa ( mac ) );
284  return;
285  }
286 
287  /* Update peer cache entry */
288  peer->av.qpn = qpn;
289  peer->av.qkey = eoib->broadcast.qkey;
290  peer->av.gid_present = 1;
291  memcpy ( &peer->av.gid, gid, sizeof ( peer->av.gid ) );
292  DBGC ( eoib, "EoIB %s %s RX " IB_GID_FMT " QPN %#lx\n", eoib->name,
293  eth_ntoa ( mac ), IB_GID_ARGS ( &peer->av.gid ), peer->av.qpn );
294 }
295 
296 /****************************************************************************
297  *
298  * EoIB network device
299  *
300  ****************************************************************************
301  */
302 
303 /**
304  * Transmit packet via EoIB network device
305  *
306  * @v netdev Network device
307  * @v iobuf I/O buffer
308  * @ret rc Return status code
309  */
310 static int eoib_transmit ( struct net_device *netdev,
311  struct io_buffer *iobuf ) {
312  struct eoib_device *eoib = netdev->priv;
313  struct eoib_header *eoib_hdr;
314  struct ethhdr *ethhdr;
315  struct ib_address_vector *av;
316  size_t zlen;
317 
318  /* Sanity checks */
319  assert ( iob_len ( iobuf ) >= sizeof ( *ethhdr ) );
320  assert ( iob_headroom ( iobuf ) >= sizeof ( *eoib_hdr ) );
321 
322  /* Look up destination address vector */
323  ethhdr = iobuf->data;
324  av = eoib_tx_av ( eoib, ethhdr->h_dest );
325 
326  /* Prepend EoIB header */
327  eoib_hdr = iob_push ( iobuf, sizeof ( *eoib_hdr ) );
328  eoib_hdr->magic = htons ( EOIB_MAGIC );
329  eoib_hdr->reserved = 0;
330 
331  /* Pad buffer to minimum Ethernet frame size */
332  zlen = ( sizeof ( *eoib_hdr ) + ETH_ZLEN );
333  assert ( zlen <= IOB_ZLEN );
334  if ( iob_len ( iobuf ) < zlen )
335  iob_pad ( iobuf, zlen );
336 
337  /* If we have no unicast address then send as a broadcast,
338  * with a duplicate sent to the gateway if applicable.
339  */
340  if ( ! av ) {
341  av = &eoib->broadcast;
342  if ( eoib_has_gateway ( eoib ) )
343  eoib->duplicate ( eoib, iobuf );
344  }
345 
346  /* Post send work queue entry */
347  return ib_post_send ( eoib->ibdev, eoib->qp, av, iobuf );
348 }
349 
350 /**
351  * Handle EoIB send completion
352  *
353  * @v ibdev Infiniband device
354  * @v qp Queue pair
355  * @v iobuf I/O buffer
356  * @v rc Completion status code
357  */
358 static void eoib_complete_send ( struct ib_device *ibdev __unused,
359  struct ib_queue_pair *qp,
360  struct io_buffer *iobuf, int rc ) {
361  struct eoib_device *eoib = ib_qp_get_ownerdata ( qp );
362 
363  netdev_tx_complete_err ( eoib->netdev, iobuf, rc );
364 }
365 
366 /**
367  * Handle EoIB receive completion
368  *
369  * @v ibdev Infiniband device
370  * @v qp Queue pair
371  * @v dest Destination address vector, or NULL
372  * @v source Source address vector, or NULL
373  * @v iobuf I/O buffer
374  * @v rc Completion status code
375  */
377  struct ib_queue_pair *qp,
379  struct ib_address_vector *source,
380  struct io_buffer *iobuf, int rc ) {
381  struct eoib_device *eoib = ib_qp_get_ownerdata ( qp );
382  struct net_device *netdev = eoib->netdev;
383  struct eoib_header *eoib_hdr;
384  struct ethhdr *ethhdr;
385 
386  /* Record errors */
387  if ( rc != 0 ) {
388  netdev_rx_err ( netdev, iobuf, rc );
389  return;
390  }
391 
392  /* Sanity check */
393  if ( iob_len ( iobuf ) < ( sizeof ( *eoib_hdr ) + sizeof ( *ethhdr ) )){
394  DBGC ( eoib, "EoIB %s received packet too short to "
395  "contain EoIB and Ethernet headers\n", eoib->name );
396  DBGC_HD ( eoib, iobuf->data, iob_len ( iobuf ) );
397  netdev_rx_err ( netdev, iobuf, -EIO );
398  return;
399  }
400  if ( ! source ) {
401  DBGC ( eoib, "EoIB %s received packet without address "
402  "vector\n", eoib->name );
403  netdev_rx_err ( netdev, iobuf, -ENOTTY );
404  return;
405  }
406 
407  /* Strip EoIB header */
408  iob_pull ( iobuf, sizeof ( *eoib_hdr ) );
409 
410  /* Update neighbour cache entry, if any */
411  ethhdr = iobuf->data;
412  eoib_rx_av ( eoib, ethhdr->h_source, source );
413 
414  /* Hand off to network layer */
415  netdev_rx ( netdev, iobuf );
416 }
417 
418 /** EoIB completion operations */
421  .complete_recv = eoib_complete_recv,
422 };
423 
424 /** EoIB queue pair operations */
426  .alloc_iob = alloc_iob,
427 };
428 
429 /**
430  * Poll EoIB network device
431  *
432  * @v netdev Network device
433  */
434 static void eoib_poll ( struct net_device *netdev ) {
435  struct eoib_device *eoib = netdev->priv;
436  struct ib_device *ibdev = eoib->ibdev;
437 
438  /* Poll Infiniband device */
439  ib_poll_eq ( ibdev );
440 
441  /* Poll the retry timers (required for EoIB multicast join) */
442  retry_poll();
443 }
444 
445 /**
446  * Handle EoIB broadcast multicast group join completion
447  *
448  * @v membership Multicast group membership
449  * @v rc Status code
450  */
451 static void eoib_join_complete ( struct ib_mc_membership *membership, int rc ) {
452  struct eoib_device *eoib =
454 
455  /* Record join status as link status */
456  netdev_link_err ( eoib->netdev, rc );
457 }
458 
459 /**
460  * Join EoIB broadcast multicast group
461  *
462  * @v eoib EoIB device
463  * @ret rc Return status code
464  */
465 static int eoib_join_broadcast_group ( struct eoib_device *eoib ) {
466  int rc;
467 
468  /* Join multicast group */
469  if ( ( rc = ib_mcast_join ( eoib->ibdev, eoib->qp,
470  &eoib->membership, &eoib->broadcast,
471  eoib->mask, eoib_join_complete ) ) != 0 ) {
472  DBGC ( eoib, "EoIB %s could not join broadcast group: %s\n",
473  eoib->name, strerror ( rc ) );
474  return rc;
475  }
476 
477  return 0;
478 }
479 
480 /**
481  * Leave EoIB broadcast multicast group
482  *
483  * @v eoib EoIB device
484  */
485 static void eoib_leave_broadcast_group ( struct eoib_device *eoib ) {
486 
487  /* Leave multicast group */
488  ib_mcast_leave ( eoib->ibdev, eoib->qp, &eoib->membership );
489 }
490 
491 /**
492  * Handle link status change
493  *
494  * @v eoib EoIB device
495  */
496 static void eoib_link_state_changed ( struct eoib_device *eoib ) {
497  struct net_device *netdev = eoib->netdev;
498  struct ib_device *ibdev = eoib->ibdev;
499  int rc;
500 
501  /* Leave existing broadcast group */
502  if ( eoib->qp )
504 
505  /* Update broadcast GID based on potentially-new partition key */
506  eoib->broadcast.gid.words[2] = htons ( ibdev->pkey | IB_PKEY_FULL );
507 
508  /* Set net device link state to reflect Infiniband link state */
509  rc = ib_link_rc ( ibdev );
511 
512  /* Join new broadcast group */
513  if ( ib_is_open ( ibdev ) && ib_link_ok ( ibdev ) && eoib->qp &&
514  ( ( rc = eoib_join_broadcast_group ( eoib ) ) != 0 ) ) {
515  DBGC ( eoib, "EoIB %s could not rejoin broadcast group: "
516  "%s\n", eoib->name, strerror ( rc ) );
518  return;
519  }
520 }
521 
522 /**
523  * Open EoIB network device
524  *
525  * @v netdev Network device
526  * @ret rc Return status code
527  */
528 static int eoib_open ( struct net_device *netdev ) {
529  struct eoib_device *eoib = netdev->priv;
530  struct ib_device *ibdev = eoib->ibdev;
531  int rc;
532 
533  /* Open IB device */
534  if ( ( rc = ib_open ( ibdev ) ) != 0 ) {
535  DBGC ( eoib, "EoIB %s could not open %s: %s\n",
536  eoib->name, ibdev->name, strerror ( rc ) );
537  goto err_ib_open;
538  }
539 
540  /* Allocate completion queue */
541  if ( ( rc = ib_create_cq ( ibdev, EOIB_NUM_CQES, &eoib_cq_op,
542  &eoib->cq ) ) != 0 ) {
543  DBGC ( eoib, "EoIB %s could not create completion queue: %s\n",
544  eoib->name, strerror ( rc ) );
545  goto err_create_cq;
546  }
547 
548  /* Allocate queue pair */
549  if ( ( rc = ib_create_qp ( ibdev, IB_QPT_UD, EOIB_NUM_SEND_WQES,
550  eoib->cq, EOIB_NUM_RECV_WQES, eoib->cq,
551  &eoib_qp_op, netdev->name, &eoib->qp ) )!=0){
552  DBGC ( eoib, "EoIB %s could not create queue pair: %s\n",
553  eoib->name, strerror ( rc ) );
554  goto err_create_qp;
555  }
556  ib_qp_set_ownerdata ( eoib->qp, eoib );
557 
558  /* Fill receive rings */
559  ib_refill_recv ( ibdev, eoib->qp );
560 
561  /* Fake a link status change to join the broadcast group */
562  eoib_link_state_changed ( eoib );
563 
564  return 0;
565 
566  ib_destroy_qp ( ibdev, eoib->qp );
567  eoib->qp = NULL;
568  err_create_qp:
569  ib_destroy_cq ( ibdev, eoib->cq );
570  eoib->cq = NULL;
571  err_create_cq:
572  ib_close ( ibdev );
573  err_ib_open:
574  return rc;
575 }
576 
577 /**
578  * Close EoIB network device
579  *
580  * @v netdev Network device
581  */
582 static void eoib_close ( struct net_device *netdev ) {
583  struct eoib_device *eoib = netdev->priv;
584  struct ib_device *ibdev = eoib->ibdev;
585 
586  /* Flush peer cache */
587  eoib_flush_peers ( eoib );
588 
589  /* Leave broadcast group */
591 
592  /* Tear down the queues */
593  ib_destroy_qp ( ibdev, eoib->qp );
594  eoib->qp = NULL;
595  ib_destroy_cq ( ibdev, eoib->cq );
596  eoib->cq = NULL;
597 
598  /* Close IB device */
599  ib_close ( ibdev );
600 }
601 
602 /** EoIB network device operations */
603 static struct net_device_operations eoib_operations = {
604  .open = eoib_open,
605  .close = eoib_close,
606  .transmit = eoib_transmit,
607  .poll = eoib_poll,
608 };
609 
610 /**
611  * Create EoIB device
612  *
613  * @v ibdev Infiniband device
614  * @v hw_addr Ethernet MAC
615  * @v broadcast Broadcast address vector
616  * @v name Interface name (or NULL to use default)
617  * @ret rc Return status code
618  */
619 int eoib_create ( struct ib_device *ibdev, const uint8_t *hw_addr,
620  struct ib_address_vector *broadcast, const char *name ) {
621  struct net_device *netdev;
622  struct eoib_device *eoib;
623  int rc;
624 
625  /* Allocate network device */
626  netdev = alloc_etherdev ( sizeof ( *eoib ) );
627  if ( ! netdev ) {
628  rc = -ENOMEM;
629  goto err_alloc;
630  }
632  eoib = netdev->priv;
633  netdev->dev = ibdev->dev;
634  eoib->netdev = netdev;
635  eoib->ibdev = ibdev_get ( ibdev );
636  memcpy ( &eoib->broadcast, broadcast, sizeof ( eoib->broadcast ) );
637  INIT_LIST_HEAD ( &eoib->peers );
638 
639  /* Set MAC address */
640  memcpy ( netdev->hw_addr, hw_addr, ETH_ALEN );
641 
642  /* Set interface name, if applicable */
643  if ( name )
644  snprintf ( netdev->name, sizeof ( netdev->name ), "%s", name );
645  eoib->name = netdev->name;
646 
647  /* Add to list of EoIB devices */
648  list_add_tail ( &eoib->list, &eoib_devices );
649 
650  /* Register network device */
651  if ( ( rc = register_netdev ( netdev ) ) != 0 )
652  goto err_register;
653 
654  DBGC ( eoib, "EoIB %s created for %s MAC %s\n",
655  eoib->name, ibdev->name, eth_ntoa ( hw_addr ) );
656  DBGC ( eoib, "EoIB %s broadcast GID " IB_GID_FMT "\n",
657  eoib->name, IB_GID_ARGS ( &broadcast->gid ) );
658  return 0;
659 
661  err_register:
662  list_del ( &eoib->list );
663  ibdev_put ( ibdev );
665  netdev_put ( netdev );
666  err_alloc:
667  return rc;
668 }
669 
670 /**
671  * Find EoIB device
672  *
673  * @v ibdev Infiniband device
674  * @v hw_addr Original Ethernet MAC
675  * @ret eoib EoIB device
676  */
678  const uint8_t *hw_addr ) {
679  struct eoib_device *eoib;
680 
681  list_for_each_entry ( eoib, &eoib_devices, list ) {
682  if ( ( eoib->ibdev == ibdev ) &&
683  ( memcmp ( eoib->netdev->hw_addr, hw_addr,
684  ETH_ALEN ) == 0 ) )
685  return eoib;
686  }
687  return NULL;
688 }
689 
690 /**
691  * Remove EoIB device
692  *
693  * @v eoib EoIB device
694  */
695 void eoib_destroy ( struct eoib_device *eoib ) {
696  struct net_device *netdev = eoib->netdev;
697 
698  /* Unregister network device */
700 
701  /* Remove from list of network devices */
702  list_del ( &eoib->list );
703 
704  /* Drop reference to Infiniband device */
705  ibdev_put ( eoib->ibdev );
706 
707  /* Free network device */
708  DBGC ( eoib, "EoIB %s destroyed\n", eoib->name );
710  netdev_put ( netdev );
711 }
712 
713 /**
714  * Probe EoIB device
715  *
716  * @v ibdev Infiniband device
717  * @ret rc Return status code
718  */
719 static int eoib_probe ( struct ib_device *ibdev __unused ) {
720 
721  /* EoIB devices are not created automatically */
722  return 0;
723 }
724 
725 /**
726  * Handle device or link status change
727  *
728  * @v ibdev Infiniband device
729  */
730 static void eoib_notify ( struct ib_device *ibdev ) {
731  struct eoib_device *eoib;
732 
733  /* Handle link status change for any attached EoIB devices */
734  list_for_each_entry ( eoib, &eoib_devices, list ) {
735  if ( eoib->ibdev != ibdev )
736  continue;
737  eoib_link_state_changed ( eoib );
738  }
739 }
740 
741 /**
742  * Remove EoIB device
743  *
744  * @v ibdev Infiniband device
745  */
746 static void eoib_remove ( struct ib_device *ibdev ) {
747  struct eoib_device *eoib;
748  struct eoib_device *tmp;
749 
750  /* Remove any attached EoIB devices */
751  list_for_each_entry_safe ( eoib, tmp, &eoib_devices, list ) {
752  if ( eoib->ibdev != ibdev )
753  continue;
754  eoib_destroy ( eoib );
755  }
756 }
757 
758 /** EoIB driver */
759 struct ib_driver eoib_driver __ib_driver = {
760  .name = "EoIB",
761  .probe = eoib_probe,
762  .notify = eoib_notify,
763  .remove = eoib_remove,
764 };
765 
766 /****************************************************************************
767  *
768  * EoIB heartbeat packets
769  *
770  ****************************************************************************
771  */
772 
773 /**
774  * Silently ignore incoming EoIB heartbeat packets
775  *
776  * @v iobuf I/O buffer
777  * @v netdev Network device
778  * @v ll_source Link-layer source address
779  * @v flags Packet flags
780  * @ret rc Return status code
781  */
782 static int eoib_heartbeat_rx ( struct io_buffer *iobuf,
783  struct net_device *netdev __unused,
784  const void *ll_dest __unused,
785  const void *ll_source __unused,
786  unsigned int flags __unused ) {
787  free_iob ( iobuf );
788  return 0;
789 }
790 
791 /**
792  * Transcribe EoIB heartbeat address
793  *
794  * @v net_addr EoIB heartbeat address
795  * @ret string "<EoIB>"
796  *
797  * This operation is meaningless for the EoIB heartbeat protocol.
798  */
799 static const char * eoib_heartbeat_ntoa ( const void *net_addr __unused ) {
800  return "<EoIB>";
801 }
802 
803 /** EoIB heartbeat network protocol */
804 struct net_protocol eoib_heartbeat_protocol __net_protocol = {
805  .name = "EoIB",
806  .net_proto = htons ( EOIB_MAGIC ),
807  .rx = eoib_heartbeat_rx,
808  .ntoa = eoib_heartbeat_ntoa,
809 };
810 
811 /****************************************************************************
812  *
813  * EoIB gateway
814  *
815  ****************************************************************************
816  *
817  * Some dubious EoIB implementations require all broadcast traffic to
818  * be sent twice: once to the actual broadcast group, and once as a
819  * unicast to the EoIB-to-Ethernet gateway. This somewhat curious
820  * design arises since the EoIB-to-Ethernet gateway hardware lacks the
821  * ability to attach a queue pair to a multicast GID (or LID), and so
822  * cannot receive traffic sent to the broadcast group.
823  *
824  */
825 
826 /**
827  * Transmit duplicate packet to the EoIB gateway
828  *
829  * @v eoib EoIB device
830  * @v original Original I/O buffer
831  */
832 static void eoib_duplicate ( struct eoib_device *eoib,
833  struct io_buffer *original ) {
834  struct net_device *netdev = eoib->netdev;
835  struct ib_device *ibdev = eoib->ibdev;
836  struct ib_address_vector *av = &eoib->gateway;
837  size_t len = iob_len ( original );
838  struct io_buffer *copy;
839  int rc;
840 
841  /* Create copy of I/O buffer */
842  copy = alloc_iob ( len );
843  if ( ! copy ) {
844  rc = -ENOMEM;
845  goto err_alloc;
846  }
847  memcpy ( iob_put ( copy, len ), original->data, len );
848 
849  /* Append to network device's transmit queue */
850  list_add_tail ( &copy->list, &original->list );
851 
852  /* Resolve path to gateway */
853  if ( ( rc = ib_resolve_path ( ibdev, av ) ) != 0 ) {
854  DBGC ( eoib, "EoIB %s no path to gateway: %s\n",
855  eoib->name, strerror ( rc ) );
856  goto err_path;
857  }
858 
859  /* Force use of GRH even for local destinations */
860  av->gid_present = 1;
861 
862  /* Post send work queue entry */
863  if ( ( rc = ib_post_send ( eoib->ibdev, eoib->qp, av, copy ) ) != 0 )
864  goto err_post_send;
865 
866  return;
867 
868  err_post_send:
869  err_path:
870  list_del ( &copy->list );
871  err_alloc:
872  netdev_tx_err ( netdev, copy, rc );
873 }
874 
875 /**
876  * Set EoIB gateway
877  *
878  * @v eoib EoIB device
879  * @v av Address vector, or NULL to clear gateway
880  */
881 void eoib_set_gateway ( struct eoib_device *eoib,
882  struct ib_address_vector *av ) {
883 
884  if ( av ) {
885  DBGC ( eoib, "EoIB %s using gateway " IB_GID_FMT "\n",
886  eoib->name, IB_GID_ARGS ( &av->gid ) );
887  memcpy ( &eoib->gateway, av, sizeof ( eoib->gateway ) );
888  eoib->duplicate = eoib_duplicate;
889  } else {
890  DBGC ( eoib, "EoIB %s not using gateway\n", eoib->name );
891  eoib->duplicate = NULL;
892  }
893 }
int eoib_create(struct ib_device *ibdev, const uint8_t *hw_addr, struct ib_address_vector *broadcast, const char *name)
Create EoIB device.
Definition: eoib.c:619
#define iob_pull(iobuf, len)
Definition: iobuf.h:102
int ib_link_rc(struct ib_device *ibdev)
Get link state.
Definition: infiniband.c:594
void ib_poll_eq(struct ib_device *ibdev)
Poll event queue.
Definition: infiniband.c:878
struct arbelprm_rc_send_wqe rc
Definition: arbel.h:14
const char * name
Definition: ath9k_hw.c:1984
Ethernet over Infiniband.
Infiniband protocol.
const char * name
Protocol name.
Definition: netdevice.h:66
Infiniband path records.
struct list_head peers
Peer cache.
Definition: eoib.h:50
#define iob_put(iobuf, len)
Definition: iobuf.h:120
static __always_inline int ib_link_ok(struct ib_device *ibdev)
Check link state of Infiniband device.
Definition: infiniband.h:565
Error message tables.
void netdev_rx_err(struct net_device *netdev, struct io_buffer *iobuf, int rc)
Discard received packet.
Definition: netdevice.c:586
static void eoib_complete_recv(struct ib_device *ibdev __unused, struct ib_queue_pair *qp, struct ib_address_vector *dest __unused, struct ib_address_vector *source, struct io_buffer *iobuf, int rc)
Handle EoIB receive completion.
Definition: eoib.c:376
#define EOIB_NUM_SEND_WQES
Number of EoIB send work queue entries.
Definition: eoib.c:47
char name[IBDEV_NAME_LEN]
Name of this Infiniband device.
Definition: infiniband.h:408
static int ib_is_open(struct ib_device *ibdev)
Check whether or not Infiniband device is open.
Definition: infiniband.h:576
struct ib_address_vector broadcast
Broadcast address.
Definition: eoib.h:40
#define EINFO_EINPROGRESS_JOINING
Definition: eoib.c:57
FILE_LICENCE(GPL2_OR_LATER_OR_UBDL)
int(* open)(struct net_device *netdev)
Open network device.
Definition: netdevice.h:222
void eoib_destroy(struct eoib_device *eoib)
Remove EoIB device.
Definition: eoib.c:695
#define list_add(new, head)
Add a new entry to the head of a list.
Definition: list.h:69
void netdev_tx_err(struct net_device *netdev, struct io_buffer *iobuf, int rc)
Discard transmitted packet.
Definition: netdevice.c:440
Infiniband multicast groups.
struct ib_driver eoib_driver __ib_driver
EoIB driver.
Definition: eoib.c:759
Error codes.
An EoIB device.
Definition: eoib.h:30
int ib_mcast_join(struct ib_device *ibdev, struct ib_queue_pair *qp, struct ib_mc_membership *membership, struct ib_address_vector *av, unsigned int mask, void(*complete)(struct ib_mc_membership *membership, int rc))
Join multicast group.
Definition: ib_mcast.c:152
#define iob_push(iobuf, len)
Definition: iobuf.h:84
I/O buffers.
void free_iob(struct io_buffer *iobuf)
Free I/O buffer.
Definition: iobuf.c:146
void eoib_set_gateway(struct eoib_device *eoib, struct ib_address_vector *av)
Set EoIB gateway.
Definition: eoib.c:881
#define __einfo_errortab(einfo)
Definition: errortab.h:23
#define IB_PKEY_FULL
Infiniband partition key full membership flag.
Definition: infiniband.h:42
int ib_create_cq(struct ib_device *ibdev, unsigned int num_cqes, struct ib_completion_queue_operations *op, struct ib_completion_queue **new_cq)
Create completion queue.
Definition: infiniband.c:98
void ib_refill_recv(struct ib_device *ibdev, struct ib_queue_pair *qp)
Refill receive work queue.
Definition: infiniband.c:556
#define DBGC(...)
Definition: compiler.h:505
#define EOIB_MAGIC
EoIB magic signature.
Definition: eoib.h:27
struct io_buffer *(* alloc_iob)(size_t len)
Allocate receive I/O buffer.
Definition: infiniband.h:153
int ib_open(struct ib_device *ibdev)
Open port.
Definition: infiniband.c:652
struct device * dev
Underlying device.
Definition: infiniband.h:410
static void eoib_notify(struct ib_device *ibdev)
Handle device or link status change.
Definition: eoib.c:730
An Infiniband upper-layer driver.
Definition: infiniband.h:471
static LIST_HEAD(eoib_devices)
List of EoIB devices.
static int eoib_join_broadcast_group(struct eoib_device *eoib)
Join EoIB broadcast multicast group.
Definition: eoib.c:465
void ib_close(struct ib_device *ibdev)
Close port.
Definition: infiniband.c:716
uint8_t mac[ETH_ALEN]
MAC address.
Definition: ena.h:24
struct net_protocol eoib_heartbeat_protocol __net_protocol
EoIB heartbeat network protocol.
Definition: eoib.c:804
unsigned int gid_present
GID is present.
Definition: infiniband.h:90
static void eoib_remove(struct ib_device *ibdev)
Remove EoIB device.
Definition: eoib.c:746
#define EOIB_NUM_RECV_WQES
Number of EoIB receive work queue entries.
Definition: eoib.c:50
struct net_device_operations * op
Network device operations.
Definition: netdevice.h:369
struct io_buffer * alloc_iob(size_t len)
Allocate I/O buffer.
Definition: iobuf.c:129
static __always_inline struct ib_device * ibdev_get(struct ib_device *ibdev)
Get reference to Infiniband device.
Definition: infiniband.h:587
An Infiniband Global Identifier.
Definition: ib_packet.h:33
__be32 qpn
Definition: CIB_PRM.h:29
const char * name
Name.
Definition: eoib.h:32
struct ib_completion_queue * cq
Completion queue.
Definition: eoib.h:43
#define EOIB_NUM_CQES
Number of EoIB completion queue entries.
Definition: eoib.c:53
void ib_destroy_cq(struct ib_device *ibdev, struct ib_completion_queue *cq)
Destroy completion queue.
Definition: infiniband.c:145
A doubly-linked list entry (or list head)
Definition: list.h:18
Dynamic memory allocation.
static void eoib_rx_av(struct eoib_device *eoib, const uint8_t *mac, const struct ib_address_vector *av)
Record source address vector.
Definition: eoib.c:251
unsigned long tmp
Definition: linux_pci.h:53
An Infiniband device.
Definition: infiniband.h:398
static void netdev_init(struct net_device *netdev, struct net_device_operations *op)
Initialise a network device.
Definition: netdevice.h:515
static unsigned int eoib_discard(void)
Discard some entries from the peer cache.
Definition: eoib.c:151
#define list_del(list)
Delete an entry from a list.
Definition: list.h:119
#define EINPROGRESS_JOINING
Link status for "broadcast join in progress".
Definition: eoib.c:56
uint8_t h_dest[ETH_ALEN]
Destination MAC address.
Definition: if_ether.h:33
#define ENOMEM
Not enough space.
Definition: errno.h:534
Infiniband completion queue operations.
Definition: infiniband.h:194
void * memcpy(void *dest, const void *src, size_t len) __nonnull
Infiniband queue pair operations.
Definition: infiniband.h:147
static struct ib_queue_pair_operations eoib_qp_op
EoIB queue pair operations.
Definition: eoib.c:425
unsigned long qkey
Queue key.
Definition: infiniband.h:79
static void eoib_poll(struct net_device *netdev)
Poll EoIB network device.
Definition: eoib.c:434
static int eoib_open(struct net_device *netdev)
Open EoIB network device.
Definition: eoib.c:528
assert((readw(&hdr->flags) &(GTF_reading|GTF_writing))==0)
static void netdev_put(struct net_device *netdev)
Drop reference to network device.
Definition: netdevice.h:572
#define container_of(ptr, type, field)
Get containing structure.
Definition: stddef.h:35
static int eoib_probe(struct ib_device *ibdev __unused)
Probe EoIB device.
Definition: eoib.c:719
Ethernet protocol.
#define CACHE_EXPENSIVE
Items with a high replacement cost.
Definition: malloc.h:105
unsigned int(* discard)(void)
Discard some cached data.
Definition: malloc.h:89
uint8_t mac[ETH_ALEN]
Ethernet MAC.
Definition: eoib.c:82
void * priv
Driver private data.
Definition: netdevice.h:431
#define list_for_each_entry(pos, head, member)
Iterate over entries in a list.
Definition: list.h:431
static int eoib_transmit(struct net_device *netdev, struct io_buffer *iobuf)
Transmit packet via EoIB network device.
Definition: eoib.c:310
#define list_add_tail(new, head)
Add a new entry to the tail of a list.
Definition: list.h:93
struct ib_mc_membership membership
Broadcast group membership.
Definition: eoib.h:47
struct list_head list
List of EoIB devices.
Definition: eoib.h:38
#define list_for_each_entry_reverse(pos, head, member)
Iterate over entries in a list in reverse order.
Definition: list.h:444
unsigned int mask
Multicast group additional component mask.
Definition: eoib.h:62
static struct net_device * netdev
Definition: gdbudp.c:52
static int is_multicast_ether_addr(const void *addr)
Check if Ethernet address is a multicast address.
Definition: ethernet.h:37
static void eoib_link_state_changed(struct eoib_device *eoib)
Handle link status change.
Definition: eoib.c:496
struct net_device * netdev
Network device.
Definition: eoib.h:34
An Infiniband multicast group membership.
Definition: ib_mcast.h:17
void unregister_netdev(struct net_device *netdev)
Unregister network device.
Definition: netdevice.c:941
static int eoib_has_gateway(struct eoib_device *eoib)
Check if EoIB device uses a gateway.
Definition: eoib.h:71
struct ib_device * ibdev
Underlying Infiniband device.
Definition: eoib.h:36
static void * dest
Definition: strings.h:176
#define list_for_each_entry_safe(pos, tmp, head, member)
Iterate over entries in a list, safe against deletion of the current entry.
Definition: list.h:458
#define IB_GID_ARGS(gid)
Infiniband Global Identifier debug message arguments.
Definition: ib_packet.h:48
static __always_inline void ibdev_put(struct ib_device *ibdev)
Drop reference to Infiniband device.
Definition: infiniband.h:598
char * strerror(int errno)
Retrieve string representation of error number.
Definition: strerror.c:78
static void(* free)(struct refcnt *refcnt)
Definition: refcnt.h:54
struct cache_discarder eoib_discarder __cache_discarder(CACHE_EXPENSIVE)
EoIB cache discarder.
static void eoib_flush_peers(struct eoib_device *eoib)
Flush EoIB peer cache.
Definition: eoib.c:136
An EoIB header.
Definition: eoib.h:19
uint16_t words[8]
Definition: ib_packet.h:35
void(* duplicate)(struct eoib_device *eoib, struct io_buffer *original)
Send duplicate packet to gateway (or NULL).
Definition: eoib.h:57
static __always_inline void * ib_qp_get_ownerdata(struct ib_queue_pair *qp)
Get Infiniband queue pair owner-private data.
Definition: infiniband.h:664
void * zalloc(size_t size)
Allocate cleared memory.
Definition: malloc.c:624
#define for_each_netdev(netdev)
Iterate over all network devices.
Definition: netdevice.h:543
static void eoib_complete_send(struct ib_device *ibdev __unused, struct ib_queue_pair *qp, struct io_buffer *iobuf, int rc)
Handle EoIB send completion.
Definition: eoib.c:358
int register_netdev(struct net_device *netdev)
Register network device.
Definition: netdevice.c:759
static size_t iob_len(struct io_buffer *iobuf)
Calculate length of data in an I/O buffer.
Definition: iobuf.h:155
const char * eth_ntoa(const void *ll_addr)
Transcribe Ethernet address.
Definition: ethernet.c:175
static int eoib_heartbeat_rx(struct io_buffer *iobuf, struct net_device *netdev __unused, const void *ll_dest __unused, const void *ll_source __unused, unsigned int flags __unused)
Silently ignore incoming EoIB heartbeat packets.
Definition: eoib.c:782
#define DBGC_HD(...)
Definition: compiler.h:507
static void eoib_duplicate(struct eoib_device *eoib, struct io_buffer *original)
Transmit duplicate packet to the EoIB gateway.
Definition: eoib.c:832
A network device.
Definition: netdevice.h:352
void netdev_link_err(struct net_device *netdev, int rc)
Mark network device as having a specific link state.
Definition: netdevice.c:207
static void netdev_nullify(struct net_device *netdev)
Stop using a network device.
Definition: netdevice.h:528
A cache discarder.
Definition: malloc.h:83
static void eoib_leave_broadcast_group(struct eoib_device *eoib)
Leave EoIB broadcast multicast group.
Definition: eoib.c:485
unsigned char uint8_t
Definition: stdint.h:10
void ib_destroy_qp(struct ib_device *ibdev, struct ib_queue_pair *qp)
Destroy queue pair.
Definition: infiniband.c:314
uint8_t h_source[ETH_ALEN]
Source MAC address.
Definition: if_ether.h:35
struct list_head list
List of EoIB peer cache entries.
Definition: eoib.c:80
unsigned long qpn
Queue Pair Number.
Definition: infiniband.h:74
An EoIB peer cache entry.
Definition: eoib.c:78
#define ETH_ALEN
Definition: if_ether.h:8
#define ETH_ZLEN
Definition: if_ether.h:10
int ib_resolve_path(struct ib_device *ibdev, struct ib_address_vector *av)
Resolve path.
Definition: ib_pathrec.c:249
uint16_t magic
Signature.
Definition: eoib.h:21
Network device operations.
Definition: netdevice.h:213
void netdev_rx(struct net_device *netdev, struct io_buffer *iobuf)
Add packet to receive queue.
Definition: netdevice.c:548
struct device * dev
Underlying hardware device.
Definition: netdevice.h:364
void retry_poll(void)
Poll the retry timer list.
Definition: retry.c:197
An Infiniband Queue Pair.
Definition: infiniband.h:157
A network-layer protocol.
Definition: netdevice.h:64
Network device management.
static struct net_device_operations eoib_operations
EoIB network device operations.
Definition: eoib.c:68
struct eoib_device * eoib_find(struct ib_device *ibdev, const uint8_t *hw_addr)
Find EoIB device.
Definition: eoib.c:677
#define __unused
Declare a variable or data structure as unused.
Definition: compiler.h:573
struct arbelprm_qp_db_record qp
Definition: arbel.h:13
int ib_post_send(struct ib_device *ibdev, struct ib_queue_pair *qp, struct ib_address_vector *dest, struct io_buffer *iobuf)
Post send work queue entry.
Definition: infiniband.c:416
#define INIT_LIST_HEAD(list)
Initialise a list head.
Definition: list.h:45
static struct eoib_peer * eoib_find_peer(struct eoib_device *eoib, const uint8_t *mac)
Find EoIB peer cache entry.
Definition: eoib.c:94
static size_t iob_headroom(struct io_buffer *iobuf)
Calculate available space at start of an I/O buffer.
Definition: iobuf.h:165
struct ib_address_vector gateway
Gateway (if any).
Definition: eoib.h:60
void netdev_tx_complete_err(struct net_device *netdev, struct io_buffer *iobuf, int rc)
Complete network transmission.
Definition: netdevice.c:470
char name[NETDEV_NAME_LEN]
Name of this network device.
Definition: netdevice.h:362
struct list_head list
List of which this buffer is a member.
Definition: iobuf.h:40
struct ib_address_vector av
Infiniband address vector.
Definition: eoib.c:84
uint32_t len
Length.
Definition: ena.h:14
#define ENOTTY
Inappropriate I/O control operation.
Definition: errno.h:594
int ib_create_qp(struct ib_device *ibdev, enum ib_queue_pair_type type, unsigned int num_send_wqes, struct ib_completion_queue *send_cq, unsigned int num_recv_wqes, struct ib_completion_queue *recv_cq, struct ib_queue_pair_operations *op, const char *name, struct ib_queue_pair **new_qp)
Create queue pair.
Definition: infiniband.c:199
#define IOB_ZLEN
Minimum I/O buffer length.
Definition: iobuf.h:24
void * data
Start of data.
Definition: iobuf.h:48
#define EIO
Input/output error.
Definition: errno.h:433
static struct ib_completion_queue_operations eoib_cq_op
EoIB completion operations.
Definition: eoib.c:419
union ib_gid gid
GID, if present.
Definition: infiniband.h:92
static struct eoib_peer * eoib_create_peer(struct eoib_device *eoib, const uint8_t *mac)
Create EoIB peer cache entry.
Definition: eoib.c:118
struct net_device * alloc_etherdev(size_t priv_size)
Allocate Ethernet device.
Definition: ethernet.c:264
#define IB_GID_FMT
Infiniband Global Identifier debug message format.
Definition: ib_packet.h:45
void(* complete_send)(struct ib_device *ibdev, struct ib_queue_pair *qp, struct io_buffer *iobuf, int rc)
Complete Send WQE.
Definition: infiniband.h:203
#define DBGCP(...)
Definition: compiler.h:539
uint16_t reserved
Reserved.
Definition: eoib.h:23
int snprintf(char *buf, size_t size, const char *fmt,...)
Write a formatted string to a buffer.
Definition: vsprintf.c:382
uint16_t pkey
Partition key.
Definition: infiniband.h:449
An Infiniband Address Vector.
Definition: infiniband.h:72
const char * name
Name.
Definition: infiniband.h:473
static const char * eoib_heartbeat_ntoa(const void *net_addr __unused)
Transcribe EoIB heartbeat address.
Definition: eoib.c:799
struct mschapv2_challenge peer
Peer challenge.
Definition: mschapv2.h:12
struct ib_queue_pair * qp
Queue pair.
Definition: eoib.h:45
unsigned int lid
Local ID.
Definition: infiniband.h:81
void ib_mcast_leave(struct ib_device *ibdev, struct ib_queue_pair *qp, struct ib_mc_membership *membership)
Leave multicast group.
Definition: ib_mcast.c:209
struct errortab eoib_errors [] __errortab
Human-readable message for the link status.
Definition: eoib.c:61
static struct ib_address_vector * eoib_tx_av(struct eoib_device *eoib, const uint8_t *mac)
Find destination address vector.
Definition: eoib.c:189
u8 gid[16]
Definition: CIB_PRM.h:31
An Ethernet link-layer header.
Definition: if_ether.h:31
int memcmp(const void *first, const void *second, size_t len)
Compare memory regions.
Definition: string.c:114
uint8_t hw_addr[MAX_HW_ADDR_LEN]
Hardware address.
Definition: netdevice.h:381
#define NULL
NULL pointer (VOID *).
Definition: Base.h:321
static void eoib_join_complete(struct ib_mc_membership *membership, int rc)
Handle EoIB broadcast multicast group join completion.
Definition: eoib.c:451
static __always_inline void ib_qp_set_ownerdata(struct ib_queue_pair *qp, void *priv)
Set Infiniband queue pair owner-private data.
Definition: infiniband.h:653
String functions.
void iob_pad(struct io_buffer *iobuf, size_t min_len)
Pad I/O buffer.
Definition: iobpad.c:49
#define htons(value)
Definition: byteswap.h:135
A persistent I/O buffer.
Definition: iobuf.h:33
uint8_t flags
Flags.
Definition: ena.h:18
static void eoib_close(struct net_device *netdev)
Close EoIB network device.
Definition: eoib.c:582