iPXE
ipoib.c
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License as
6  * published by the Free Software Foundation; either version 2 of the
7  * License, or any later version.
8  *
9  * This program is distributed in the hope that it will be useful, but
10  * WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12  * General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17  * 02110-1301, USA.
18  *
19  * You can also choose to distribute this program under the terms of
20  * the Unmodified Binary Distribution Licence (as given in the file
21  * COPYING.UBDL), provided that you have satisfied its requirements.
22  */
23 
24 FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
25 
26 #include <stdint.h>
27 #include <stdlib.h>
28 #include <stdio.h>
29 #include <unistd.h>
30 #include <string.h>
31 #include <byteswap.h>
32 #include <errno.h>
33 #include <ipxe/errortab.h>
34 #include <ipxe/malloc.h>
35 #include <ipxe/if_arp.h>
36 #include <ipxe/arp.h>
37 #include <ipxe/if_ether.h>
38 #include <ipxe/ethernet.h>
39 #include <ipxe/ip.h>
40 #include <ipxe/iobuf.h>
41 #include <ipxe/netdevice.h>
42 #include <ipxe/infiniband.h>
43 #include <ipxe/ib_pathrec.h>
44 #include <ipxe/ib_mcast.h>
45 #include <ipxe/retry.h>
46 #include <ipxe/ipoib.h>
47 
48 /** @file
49  *
50  * IP over Infiniband
51  */
52 
53 /* Disambiguate the various error causes */
54 #define ENXIO_ARP_REPLY __einfo_error ( EINFO_ENXIO_ARP_REPLY )
55 #define EINFO_ENXIO_ARP_REPLY \
56  __einfo_uniqify ( EINFO_ENXIO, 0x01, \
57  "Missing REMAC for ARP reply target address" )
58 #define ENXIO_NON_IPV4 __einfo_error ( EINFO_ENXIO_NON_IPV4 )
59 #define EINFO_ENXIO_NON_IPV4 \
60  __einfo_uniqify ( EINFO_ENXIO, 0x02, \
61  "Missing REMAC for non-IPv4 packet" )
62 #define ENXIO_ARP_SENT __einfo_error ( EINFO_ENXIO_ARP_SENT )
63 #define EINFO_ENXIO_ARP_SENT \
64  __einfo_uniqify ( EINFO_ENXIO, 0x03, \
65  "Missing REMAC for IPv4 packet (ARP sent)" )
66 
67 /** Number of IPoIB send work queue entries */
68 #define IPOIB_NUM_SEND_WQES 8
69 
70 /** Number of IPoIB receive work queue entries */
71 #define IPOIB_NUM_RECV_WQES 4
72 
73 /** Number of IPoIB completion entries */
74 #define IPOIB_NUM_CQES 16
75 
76 /** An IPoIB broadcast address */
78  /** MAC address */
79  struct ipoib_mac mac;
80  /** Address vector */
82  /** Multicast group membership */
84 };
85 
86 /** An IPoIB device */
87 struct ipoib_device {
88  /** Network device */
89  struct net_device *netdev;
90  /** Underlying Infiniband device */
91  struct ib_device *ibdev;
92  /** List of IPoIB devices */
93  struct list_head list;
94  /** Completion queue */
96  /** Queue pair */
97  struct ib_queue_pair *qp;
98  /** Local MAC */
99  struct ipoib_mac mac;
100  /** Broadcast address */
102  /** REMAC cache */
103  struct list_head peers;
104 };
105 
106 /** Broadcast IPoIB address */
107 static struct ipoib_mac ipoib_broadcast = {
108  .flags__qpn = htonl ( IB_QPN_BROADCAST ),
109  .gid.bytes = { 0xff, 0x12, 0x40, 0x1b, 0x00, 0x00, 0x00, 0x00,
110  0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff },
111 };
112 
113 /** Link status for "broadcast join in progress" */
114 #define EINPROGRESS_JOINING __einfo_error ( EINFO_EINPROGRESS_JOINING )
115 #define EINFO_EINPROGRESS_JOINING __einfo_uniqify \
116  ( EINFO_EINPROGRESS, 0x01, "Joining" )
117 
118 /** Human-readable message for the link status */
119 struct errortab ipoib_errors[] __errortab = {
121 };
122 
123 /** List of all IPoIB devices */
124 static LIST_HEAD ( ipoib_devices );
125 
127 
128 /****************************************************************************
129  *
130  * IPoIB REMAC cache
131  *
132  ****************************************************************************
133  */
134 
135 /** An IPoIB REMAC cache entry */
136 struct ipoib_peer {
137  /** List of REMAC cache entries */
138  struct list_head list;
139  /** Remote Ethernet MAC */
141  /** MAC address */
142  struct ipoib_mac mac;
143 };
144 
145 /**
146  * Find IPoIB MAC from REMAC
147  *
148  * @v ipoib IPoIB device
149  * @v remac Remote Ethernet MAC
150  * @ret mac IPoIB MAC (or NULL if not found)
151  */
152 static struct ipoib_mac * ipoib_find_remac ( struct ipoib_device *ipoib,
153  const struct ipoib_remac *remac ) {
154  struct ipoib_peer *peer;
155 
156  /* Check for broadcast or multicast REMAC. We transmit
157  * multicasts as broadcasts for simplicity.
158  */
159  if ( is_multicast_ether_addr ( remac ) )
160  return &ipoib->broadcast.mac;
161 
162  /* Try to find via REMAC cache */
163  list_for_each_entry ( peer, &ipoib->peers, list ) {
164  if ( memcmp ( remac, &peer->remac,
165  sizeof ( peer->remac ) ) == 0 ) {
166  /* Move peer to start of list */
167  list_del ( &peer->list );
168  list_add ( &peer->list, &ipoib->peers );
169  return &peer->mac;
170  }
171  }
172 
173  DBGC ( ipoib, "IPoIB %p unknown REMAC %s\n",
174  ipoib, eth_ntoa ( remac ) );
175  return NULL;
176 }
177 
178 /**
179  * Add IPoIB MAC to REMAC cache
180  *
181  * @v ipoib IPoIB device
182  * @v remac Remote Ethernet MAC
183  * @v mac IPoIB MAC
184  * @ret rc Return status code
185  */
186 static int ipoib_map_remac ( struct ipoib_device *ipoib,
187  const struct ipoib_remac *remac,
188  const struct ipoib_mac *mac ) {
189  struct ipoib_peer *peer;
190 
191  /* Check for existing entry in REMAC cache */
192  list_for_each_entry ( peer, &ipoib->peers, list ) {
193  if ( memcmp ( remac, &peer->remac,
194  sizeof ( peer->remac ) ) == 0 ) {
195  /* Move peer to start of list */
196  list_del ( &peer->list );
197  list_add ( &peer->list, &ipoib->peers );
198  /* Update MAC */
199  memcpy ( &peer->mac, mac, sizeof ( peer->mac ) );
200  return 0;
201  }
202  }
203 
204  /* Create new entry */
205  peer = malloc ( sizeof ( *peer ) );
206  if ( ! peer )
207  return -ENOMEM;
208  memcpy ( &peer->remac, remac, sizeof ( peer->remac ) );
209  memcpy ( &peer->mac, mac, sizeof ( peer->mac ) );
210  list_add ( &peer->list, &ipoib->peers );
211 
212  return 0;
213 }
214 
215 /**
216  * Flush REMAC cache
217  *
218  * @v ipoib IPoIB device
219  */
220 static void ipoib_flush_remac ( struct ipoib_device *ipoib ) {
221  struct ipoib_peer *peer;
222  struct ipoib_peer *tmp;
223 
224  list_for_each_entry_safe ( peer, tmp, &ipoib->peers, list ) {
225  list_del ( &peer->list );
226  free ( peer );
227  }
228 }
229 
230 /**
231  * Discard some entries from the REMAC cache
232  *
233  * @ret discarded Number of cached items discarded
234  */
235 static unsigned int ipoib_discard_remac ( void ) {
236  struct net_device *netdev;
237  struct ipoib_device *ipoib;
238  struct ipoib_peer *peer;
239  unsigned int discarded = 0;
240 
241  /* Try to discard one cache entry for each IPoIB device */
242  for_each_netdev ( netdev ) {
243 
244  /* Skip non-IPoIB devices */
245  if ( netdev->op != &ipoib_operations )
246  continue;
247  ipoib = netdev->priv;
248 
249  /* Discard least recently used cache entry (if any) */
251  list_del ( &peer->list );
252  free ( peer );
253  discarded++;
254  break;
255  }
256  }
257 
258  return discarded;
259 }
260 
261 /** IPoIB cache discarder */
262 struct cache_discarder ipoib_discarder __cache_discarder ( CACHE_EXPENSIVE ) = {
264 };
265 
266 /****************************************************************************
267  *
268  * IPoIB link layer
269  *
270  ****************************************************************************
271  */
272 
273 /**
274  * Initialise IPoIB link-layer address
275  *
276  * @v hw_addr Hardware address
277  * @v ll_addr Link-layer address
278  */
279 static void ipoib_init_addr ( const void *hw_addr, void *ll_addr ) {
280  const uint8_t *guid = hw_addr;
281  uint8_t *eth_addr = ll_addr;
282  uint8_t guid_mask = IPOIB_GUID_MASK;
283  unsigned int i;
284 
285  /* Extract bytes from GUID according to mask */
286  for ( i = 0 ; i < 8 ; i++, guid++, guid_mask <<= 1 ) {
287  if ( guid_mask & 0x80 )
288  *(eth_addr++) = *guid;
289  }
290 }
291 
292 /** IPoIB protocol */
293 struct ll_protocol ipoib_protocol __ll_protocol = {
294  .name = "IPoIB",
295  .ll_proto = htons ( ARPHRD_ETHER ),
296  .hw_addr_len = sizeof ( union ib_guid ),
297  .ll_addr_len = ETH_ALEN,
298  .ll_header_len = ETH_HLEN,
299  .push = eth_push,
300  .pull = eth_pull,
301  .init_addr = ipoib_init_addr,
302  .ntoa = eth_ntoa,
303  .mc_hash = eth_mc_hash,
304  .eth_addr = eth_eth_addr,
305  .eui64 = eth_eui64,
306  .flags = LL_NAME_ONLY,
307 };
308 
309 /**
310  * Allocate IPoIB device
311  *
312  * @v priv_size Size of driver private data
313  * @ret netdev Network device, or NULL
314  */
315 struct net_device * alloc_ipoibdev ( size_t priv_size ) {
316  struct net_device *netdev;
317 
318  netdev = alloc_netdev ( priv_size );
319  if ( netdev ) {
320  netdev->ll_protocol = &ipoib_protocol;
323  }
324  return netdev;
325 }
326 
327 /****************************************************************************
328  *
329  * IPoIB translation layer
330  *
331  ****************************************************************************
332  */
333 
334 /**
335  * Translate transmitted ARP packet
336  *
 * Rewrites an ARP packet carrying Ethernet-sized hardware addresses
 * in place so that it carries IPoIB MAC addresses instead.  The
 * packet grows by 2 * ( sizeof ( ipoib->mac ) - ETH_ALEN ) bytes.
 * Packets whose hardware address length is not ETH_ALEN are left
 * untouched.
 *
337  * @v netdev Network device
338  * @v iobuf Packet to be transmitted (with no link-layer headers)
339  * @ret rc Return status code
 *
 * Returns -ENOBUFS if the I/O buffer lacks tailroom for the
 * expansion, or -ENXIO_ARP_REPLY if the target of an ARP reply has
 * no REMAC cache entry.
 *
 * NOTE(review): the opening line of the function signature and one
 * statement between the two "*_pa" assignments and the "ar_hln"
 * update are absent from this listing; confirm against the original
 * source before relying on this description.
340  */
342  struct io_buffer *iobuf ) {
343  struct ipoib_device *ipoib = netdev->priv;
344  struct arphdr *arphdr = iobuf->data;
345  struct ipoib_mac *target_ha = NULL;
346  void *sender_pa;
347  void *target_pa;
348 
349  /* Do nothing unless ARP contains eIPoIB link-layer addresses */
350  if ( arphdr->ar_hln != ETH_ALEN )
351  return 0;
352 
353  /* Fail unless we have room to expand packet */
354  if ( iob_tailroom ( iobuf ) < ( 2 * ( sizeof ( ipoib->mac ) -
355  ETH_ALEN ) ) ) {
356  DBGC ( ipoib, "IPoIB %p insufficient space in TX ARP\n",
357  ipoib );
358  return -ENOBUFS;
359  }
360 
361  /* Look up REMAC, if applicable */
/* NOTE(review): ar_op is compared with ARPOP_REPLY without htons(),
 * unlike the other on-wire fields handled in this file (e.g. the
 * htons ( ETH_P_ARP ) case labels) - confirm the intended byte order.
 */
/* NOTE(review): the value passed as the REMAC (and printed via
 * eth_ntoa) is arp_target_pa(), the target *protocol* address -
 * confirm whether arp_target_ha() was intended.
 */
362  if ( arphdr->ar_op == ARPOP_REPLY ) {
363  target_ha = ipoib_find_remac ( ipoib, arp_target_pa ( arphdr ));
364  if ( ! target_ha ) {
365  DBGC ( ipoib, "IPoIB %p no REMAC for %s ARP reply\n",
366  ipoib, eth_ntoa ( arp_target_pa ( arphdr ) ) );
367  return -ENXIO_ARP_REPLY;
368  }
369  }
370 
371  /* Construct new packet */
372  iob_put ( iobuf, ( 2 * ( sizeof ( ipoib->mac ) - ETH_ALEN ) ) );
/* Remember where the protocol addresses currently live, before the
 * hardware address length is changed.
 */
373  sender_pa = arp_sender_pa ( arphdr );
374  target_pa = arp_target_pa ( arphdr );
376  arphdr->ar_hln = sizeof ( ipoib->mac );
/* With ar_hln updated, the arp_*() accessors now yield the new field
 * locations (presumably derived from ar_hln/ar_pln - verify in
 * if_arp.h); move the protocol addresses there.
 */
377  memcpy ( arp_target_pa ( arphdr ), target_pa, arphdr->ar_pln );
378  memcpy ( arp_sender_pa ( arphdr ), sender_pa, arphdr->ar_pln );
/* Sender is always the local IPoIB MAC; target is zeroed unless a
 * MAC was found above.
 */
379  memcpy ( arp_sender_ha ( arphdr ), &ipoib->mac, sizeof ( ipoib->mac ) );
380  memset ( arp_target_ha ( arphdr ), 0, sizeof ( ipoib->mac ) );
381  if ( target_ha ) {
382  memcpy ( arp_target_ha ( arphdr ), target_ha,
383  sizeof ( *target_ha ) );
384  }
385 
386  return 0;
387 }
388 
389 /**
390  * Translate transmitted packet
391  *
392  * @v netdev Network device
393  * @v iobuf Packet to be transmitted (with no link-layer headers)
394  * @v net_proto Network-layer protocol (in network byte order)
395  * @ret rc Return status code
396  */
397 static int ipoib_translate_tx ( struct net_device *netdev,
398  struct io_buffer *iobuf, uint16_t net_proto ) {
399 
400  switch ( net_proto ) {
401  case htons ( ETH_P_ARP ) :
402  return ipoib_translate_tx_arp ( netdev, iobuf );
403  case htons ( ETH_P_IP ) :
404  /* No translation needed */
405  return 0;
406  default:
407  /* Cannot handle other traffic via eIPoIB */
408  return -ENOTSUP;
409  }
410 }
411 
412 /**
413  * Translate received ARP packet
414  *
 * Records the sender's IPoIB MAC against the supplied REMAC in the
 * REMAC cache, then rewrites the ARP packet in place so that it
 * carries Ethernet-sized hardware addresses.  The packet shrinks by
 * 2 * ( sizeof ( ipoib->mac ) - ETH_ALEN ) bytes.  Packets whose
 * hardware address length is not sizeof ( ipoib->mac ) are left
 * untouched.
 *
415  * @v netdev Network device
416  * @v iobuf Received packet (with no link-layer headers)
417  * @v remac Constructed Remote Ethernet MAC
418  * @ret rc Return status code
 *
 * Any error from ipoib_map_remac() is returned unchanged.
 *
 * NOTE(review): the opening line of the function signature, the
 * statements between the "*_pa" assignments and the first memcpy,
 * and the body of the ARPOP_REPLY branch are absent from this
 * listing; confirm against the original source.
419  */
421  struct io_buffer *iobuf,
422  struct ipoib_remac *remac ) {
423  struct ipoib_device *ipoib = netdev->priv;
424  struct arphdr *arphdr = iobuf->data;
425  void *sender_pa;
426  void *target_pa;
427  int rc;
428 
429  /* Do nothing unless ARP contains IPoIB link-layer addresses */
430  if ( arphdr->ar_hln != sizeof ( ipoib->mac ) )
431  return 0;
432 
433  /* Create REMAC cache entry */
434  if ( ( rc = ipoib_map_remac ( ipoib, remac,
435  arp_sender_ha ( arphdr ) ) ) != 0 ) {
436  DBGC ( ipoib, "IPoIB %p could not map REMAC: %s\n",
437  ipoib, strerror ( rc ) );
438  return rc;
439  }
440 
441  /* Construct new packet */
/* Remember where the protocol addresses currently live */
442  sender_pa = arp_sender_pa ( arphdr );
443  target_pa = arp_target_pa ( arphdr );
446  memcpy ( arp_sender_pa ( arphdr ), sender_pa, arphdr->ar_pln );
447  memcpy ( arp_target_pa ( arphdr ), target_pa, arphdr->ar_pln );
/* Sender hardware address becomes the REMAC; target is zeroed */
448  memcpy ( arp_sender_ha ( arphdr ), remac, ETH_ALEN );
449  memset ( arp_target_ha ( arphdr ), 0, ETH_ALEN );
/* NOTE(review): ar_op is compared with ARPOP_REPLY without htons(),
 * unlike the other on-wire fields handled in this file - confirm the
 * intended byte order.
 */
450  if ( arphdr->ar_op == ARPOP_REPLY ) {
451  /* Assume received replies were directed to us */
453  }
/* NOTE(review): no visible check that the buffer actually holds a
 * full IPoIB-sized ARP body before it is shrunk - verify whether the
 * caller guarantees this.
 */
454  iob_unput ( iobuf, ( 2 * ( sizeof ( ipoib->mac ) - ETH_ALEN ) ) );
455 
456  return 0;
457 }
458 
459 /**
460  * Translate received packet
461  *
462  * @v netdev Network device
463  * @v iobuf Received packet (with no link-layer headers)
464  * @v remac Constructed Remote Ethernet MAC
465  * @v net_proto Network-layer protocol (in network byte order)
466  * @ret rc Return status code
467  */
468 static int ipoib_translate_rx ( struct net_device *netdev,
469  struct io_buffer *iobuf,
470  struct ipoib_remac *remac,
471  uint16_t net_proto ) {
472 
473  switch ( net_proto ) {
474  case htons ( ETH_P_ARP ) :
475  return ipoib_translate_rx_arp ( netdev, iobuf, remac );
476  case htons ( ETH_P_IP ) :
477  /* No translation needed */
478  return 0;
479  default:
480  /* Cannot handle other traffic via eIPoIB */
481  return -ENOTSUP;
482  }
483 }
484 
485 /****************************************************************************
486  *
487  * IPoIB network device
488  *
489  ****************************************************************************
490  */
491 
492 /**
493  * Transmit packet via IPoIB network device
494  *
495  * @v netdev Network device
496  * @v iobuf I/O buffer
497  * @ret rc Return status code
498  */
499 static int ipoib_transmit ( struct net_device *netdev,
500  struct io_buffer *iobuf ) {
501  struct ipoib_device *ipoib = netdev->priv;
502  struct ib_device *ibdev = ipoib->ibdev;
503  struct ethhdr *ethhdr;
504  struct iphdr *iphdr;
505  struct ipoib_hdr *ipoib_hdr;
506  struct ipoib_remac *remac;
507  struct ipoib_mac *mac;
508  struct ib_address_vector *dest;
509  struct ib_address_vector av;
510  uint16_t net_proto;
511  int rc;
512 
513  /* Sanity check */
514  if ( iob_len ( iobuf ) < sizeof ( *ethhdr ) ) {
515  DBGC ( ipoib, "IPoIB %p buffer too short\n", ipoib );
516  return -EINVAL;
517  }
518 
519  /* Attempting transmission while link is down will put the
520  * queue pair into an error state, so don't try it.
521  */
522  if ( ! ib_link_ok ( ibdev ) )
523  return -ENETUNREACH;
524 
525  /* Strip eIPoIB header */
526  ethhdr = iobuf->data;
527  remac = ( ( struct ipoib_remac * ) ethhdr->h_dest );
528  net_proto = ethhdr->h_protocol;
529  iob_pull ( iobuf, sizeof ( *ethhdr ) );
530 
531  /* Identify destination address */
532  if ( is_multicast_ether_addr ( remac ) ) {
533 
534  /* Transmit multicasts as broadcasts, for simplicity */
535  dest = &ipoib->broadcast.av;
536 
537  } else if ( ( mac = ipoib_find_remac ( ipoib, remac ) ) ) {
538 
539  /* Construct address vector from IPoIB MAC */
540  dest = &av;
541  memset ( dest, 0, sizeof ( *dest ) );
542  dest->qpn = ( ntohl ( mac->flags__qpn ) & IB_QPN_MASK );
543  dest->qkey = ipoib->broadcast.av.qkey;
544  dest->gid_present = 1;
545  memcpy ( &dest->gid, &mac->gid, sizeof ( dest->gid ) );
546  if ( ( rc = ib_resolve_path ( ibdev, dest ) ) != 0 ) {
547  /* Path not resolved yet */
548  return rc;
549  }
550 
551  } else {
552 
553  /* Generate a new ARP request (if possible) to trigger
554  * population of the REMAC cache entry.
555  */
556  if ( ( net_proto != htons ( ETH_P_IP ) ) ||
557  ( iob_len ( iobuf ) < sizeof ( *iphdr ) ) ) {
558  DBGC ( ipoib, "IPoIB %p no REMAC for %s non-IPv4 "
559  "packet type %04x\n", ipoib,
560  eth_ntoa ( ethhdr->h_dest ),
561  ntohs ( net_proto ) );
562  return -ENXIO_NON_IPV4;
563  }
564  iphdr = iobuf->data;
565  if ( ( rc = arp_tx_request ( netdev, &ipv4_protocol,
566  &iphdr->dest, &iphdr->src ) ) !=0){
567  DBGC ( ipoib, "IPoIB %p could not ARP for %s/%s/",
568  ipoib, eth_ntoa ( ethhdr->h_dest ),
569  inet_ntoa ( iphdr->dest ) );
570  DBGC ( ipoib, "%s: %s\n", inet_ntoa ( iphdr->src ),
571  strerror ( rc ) );
572  return rc;
573  }
574  DBGC ( ipoib, "IPoIB %p no REMAC for %s/%s/", ipoib,
575  eth_ntoa ( ethhdr->h_dest ), inet_ntoa ( iphdr->dest ) );
576  DBGC ( ipoib, "%s\n", inet_ntoa ( iphdr->src ) );
577  return -ENXIO_ARP_SENT;
578  }
579 
580  /* Translate packet if applicable */
581  if ( ( rc = ipoib_translate_tx ( netdev, iobuf, net_proto ) ) != 0 )
582  return rc;
583 
584  /* Prepend real IPoIB header */
585  ipoib_hdr = iob_push ( iobuf, sizeof ( *ipoib_hdr ) );
586  ipoib_hdr->proto = net_proto;
587  ipoib_hdr->reserved = 0;
588 
589  /* Transmit packet */
590  return ib_post_send ( ibdev, ipoib->qp, dest, iobuf );
591 }
592 
593 /**
594  * Handle IPoIB send completion
595  *
596  * @v ibdev Infiniband device
597  * @v qp Queue pair
598  * @v iobuf I/O buffer
599  * @v rc Completion status code
600  */
601 static void ipoib_complete_send ( struct ib_device *ibdev __unused,
602  struct ib_queue_pair *qp,
603  struct io_buffer *iobuf, int rc ) {
604  struct ipoib_device *ipoib = ib_qp_get_ownerdata ( qp );
605 
606  netdev_tx_complete_err ( ipoib->netdev, iobuf, rc );
607 }
608 
609 /**
610  * Handle IPoIB receive completion
611  *
 * Validates the completion, strips the real IPoIB header, builds a
 * REMAC for the sender from its QPN and LID, translates the payload
 * if required, prepends an Ethernet-style pseudo-header and hands the
 * packet to the network layer.  On any failure the packet is
 * discarded via netdev_rx_err().
 *
612  * @v ibdev Infiniband device
613  * @v qp Queue pair
614  * @v dest Destination address vector, or NULL
615  * @v source Source address vector, or NULL
616  * @v iobuf I/O buffer
617  * @v rc Completion status code
 *
 * NOTE(review): the opening line of the function signature and the
 * first line of each of the two destination-address copies are
 * absent from this listing; confirm against the original source.
618  */
620  struct ib_queue_pair *qp,
621  struct ib_address_vector *dest,
622  struct ib_address_vector *source,
623  struct io_buffer *iobuf, int rc ) {
624  struct ipoib_device *ipoib = ib_qp_get_ownerdata ( qp );
625  struct net_device *netdev = ipoib->netdev;
626  struct ipoib_hdr *ipoib_hdr;
627  struct ethhdr *ethhdr;
628  struct ipoib_remac remac;
629  uint16_t net_proto;
630 
631  /* Record errors */
632  if ( rc != 0 ) {
633  netdev_rx_err ( netdev, iobuf, rc );
634  return;
635  }
636 
637  /* Sanity check */
638  if ( iob_len ( iobuf ) < sizeof ( struct ipoib_hdr ) ) {
639  DBGC ( ipoib, "IPoIB %p received packet too short to "
640  "contain IPoIB header\n", ipoib );
641  DBGC_HD ( ipoib, iobuf->data, iob_len ( iobuf ) );
642  netdev_rx_err ( netdev, iobuf, -EIO );
643  return;
644  }
/* The source address vector is needed to construct the REMAC below */
645  if ( ! source ) {
646  DBGC ( ipoib, "IPoIB %p received packet without address "
647  "vector\n", ipoib );
648  netdev_rx_err ( netdev, iobuf, -ENOTTY );
649  return;
650  }
651 
652  /* Strip real IPoIB header */
653  ipoib_hdr = iobuf->data;
654  net_proto = ipoib_hdr->proto;
655  iob_pull ( iobuf, sizeof ( *ipoib_hdr ) );
656 
657  /* Construct source address from remote QPN and LID */
658  remac.qpn = htonl ( source->qpn | EIPOIB_QPN_LA );
659  remac.lid = htons ( source->lid );
660 
661  /* Translate packet if applicable */
662  if ( ( rc = ipoib_translate_rx ( netdev, iobuf, &remac,
663  net_proto ) ) != 0 ) {
664  netdev_rx_err ( netdev, iobuf, rc );
665  return;
666  }
667 
668  /* Prepend eIPoIB header */
669  ethhdr = iob_push ( iobuf, sizeof ( *ethhdr ) );
670  memcpy ( &ethhdr->h_source, &remac, sizeof ( ethhdr->h_source ) );
671  ethhdr->h_protocol = net_proto;
672 
673  /* Construct destination address */
/* NOTE(review): "dest" is documented above as possibly NULL, but only
 * "source" is NULL-checked before this dereference - confirm that the
 * two are always supplied together, or add a check.
 */
674  if ( dest->gid_present && IB_GID_MULTICAST ( &dest->gid ) ) {
675  /* Multicast GID: use the Ethernet broadcast address */
677  sizeof ( ethhdr->h_dest ) );
678  } else {
679  /* Assume destination address is local Ethernet MAC */
681  sizeof ( ethhdr->h_dest ) );
682  }
683 
684  /* Hand off to network layer */
685  netdev_rx ( netdev, iobuf );
686 }
687 
688 /** IPoIB completion operations */
691  .complete_recv = ipoib_complete_recv,
692 };
693 
694 /**
695  * Allocate IPoIB receive I/O buffer
696  *
697  * @v len Length of buffer
698  * @ret iobuf I/O buffer, or NULL
699  *
700  * Some Infiniband hardware requires 2kB alignment of receive buffers
701  * and provides no way to disable header separation. The result is
702  * that there are only four bytes of link-layer header (the real IPoIB
703  * header) before the payload. This is not sufficient space to insert
704  * an eIPoIB link-layer pseudo-header.
705  *
706  * We therefore allocate I/O buffers offset to start slightly before
707  * the natural alignment boundary, in order to allow sufficient space.
708  */
709 static struct io_buffer * ipoib_alloc_iob ( size_t len ) {
710  struct io_buffer *iobuf;
711  size_t reserve_len;
712 
713  /* Calculate additional length required at start of buffer */
714  reserve_len = ( sizeof ( struct ethhdr ) -
715  sizeof ( struct ipoib_hdr ) );
716 
717  /* Allocate buffer */
718  iobuf = alloc_iob_raw ( ( len + reserve_len ), len, -reserve_len );
719  if ( iobuf ) {
720  iob_reserve ( iobuf, reserve_len );
721  }
722  return iobuf;
723 }
724 
725 /** IPoIB queue pair operations */
728 };
729 
730 /**
731  * Poll IPoIB network device
732  *
733  * @v netdev Network device
734  */
735 static void ipoib_poll ( struct net_device *netdev ) {
736  struct ipoib_device *ipoib = netdev->priv;
737  struct ib_device *ibdev = ipoib->ibdev;
738 
739  /* Poll Infiniband device */
740  ib_poll_eq ( ibdev );
741 
742  /* Poll the retry timers (required for IPoIB multicast join) */
743  retry_poll();
744 }
745 
746 /**
747  * Handle IPv4 broadcast multicast group join completion
748  *
749  * @v membership Multicast group membership
750  * @v rc Status code
751  */
752 void ipoib_join_complete ( struct ib_mc_membership *membership, int rc ) {
753  struct ipoib_device *ipoib = container_of ( membership,
754  struct ipoib_device,
756 
757  /* Record join status as link status */
758  netdev_link_err ( ipoib->netdev, rc );
759 }
760 
761 /**
762  * Join IPv4 broadcast multicast group
763  *
764  * @v ipoib IPoIB device
765  * @ret rc Return status code
766  */
767 static int ipoib_join_broadcast_group ( struct ipoib_device *ipoib ) {
768  int rc;
769 
770  /* Join multicast group */
771  if ( ( rc = ib_mcast_join ( ipoib->ibdev, ipoib->qp,
772  &ipoib->broadcast.membership,
773  &ipoib->broadcast.av, 0,
774  ipoib_join_complete ) ) != 0 ) {
775  DBGC ( ipoib, "IPoIB %p could not join broadcast group: %s\n",
776  ipoib, strerror ( rc ) );
777  return rc;
778  }
779 
780  return 0;
781 }
782 
783 /**
784  * Leave IPv4 broadcast multicast group
785  *
786  * @v ipoib IPoIB device
787  */
788 static void ipoib_leave_broadcast_group ( struct ipoib_device *ipoib ) {
789 
790  /* Leave multicast group */
791  ib_mcast_leave ( ipoib->ibdev, ipoib->qp,
792  &ipoib->broadcast.membership );
793 }
794 
795 /**
796  * Handle link status change
797  *
798  * @v ipoib IPoIB device
799  */
800 static void ipoib_link_state_changed ( struct ipoib_device *ipoib ) {
801  struct ib_device *ibdev = ipoib->ibdev;
802  struct net_device *netdev = ipoib->netdev;
803  int rc;
804 
805  /* Leave existing broadcast group */
806  if ( ipoib->qp )
807  ipoib_leave_broadcast_group ( ipoib );
808 
809  /* Update MAC address based on potentially-new GID prefix */
810  memcpy ( &ipoib->mac.gid.s.prefix, &ibdev->gid.s.prefix,
811  sizeof ( ipoib->mac.gid.s.prefix ) );
812 
813  /* Update broadcast MAC GID based on potentially-new partition key */
814  ipoib->broadcast.mac.gid.words[2] =
815  htons ( ibdev->pkey | IB_PKEY_FULL );
816 
817  /* Construct broadcast address vector from broadcast MAC address */
818  memset ( &ipoib->broadcast.av, 0, sizeof ( ipoib->broadcast.av ) );
819  ipoib->broadcast.av.qpn = IB_QPN_BROADCAST;
820  ipoib->broadcast.av.gid_present = 1;
821  memcpy ( &ipoib->broadcast.av.gid, &ipoib->broadcast.mac.gid,
822  sizeof ( ipoib->broadcast.av.gid ) );
823 
824  /* Set net device link state to reflect Infiniband link state */
825  rc = ib_link_rc ( ibdev );
827 
828  /* Join new broadcast group */
829  if ( ib_is_open ( ibdev ) && ib_link_ok ( ibdev ) && ipoib->qp &&
830  ( ( rc = ipoib_join_broadcast_group ( ipoib ) ) != 0 ) ) {
831  DBGC ( ipoib, "IPoIB %p could not rejoin broadcast group: "
832  "%s\n", ipoib, strerror ( rc ) );
834  return;
835  }
836 }
837 
838 /**
839  * Open IPoIB network device
840  *
 * Opens the underlying Infiniband device, creates the completion
 * queue and the queue pair, records the queue pair number in the
 * local MAC, fills the receive ring and then triggers a link state
 * update.  If a step fails, the steps already completed are undone
 * in reverse order via the error labels at the end of the function.
 *
841  * @v netdev Network device
842  * @ret rc Return status code
 *
 * NOTE(review): one argument line of the ib_create_qp() call is
 * absent from this listing; confirm against the original source.
843  */
844 static int ipoib_open ( struct net_device *netdev ) {
845  struct ipoib_device *ipoib = netdev->priv;
846  struct ib_device *ibdev = ipoib->ibdev;
847  int rc;
848 
849  /* Open IB device */
850  if ( ( rc = ib_open ( ibdev ) ) != 0 ) {
851  DBGC ( ipoib, "IPoIB %p could not open device: %s\n",
852  ipoib, strerror ( rc ) );
853  goto err_ib_open;
854  }
855 
856  /* Allocate completion queue */
857  if ( ( rc = ib_create_cq ( ibdev, IPOIB_NUM_CQES, &ipoib_cq_op,
858  &ipoib->cq ) ) != 0 ) {
859  DBGC ( ipoib, "IPoIB %p could not create completion queue: "
860  "%s\n", ipoib, strerror ( rc ) );
861  goto err_create_cq;
862  }
863 
864  /* Allocate queue pair */
/* The same completion queue is used for both sends and receives */
865  if ( ( rc = ib_create_qp ( ibdev, IB_QPT_UD, IPOIB_NUM_SEND_WQES,
866  ipoib->cq, IPOIB_NUM_RECV_WQES, ipoib->cq,
868  &ipoib->qp ) ) != 0 ) {
869  DBGC ( ipoib, "IPoIB %p could not create queue pair: %s\n",
870  ipoib, strerror ( rc ) );
871  goto err_create_qp;
872  }
/* Lets the completion handlers recover the IPoIB device from the
 * queue pair via ib_qp_get_ownerdata().
 */
873  ib_qp_set_ownerdata ( ipoib->qp, ipoib );
874 
875  /* Update MAC address with QPN */
876  ipoib->mac.flags__qpn = htonl ( ipoib->qp->qpn );
877 
878  /* Fill receive rings */
879  ib_refill_recv ( ibdev, ipoib->qp );
880 
881  /* Fake a link status change to join the broadcast group */
/* ipoib_link_state_changed() returns void, so a failed join does not
 * fail the open.
 */
882  ipoib_link_state_changed ( ipoib );
883 
884  return 0;
885 
/* Error unwind ladder.  The ib_destroy_qp() call below follows the
 * "return 0" and has no label in front of it, so no visible path
 * reaches it; it only marks where queue pair teardown would sit.
 */
886  ib_destroy_qp ( ibdev, ipoib->qp );
887  err_create_qp:
888  ib_destroy_cq ( ibdev, ipoib->cq );
889  err_create_cq:
890  ib_close ( ibdev );
891  err_ib_open:
892  return rc;
893 }
894 
895 /**
896  * Close IPoIB network device
897  *
898  * @v netdev Network device
899  */
900 static void ipoib_close ( struct net_device *netdev ) {
901  struct ipoib_device *ipoib = netdev->priv;
902  struct ib_device *ibdev = ipoib->ibdev;
903 
904  /* Flush REMAC cache */
905  ipoib_flush_remac ( ipoib );
906 
907  /* Leave broadcast group */
908  ipoib_leave_broadcast_group ( ipoib );
909 
910  /* Remove QPN from MAC address */
911  ipoib->mac.flags__qpn = 0;
912 
913  /* Tear down the queues */
914  ib_destroy_qp ( ibdev, ipoib->qp );
915  ipoib->qp = NULL;
916  ib_destroy_cq ( ibdev, ipoib->cq );
917  ipoib->cq = NULL;
918 
919  /* Close IB device */
920  ib_close ( ibdev );
921 }
922 
923 /** IPoIB network device operations */
924 static struct net_device_operations ipoib_operations = {
925  .open = ipoib_open,
926  .close = ipoib_close,
927  .transmit = ipoib_transmit,
928  .poll = ipoib_poll,
929 };
930 
931 /**
932  * Probe IPoIB device
933  *
934  * @v ibdev Infiniband device
935  * @ret rc Return status code
936  */
937 static int ipoib_probe ( struct ib_device *ibdev ) {
938  struct net_device *netdev;
939  struct ipoib_device *ipoib;
940  int rc;
941 
942  /* Allocate network device */
943  netdev = alloc_ipoibdev ( sizeof ( *ipoib ) );
944  if ( ! netdev )
945  return -ENOMEM;
947  ipoib = netdev->priv;
948  netdev->dev = ibdev->dev;
949  memset ( ipoib, 0, sizeof ( *ipoib ) );
950  ipoib->netdev = netdev;
951  ipoib->ibdev = ibdev;
952  INIT_LIST_HEAD ( &ipoib->peers );
953 
954  /* Extract hardware address */
956  sizeof ( ibdev->gid.s.guid ) );
958 
959  /* Set local MAC address */
960  memcpy ( &ipoib->mac.gid.s.guid, &ibdev->gid.s.guid,
961  sizeof ( ipoib->mac.gid.s.guid ) );
962 
963  /* Set default broadcast MAC address */
964  memcpy ( &ipoib->broadcast.mac, &ipoib_broadcast,
965  sizeof ( ipoib->broadcast.mac ) );
966 
967  /* Add to list of IPoIB devices */
968  list_add_tail ( &ipoib->list, &ipoib_devices );
969 
970  /* Register network device */
971  if ( ( rc = register_netdev ( netdev ) ) != 0 )
972  goto err_register_netdev;
973 
974  return 0;
975 
977  err_register_netdev:
978  list_del ( &ipoib->list );
980  netdev_put ( netdev );
981  return rc;
982 }
983 
984 /**
985  * Handle device or link status change
986  *
987  * @v ibdev Infiniband device
988  */
989 static void ipoib_notify ( struct ib_device *ibdev ) {
990  struct ipoib_device *ipoib;
991 
992  /* Handle link status change for any attached IPoIB devices */
993  list_for_each_entry ( ipoib, &ipoib_devices, list ) {
994  if ( ipoib->ibdev != ibdev )
995  continue;
996  ipoib_link_state_changed ( ipoib );
997  }
998 }
999 
1000 /**
1001  * Remove IPoIB device
1002  *
1003  * @v ibdev Infiniband device
1004  */
1005 static void ipoib_remove ( struct ib_device *ibdev ) {
1006  struct ipoib_device *ipoib;
1007  struct ipoib_device *tmp;
1008  struct net_device *netdev;
1009 
1010  /* Remove any attached IPoIB devices */
1011  list_for_each_entry_safe ( ipoib, tmp, &ipoib_devices, list ) {
1012  if ( ipoib->ibdev != ibdev )
1013  continue;
1014  netdev = ipoib->netdev;
1016  list_del ( &ipoib->list );
1017  netdev_nullify ( netdev );
1018  netdev_put ( netdev );
1019  }
1020 }
1021 
1022 /** IPoIB driver */
1023 struct ib_driver ipoib_driver __ib_driver = {
1024  .name = "IPoIB",
1025  .probe = ipoib_probe,
1026  .notify = ipoib_notify,
1027  .remove = ipoib_remove,
1028 };
1029 
1030 /**
1031  * Find IPoIB network device
1032  *
1033  * @v ibdev Infiniband device
1034  * @ret netdev IPoIB network device, or NULL if not found
1035  */
1036 struct net_device * ipoib_netdev ( struct ib_device *ibdev ) {
1037  struct ipoib_device *ipoib;
1038 
1039  /* Find matching IPoIB device */
1040  list_for_each_entry ( ipoib, &ipoib_devices, list ) {
1041  if ( ipoib->ibdev != ibdev )
1042  continue;
1043  return ipoib->netdev;
1044  }
1045  return NULL;
1046 }
uint16_t h_protocol
Protocol ID.
Definition: if_ether.h:37
static void * arp_sender_pa(struct arphdr *arphdr)
ARP packet sender protocol address.
Definition: if_arp.h:80
#define iob_pull(iobuf, len)
Definition: iobuf.h:102
Address Resolution Protocol.
#define EINVAL
Invalid argument.
Definition: errno.h:428
int ib_link_rc(struct ib_device *ibdev)
Get link state.
Definition: infiniband.c:594
void ib_poll_eq(struct ib_device *ibdev)
Poll event queue.
Definition: infiniband.c:878
struct arbelprm_rc_send_wqe rc
Definition: arbel.h:14
Infiniband protocol.
unsigned short uint16_t
Definition: stdint.h:11
Infiniband path records.
void ipoib_join_complete(struct ib_mc_membership *membership, int rc)
Handle IPv4 broadcast multicast group join completion.
Definition: ipoib.c:752
#define iob_put(iobuf, len)
Definition: iobuf.h:120
static __always_inline int ib_link_ok(struct ib_device *ibdev)
Check link state of Infiniband device.
Definition: infiniband.h:565
Error message tables.
void netdev_rx_err(struct net_device *netdev, struct io_buffer *iobuf, int rc)
Discard received packet.
Definition: netdevice.c:586
union ib_guid guid
Definition: ib_packet.h:40
union ib_gid gid
Port GID (comprising GID prefix and port GUID)
Definition: infiniband.h:441
static int ib_is_open(struct ib_device *ibdev)
Check whether or not Infiniband device is open.
Definition: infiniband.h:576
int(* open)(struct net_device *netdev)
Open network device.
Definition: netdevice.h:222
static void * arp_target_ha(struct arphdr *arphdr)
ARP packet target hardware address.
Definition: if_arp.h:89
int eth_eth_addr(const void *ll_addr, void *eth_addr)
Generate Ethernet-compatible compressed link-layer address.
Definition: ethernet.c:222
#define list_add(new, head)
Add a new entry to the head of a list.
Definition: list.h:69
Infiniband multicast groups.
static void * arp_target_pa(struct arphdr *arphdr)
ARP packet target protocol address.
Definition: if_arp.h:98
Error codes.
struct in_addr src
Definition: ip.h:44
struct ib_device * ibdev
Underlying Infiniband device.
Definition: ipoib.c:91
int ib_mcast_join(struct ib_device *ibdev, struct ib_queue_pair *qp, struct ib_mc_membership *membership, struct ib_address_vector *av, unsigned int mask, void(*complete)(struct ib_mc_membership *membership, int rc))
Join multicast group.
Definition: ib_mcast.c:152
static void ipoib_link_state_changed(struct ipoib_device *ipoib)
Handle link status change.
Definition: ipoib.c:800
#define iob_push(iobuf, len)
Definition: iobuf.h:84
struct net_device * ipoib_netdev(struct ib_device *ibdev)
Find IPoIB network device.
Definition: ipoib.c:1036
I/O buffers.
struct io_buffer * alloc_iob_raw(size_t len, size_t align, size_t offset)
Allocate I/O buffer with specified alignment and offset.
Definition: iobuf.c:48
#define __einfo_errortab(einfo)
Definition: errortab.h:23
#define IPOIB_NUM_RECV_WQES
Number of IPoIB receive work queue entries.
Definition: ipoib.c:71
Retry timers.
struct list_head list
List of network devices.
Definition: netdevice.h:356
#define ETH_P_IP
Definition: if_ether.h:18
#define IB_PKEY_FULL
Infiniband partition key full membership flag.
Definition: infiniband.h:42
struct ib_queue_pair * qp
Queue pair.
Definition: ipoib.c:97
int ib_create_cq(struct ib_device *ibdev, unsigned int num_cqes, struct ib_completion_queue_operations *op, struct ib_completion_queue **new_cq)
Create completion queue.
Definition: infiniband.c:98
uint8_t eth_broadcast[ETH_ALEN]
Ethernet broadcast MAC address.
Definition: ethernet.c:47
void ib_refill_recv(struct ib_device *ibdev, struct ib_queue_pair *qp)
Refill receive work queue.
Definition: infiniband.c:556
#define DBGC(...)
Definition: compiler.h:505
#define LL_NAME_ONLY
Local link-layer address functions only as a name.
Definition: netdevice.h:210
#define ENXIO_NON_IPV4
Definition: ipoib.c:58
struct io_buffer *(* alloc_iob)(size_t len)
Allocate receive I/O buffer.
Definition: infiniband.h:153
int ib_open(struct ib_device *ibdev)
Open port.
Definition: infiniband.c:652
struct device * dev
Underlying device.
Definition: infiniband.h:410
const uint8_t * ll_broadcast
Link-layer broadcast address.
Definition: netdevice.h:389
#define EIPOIB_QPN_LA
eIPoIB REMAC locally-assigned address indicator
Definition: ipoib.h:61
#define ntohl(value)
Definition: byteswap.h:134
#define ENXIO_ARP_SENT
Definition: ipoib.c:62
IP protocol.
An IPoIB REMAC cache entry.
Definition: ipoib.c:136
An Infiniband upper-layer driver.
Definition: infiniband.h:471
#define ntohs(value)
Definition: byteswap.h:136
uint8_t ar_hln
Link-layer address length.
Definition: if_arp.h:59
static int ipoib_translate_rx_arp(struct net_device *netdev, struct io_buffer *iobuf, struct ipoib_remac *remac)
Translate received ARP packet.
Definition: ipoib.c:420
void ib_close(struct ib_device *ibdev)
Close port.
Definition: infiniband.c:716
uint8_t mac[ETH_ALEN]
MAC address.
Definition: ena.h:24
unsigned int gid_present
GID is present.
Definition: infiniband.h:90
int eth_push(struct net_device *netdev __unused, struct io_buffer *iobuf, const void *ll_dest, const void *ll_source, uint16_t net_proto)
Add Ethernet link-layer header.
Definition: ethernet.c:77
static void ipoib_poll(struct net_device *netdev)
Poll IPoIB network device.
Definition: ipoib.c:735
struct net_device_operations * op
Network device operations.
Definition: netdevice.h:369
An IPv4 packet header.
Definition: ip.h:35
struct net_device * alloc_ipoibdev(size_t priv_size)
Allocate IPoIB device.
Definition: ipoib.c:315
#define htonl(value)
Definition: byteswap.h:133
A link-layer protocol.
Definition: netdevice.h:114
static LIST_HEAD(ipoib_devices)
List of all IPoIB devices.
Address Resolution Protocol constants and types.
int eth_eui64(const void *ll_addr, void *eui64)
Generate EUI-64 address.
Definition: ethernet.c:234
int eth_pull(struct net_device *netdev __unused, struct io_buffer *iobuf, const void **ll_dest, const void **ll_source, uint16_t *net_proto, unsigned int *flags)
Remove Ethernet link-layer header.
Definition: ethernet.c:101
#define ENOTSUP
Operation not supported.
Definition: errno.h:589
void ib_destroy_cq(struct ib_device *ibdev, struct ib_completion_queue *cq)
Destroy completion queue.
Definition: infiniband.c:145
A doubly-linked list entry (or list head)
Definition: list.h:18
#define EINFO_EINPROGRESS_JOINING
Definition: ipoib.c:115
Dynamic memory allocation.
uint16_t ar_hrd
Link-layer protocol.
Definition: if_arp.h:52
unsigned long tmp
Definition: linux_pci.h:63
An Infiniband device.
Definition: infiniband.h:398
static void netdev_init(struct net_device *netdev, struct net_device_operations *op)
Initialise a network device.
Definition: netdevice.h:515
struct net_device * alloc_netdev(size_t priv_len)
Allocate network device.
Definition: netdevice.c:721
const char * name
Protocol name.
Definition: netdevice.h:116
#define list_del(list)
Delete an entry from a list.
Definition: list.h:119
uint8_t h_dest[ETH_ALEN]
Destination MAC address.
Definition: if_ether.h:33
#define ENOMEM
Not enough space.
Definition: errno.h:534
Infiniband completion queue operations.
Definition: infiniband.h:194
void * memcpy(void *dest, const void *src, size_t len) __nonnull
Infiniband queue pair operations.
Definition: infiniband.h:147
static unsigned int ipoib_discard_remac(void)
Discard some entries from the REMAC cache.
Definition: ipoib.c:235
static int ipoib_probe(struct ib_device *ibdev)
Probe IPoIB device.
Definition: ipoib.c:937
unsigned long qkey
Queue key.
Definition: infiniband.h:79
IP over Infiniband.
#define ETH_HLEN
Definition: if_ether.h:9
struct list_head peers
REMAC cache.
Definition: ipoib.c:103
static void ipoib_complete_recv(struct ib_device *ibdev __unused, struct ib_queue_pair *qp, struct ib_address_vector *dest, struct ib_address_vector *source, struct io_buffer *iobuf, int rc)
Handle IPoIB receive completion.
Definition: ipoib.c:619
uint16_t reserved
Reserved, must be zero.
Definition: ipoib.h:37
static void netdev_put(struct net_device *netdev)
Drop reference to network device.
Definition: netdevice.h:572
#define container_of(ptr, type, field)
Get containing structure.
Definition: stddef.h:35
Ethernet protocol.
#define CACHE_EXPENSIVE
Items with a high replacement cost.
Definition: malloc.h:105
unsigned int(* discard)(void)
Discard some cached data.
Definition: malloc.h:89
FILE_LICENCE(GPL2_OR_LATER_OR_UBDL)
static void ipoib_close(struct net_device *netdev)
Close IPoIB network device.
Definition: ipoib.c:900
void * priv
Driver private data.
Definition: netdevice.h:431
#define list_for_each_entry(pos, head, member)
Iterate over entries in a list.
Definition: list.h:431
static void ipoib_complete_send(struct ib_device *ibdev __unused, struct ib_queue_pair *qp, struct io_buffer *iobuf, int rc)
Handle IPoIB send completion.
Definition: ipoib.c:601
#define __unused
Declare a variable or data structure as unused.
Definition: compiler.h:573
#define list_add_tail(new, head)
Add a new entry to the tail of a list.
Definition: list.h:93
#define list_for_each_entry_reverse(pos, head, member)
Iterate over entries in a list in reverse order.
Definition: list.h:444
unsigned long qpn
Queue pair number.
Definition: infiniband.h:165
struct ipoib_mac mac
MAC address.
Definition: ipoib.c:142
static struct net_device * netdev
Definition: gdbudp.c:52
#define IB_GID_MULTICAST(gid)
Test for multicast GID.
Definition: ib_packet.h:52
struct list_head list
List of REMAC cache entries.
Definition: ipoib.c:138
int eth_mc_hash(unsigned int af, const void *net_addr, void *ll_addr)
Hash multicast address.
Definition: ethernet.c:193
static int is_multicast_ether_addr(const void *addr)
Check if Ethernet address is a multicast address.
Definition: ethernet.h:37
#define ARPHRD_INFINIBAND
InfiniBand.
Definition: if_arp.h:29
struct ipoib_mac mac
MAC address.
Definition: ipoib.c:79
An Infiniband multicast group membership.
Definition: ib_mcast.h:17
struct errortab ipoib_errors [] __errortab
Human-readable message for the link status.
Definition: ipoib.c:119
void unregister_netdev(struct net_device *netdev)
Unregister network device.
Definition: netdevice.c:941
An ARP header.
Definition: if_arp.h:47
#define list_for_each_entry_safe(pos, tmp, head, member)
Iterate over entries in a list, safe against deletion of the current entry.
Definition: list.h:458
static void ipoib_leave_broadcast_group(struct ipoib_device *ipoib)
Leave IPv4 broadcast multicast group.
Definition: ipoib.c:788
#define IB_QPN_BROADCAST
Broadcast QPN.
Definition: infiniband.h:33
int arp_tx_request(struct net_device *netdev, struct net_protocol *net_protocol, const void *net_dest, const void *net_source)
Transmit ARP request.
Definition: arp.c:59
union ib_guid prefix
Definition: ib_packet.h:39
static void ipoib_remove(struct ib_device *ibdev)
Remove IPoIB device.
Definition: ipoib.c:1005
#define iob_unput(iobuf, len)
Definition: iobuf.h:135
#define ENXIO_ARP_REPLY
Definition: ipoib.c:54
char * strerror(int errno)
Retrieve string representation of error number.
Definition: strerror.c:78
static void(* free)(struct refcnt *refcnt))
Definition: refcnt.h:54
uint16_t words[8]
Definition: ib_packet.h:35
static __always_inline void * ib_qp_get_ownerdata(struct ib_queue_pair *qp)
Get Infiniband queue pair owner-private data.
Definition: infiniband.h:664
#define for_each_netdev(netdev)
Iterate over all network devices.
Definition: netdevice.h:543
uint16_t ar_op
ARP opcode.
Definition: if_arp.h:63
int register_netdev(struct net_device *netdev)
Register network device.
Definition: netdevice.c:759
static size_t iob_len(struct io_buffer *iobuf)
Calculate length of data in an I/O buffer.
Definition: iobuf.h:155
const char * eth_ntoa(const void *ll_addr)
Transcribe Ethernet address.
Definition: ethernet.c:175
struct ipoib_remac remac
Remote Ethernet MAC.
Definition: ipoib.c:140
An IPoIB device.
Definition: ipoib.c:87
struct cache_discarder ipoib_discarder __cache_discarder(CACHE_EXPENSIVE)
IPoIB cache discarder.
An Infiniband Globally Unique Identifier.
Definition: ib_packet.h:18
static int ipoib_translate_tx_arp(struct net_device *netdev, struct io_buffer *iobuf)
Translate transmitted ARP packet.
Definition: ipoib.c:341
#define DBGC_HD(...)
Definition: compiler.h:507
A network device.
Definition: netdevice.h:352
void netdev_link_err(struct net_device *netdev, int rc)
Mark network device as having a specific link state.
Definition: netdevice.c:207
static size_t iob_tailroom(struct io_buffer *iobuf)
Calculate available space at end of an I/O buffer.
Definition: iobuf.h:175
char * inet_ntoa(struct in_addr in)
Convert IPv4 address to dotted-quad notation.
Definition: ipv4.c:668
#define ARPOP_REPLY
ARP reply.
Definition: if_arp.h:33
An Infiniband Completion Queue.
Definition: infiniband.h:224
static void netdev_nullify(struct net_device *netdev)
Stop using a network device.
Definition: netdevice.h:528
A cache discarder.
Definition: malloc.h:83
unsigned char uint8_t
Definition: stdint.h:10
void ib_destroy_qp(struct ib_device *ibdev, struct ib_queue_pair *qp)
Destroy queue pair.
Definition: infiniband.c:314
static int ipoib_transmit(struct net_device *netdev, struct io_buffer *iobuf)
Transmit packet via IPoIB network device.
Definition: ipoib.c:499
uint32_t flags__qpn
Queue pair number.
Definition: ipoib.h:24
uint8_t h_source[ETH_ALEN]
Source MAC address.
Definition: if_ether.h:35
struct in_addr dest
Definition: ip.h:45
static struct ib_completion_queue_operations ipoib_cq_op
IPoIB completion operations.
Definition: ipoib.c:689
unsigned long qpn
Queue Pair Number.
Definition: infiniband.h:74
#define ETH_ALEN
Definition: if_ether.h:8
int ib_resolve_path(struct ib_device *ibdev, struct ib_address_vector *av)
Resolve path.
Definition: ib_pathrec.c:249
static void * arp_sender_ha(struct arphdr *arphdr)
ARP packet sender hardware address.
Definition: if_arp.h:71
static struct io_buffer * ipoib_alloc_iob(size_t len)
Allocate IPoIB receive I/O buffer.
Definition: ipoib.c:709
void * malloc(size_t size)
Allocate memory.
Definition: malloc.c:583
uint32_t qpn
Remote QPN.
Definition: ipoib.h:55
static int ipoib_open(struct net_device *netdev)
Open IPoIB network device.
Definition: ipoib.c:844
#define IPOIB_GUID_MASK
GUID mask used for constructing eIPoIB Local Ethernet MAC address (LEMAC)
Definition: ipoib.h:41
uint64_t guid
GUID.
Definition: edd.h:30
Network device operations.
Definition: netdevice.h:213
void netdev_rx(struct net_device *netdev, struct io_buffer *iobuf)
Add packet to receive queue.
Definition: netdevice.c:548
struct device * dev
Underlying hardware device.
Definition: netdevice.h:364
void retry_poll(void)
Poll the retry timer list.
Definition: retry.c:197
An Infiniband Queue Pair.
Definition: infiniband.h:157
struct ib_mc_membership membership
Multicast group membership.
Definition: ipoib.c:83
Network device management.
struct ib_gid::@613 s
#define EINPROGRESS_JOINING
Link status for "broadcast join in progress".
Definition: ipoib.c:114
uint16_t proto
Network-layer protocol.
Definition: ipoib.h:35
struct arbelprm_qp_db_record qp
Definition: arbel.h:13
#define IPOIB_NUM_CQES
Number of IPoIB completion entries.
Definition: ipoib.c:74
int ib_post_send(struct ib_device *ibdev, struct ib_queue_pair *qp, struct ib_address_vector *dest, struct io_buffer *iobuf)
Post send work queue entry.
Definition: infiniband.c:416
#define iob_reserve(iobuf, len)
Definition: iobuf.h:67
#define INIT_LIST_HEAD(list)
Initialise a list head.
Definition: list.h:45
struct ipoib_broadcast broadcast
Broadcast address.
Definition: ipoib.c:101
#define IPOIB_NUM_SEND_WQES
Number of IPoIB send work queue entries.
Definition: ipoib.c:68
struct ib_driver ipoib_driver __ib_driver
IPoIB driver.
Definition: ipoib.c:1023
void netdev_tx_complete_err(struct net_device *netdev, struct io_buffer *iobuf, int rc)
Complete network transmission.
Definition: netdevice.c:470
char name[NETDEV_NAME_LEN]
Name of this network device.
Definition: netdevice.h:362
struct list_head list
List of IPoIB devices.
Definition: ipoib.c:93
#define ENOBUFS
No buffer space available.
Definition: errno.h:498
#define ENOTTY
Inappropriate I/O control operation.
Definition: errno.h:594
int ib_create_qp(struct ib_device *ibdev, enum ib_queue_pair_type type, unsigned int num_send_wqes, struct ib_completion_queue *send_cq, unsigned int num_recv_wqes, struct ib_completion_queue *recv_cq, struct ib_queue_pair_operations *op, const char *name, struct ib_queue_pair **new_qp)
Create queue pair.
Definition: infiniband.c:199
static void ipoib_flush_remac(struct ipoib_device *ipoib)
Flush REMAC cache.
Definition: ipoib.c:220
static int ipoib_join_broadcast_group(struct ipoib_device *ipoib)
Join IPv4 broadcast multicast group.
Definition: ipoib.c:767
void * data
Start of data.
Definition: iobuf.h:48
eIPoIB Remote Ethernet MAC address
Definition: ipoib.h:48
#define EIO
Input/output error.
Definition: errno.h:433
union ib_gid gid
GID, if present.
Definition: infiniband.h:92
static void ipoib_notify(struct ib_device *ibdev)
Handle device or link status change.
Definition: ipoib.c:989
An IPoIB MAC address.
Definition: ipoib.h:18
void(* complete_send)(struct ib_device *ibdev, struct ib_queue_pair *qp, struct io_buffer *iobuf, int rc)
Complete Send WQE.
Definition: infiniband.h:203
uint8_t ar_pln
Network-layer address length.
Definition: if_arp.h:61
if(len >=6 *4) __asm__ __volatile__("movsl" if(len >=5 *4) __asm__ __volatile__("movsl" if(len >=4 *4) __asm__ __volatile__("movsl" if(len >=3 *4) __asm__ __volatile__("movsl" if(len >=2 *4) __asm__ __volatile__("movsl" if(len >=1 *4) __asm__ __volatile__("movsl" if((len % 4) >=2) __asm__ __volatile__("movsw" if((len % 2) >=1) __asm__ __volatile__("movsb" return dest
Definition: string.h:150
IPoIB link-layer header.
Definition: ipoib.h:33
uint16_t pkey
Partition key.
Definition: infiniband.h:449
An Infiniband Address Vector.
Definition: infiniband.h:72
const char * name
Name.
Definition: infiniband.h:473
static struct net_device_operations ipoib_operations
IPoIB network device operations.
Definition: ipoib.c:126
struct mschapv2_challenge peer
Peer challenge.
Definition: mschapv2.h:12
static void ipoib_init_addr(const void *hw_addr, void *ll_addr)
Initialise IPoIB link-layer address.
Definition: ipoib.c:279
uint16_t lid
Remote LID.
Definition: ipoib.h:57
#define ETH_P_ARP
Definition: if_ether.h:19
unsigned int lid
Local ID.
Definition: infiniband.h:81
uint8_t ll_addr[MAX_LL_ADDR_LEN]
Link-layer address.
Definition: netdevice.h:387
size_t max_pkt_len
Maximum packet length.
Definition: netdevice.h:409
static struct ipoib_mac * ipoib_find_remac(struct ipoib_device *ipoib, const struct ipoib_remac *remac)
Find IPoIB MAC from REMAC.
Definition: ipoib.c:152
static int ipoib_translate_tx(struct net_device *netdev, struct io_buffer *iobuf, uint16_t net_proto)
Translate transmitted packet.
Definition: ipoib.c:397
void ib_mcast_leave(struct ib_device *ibdev, struct ib_queue_pair *qp, struct ib_mc_membership *membership)
Leave multicast group.
Definition: ib_mcast.c:209
#define ARPHRD_ETHER
Ethernet 10Mbps.
Definition: if_arp.h:16
static int ipoib_translate_rx(struct net_device *netdev, struct io_buffer *iobuf, struct ipoib_remac *remac, uint16_t net_proto)
Translate received packet.
Definition: ipoib.c:468
static int ipoib_map_remac(struct ipoib_device *ipoib, const struct ipoib_remac *remac, const struct ipoib_mac *mac)
Add IPoIB MAC to REMAC cache.
Definition: ipoib.c:186
uint8_t lemac[ETH_ALEN]
IPoIB LEMAC (if non-default)
Definition: infiniband.h:464
#define ENETUNREACH
Network unreachable.
Definition: errno.h:488
An Ethernet link-layer header.
Definition: if_ether.h:31
static struct ib_queue_pair_operations ipoib_qp_op
IPoIB queue pair operations.
Definition: ipoib.c:726
int memcmp(const void *first, const void *second, size_t len)
Compare memory regions.
Definition: string.c:114
uint32_t len
Length.
Definition: ena.h:14
uint8_t hw_addr[MAX_HW_ADDR_LEN]
Hardware address.
Definition: netdevice.h:381
#define NULL
NULL pointer (VOID *)
Definition: Base.h:321
static __always_inline void ib_qp_set_ownerdata(struct ib_queue_pair *qp, void *priv)
Set Infiniband queue pair owner-private data.
Definition: infiniband.h:653
String functions.
struct ipoib_mac mac
Local MAC.
Definition: ipoib.c:99
struct ib_address_vector av
Address vector.
Definition: ipoib.c:81
#define htons(value)
Definition: byteswap.h:135
An IPoIB broadcast address.
Definition: ipoib.c:77
#define IB_QPN_MASK
QPN mask.
Definition: infiniband.h:36
struct ll_protocol * ll_protocol
Link-layer protocol.
Definition: netdevice.h:372
struct ib_completion_queue * cq
Completion queue.
Definition: ipoib.c:95
struct ll_protocol ipoib_protocol __ll_protocol
IPoIB protocol.
Definition: ipoib.c:293
union ib_gid gid
Port GID.
Definition: ipoib.h:26
#define IB_MAX_PAYLOAD_SIZE
Maximum payload size.
Definition: infiniband.h:50
void * memset(void *dest, int character, size_t len) __nonnull
struct net_device * netdev
Network device.
Definition: ipoib.c:89
A persistent I/O buffer.
Definition: iobuf.h:33