iPXE
eoib.c
Go to the documentation of this file.
1/*
2 * Copyright (C) 2016 Michael Brown <mbrown@fensystems.co.uk>.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License as
6 * published by the Free Software Foundation; either version 2 of the
7 * License, or any later version.
8 *
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17 * 02110-1301, USA.
18 *
19 * You can also choose to distribute this program under the terms of
20 * the Unmodified Binary Distribution Licence (as given in the file
21 * COPYING.UBDL), provided that you have satisfied its requirements.
22 */
23
24FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
25
26#include <stdio.h>
27#include <string.h>
28#include <errno.h>
29#include <ipxe/errortab.h>
30#include <ipxe/malloc.h>
31#include <ipxe/iobuf.h>
32#include <ipxe/if_ether.h>
33#include <ipxe/netdevice.h>
34#include <ipxe/ethernet.h>
35#include <ipxe/infiniband.h>
36#include <ipxe/ib_mcast.h>
37#include <ipxe/ib_pathrec.h>
38#include <ipxe/eoib.h>
39
40/** @file
41 *
42 * Ethernet over Infiniband
43 *
44 */
45
46/** Number of EoIB send work queue entries */
47#define EOIB_NUM_SEND_WQES 8
48
49/** Number of EoIB receive work queue entries */
50#define EOIB_NUM_RECV_WQES 4
51
52/** Number of EoIB completion queue entries */
53#define EOIB_NUM_CQES 16
54
55/** Link status for "broadcast join in progress" */
56#define EINPROGRESS_JOINING __einfo_error ( EINFO_EINPROGRESS_JOINING )
57#define EINFO_EINPROGRESS_JOINING __einfo_uniqify \
58 ( EINFO_EINPROGRESS, 0x01, "Joining" )
59
60/** Human-readable message for the link status */
61struct errortab eoib_errors[] __errortab = {
63};
64
65/** List of EoIB devices */
66static LIST_HEAD ( eoib_devices );
67
69
70/****************************************************************************
71 *
72 * EoIB peer cache
73 *
74 ****************************************************************************
75 */
76
77/** An EoIB peer cache entry */
78struct eoib_peer {
79 /** List of EoIB peer cache entries */
81 /** Ethernet MAC */
83 /** Infiniband address vector */
85};
86
87/**
88 * Find EoIB peer cache entry
89 *
90 * @v eoib EoIB device
91 * @v mac Ethernet MAC
92 * @ret peer EoIB peer, or NULL if not found
93 */
94static struct eoib_peer * eoib_find_peer ( struct eoib_device *eoib,
95 const uint8_t *mac ) {
96 struct eoib_peer *peer;
97
98 /* Find peer cache entry */
99 list_for_each_entry ( peer, &eoib->peers, list ) {
100 if ( memcmp ( mac, peer->mac, sizeof ( peer->mac ) ) == 0 ) {
101 /* Move peer to start of list */
102 list_del ( &peer->list );
103 list_add ( &peer->list, &eoib->peers );
104 return peer;
105 }
106 }
107
108 return NULL;
109}
110
111/**
112 * Create EoIB peer cache entry
113 *
114 * @v eoib EoIB device
115 * @v mac Ethernet MAC
116 * @ret peer EoIB peer, or NULL on error
117 */
118static struct eoib_peer * eoib_create_peer ( struct eoib_device *eoib,
119 const uint8_t *mac ) {
120 struct eoib_peer *peer;
121
122 /* Allocate and initialise peer cache entry */
123 peer = zalloc ( sizeof ( *peer ) );
124 if ( peer ) {
125 memcpy ( peer->mac, mac, sizeof ( peer->mac ) );
126 list_add ( &peer->list, &eoib->peers );
127 }
128 return peer;
129}
130
131/**
132 * Flush EoIB peer cache
133 *
134 * @v eoib EoIB device
135 */
136static void eoib_flush_peers ( struct eoib_device *eoib ) {
137 struct eoib_peer *peer;
138 struct eoib_peer *tmp;
139
141 list_del ( &peer->list );
142 free ( peer );
143 }
144}
145
146/**
147 * Discard some entries from the peer cache
148 *
149 * @ret discarded Number of cached items discarded
150 */
151static unsigned int eoib_discard ( void ) {
152 struct net_device *netdev;
153 struct eoib_device *eoib;
154 struct eoib_peer *peer;
155 unsigned int discarded = 0;
156
157 /* Try to discard one cache entry for each EoIB device */
159
160 /* Skip non-EoIB devices */
161 if ( netdev->op != &eoib_operations )
162 continue;
163 eoib = netdev->priv;
164
165 /* Discard least recently used cache entry (if any) */
167 list_del ( &peer->list );
168 free ( peer );
169 discarded++;
170 break;
171 }
172 }
173
174 return discarded;
175}
176
177/** EoIB cache discarder */
179 .discard = eoib_discard,
180};
181
182/**
183 * Find destination address vector
184 *
185 * @v eoib EoIB device
186 * @v mac Ethernet MAC
187 * @ret av Address vector, or NULL to send as broadcast
188 */
189static struct ib_address_vector * eoib_tx_av ( struct eoib_device *eoib,
190 const uint8_t *mac ) {
191 struct ib_device *ibdev = eoib->ibdev;
192 struct eoib_peer *peer;
193 int rc;
194
195 /* If this is a broadcast or multicast MAC address, then send
196 * this packet as a broadcast.
197 */
198 if ( is_multicast_ether_addr ( mac ) ) {
199 DBGCP ( eoib, "EoIB %s %s TX multicast\n",
200 eoib->name, eth_ntoa ( mac ) );
201 return NULL;
202 }
203
204 /* If we have no peer cache entry, then create one and send
205 * this packet as a broadcast.
206 */
207 peer = eoib_find_peer ( eoib, mac );
208 if ( ! peer ) {
209 DBGC ( eoib, "EoIB %s %s TX unknown\n",
210 eoib->name, eth_ntoa ( mac ) );
211 eoib_create_peer ( eoib, mac );
212 return NULL;
213 }
214
215 /* If we have not yet recorded a received GID and QPN for this
216 * peer cache entry, then send this packet as a broadcast.
217 */
218 if ( ! peer->av.gid_present ) {
219 DBGCP ( eoib, "EoIB %s %s TX not yet recorded\n",
220 eoib->name, eth_ntoa ( mac ) );
221 return NULL;
222 }
223
224 /* If we have not yet resolved a path to this peer, then send
225 * this packet as a broadcast.
226 */
227 if ( ( rc = ib_resolve_path ( ibdev, &peer->av ) ) != 0 ) {
228 DBGCP ( eoib, "EoIB %s %s TX not yet resolved\n",
229 eoib->name, eth_ntoa ( mac ) );
230 return NULL;
231 }
232
233 /* Force use of GRH even for local destinations */
234 peer->av.gid_present = 1;
235
236 /* We have a fully resolved peer: send this packet as a
237 * unicast.
238 */
239 DBGCP ( eoib, "EoIB %s %s TX " IB_GID_FMT " QPN %#lx\n", eoib->name,
240 eth_ntoa ( mac ), IB_GID_ARGS ( &peer->av.gid ), peer->av.qpn );
241 return &peer->av;
242}
243
244/**
245 * Record source address vector
246 *
247 * @v eoib EoIB device
248 * @v mac Ethernet MAC
249 * @v lid Infiniband LID
250 */
251static void eoib_rx_av ( struct eoib_device *eoib, const uint8_t *mac,
252 const struct ib_address_vector *av ) {
253 const union ib_gid *gid = &av->gid;
254 unsigned long qpn = av->qpn;
255 struct eoib_peer *peer;
256
257 /* Sanity checks */
258 if ( ! av->gid_present ) {
259 DBGC ( eoib, "EoIB %s %s RX with no GID\n",
260 eoib->name, eth_ntoa ( mac ) );
261 return;
262 }
263
264 /* Find peer cache entry (if any) */
265 peer = eoib_find_peer ( eoib, mac );
266 if ( ! peer ) {
267 DBGCP ( eoib, "EoIB %s %s RX " IB_GID_FMT " (ignored)\n",
268 eoib->name, eth_ntoa ( mac ), IB_GID_ARGS ( gid ) );
269 return;
270 }
271
272 /* Some dubious EoIB implementations utilise an Ethernet-to-
273 * EoIB gateway that will send packets from the wrong QPN.
274 */
275 if ( eoib_has_gateway ( eoib ) &&
276 ( memcmp ( gid, &eoib->gateway.gid, sizeof ( *gid ) ) == 0 ) ) {
277 qpn = eoib->gateway.qpn;
278 }
279
280 /* Do nothing if peer cache entry is complete and correct */
281 if ( ( peer->av.lid == av->lid ) && ( peer->av.qpn == qpn ) ) {
282 DBGCP ( eoib, "EoIB %s %s RX unchanged\n",
283 eoib->name, eth_ntoa ( mac ) );
284 return;
285 }
286
287 /* Update peer cache entry */
288 peer->av.qpn = qpn;
289 peer->av.qkey = eoib->broadcast.qkey;
290 peer->av.gid_present = 1;
291 memcpy ( &peer->av.gid, gid, sizeof ( peer->av.gid ) );
292 DBGC ( eoib, "EoIB %s %s RX " IB_GID_FMT " QPN %#lx\n", eoib->name,
293 eth_ntoa ( mac ), IB_GID_ARGS ( &peer->av.gid ), peer->av.qpn );
294}
295
296/****************************************************************************
297 *
298 * EoIB network device
299 *
300 ****************************************************************************
301 */
302
303/**
304 * Transmit packet via EoIB network device
305 *
306 * @v netdev Network device
307 * @v iobuf I/O buffer
308 * @ret rc Return status code
309 */
310static int eoib_transmit ( struct net_device *netdev,
311 struct io_buffer *iobuf ) {
312 struct eoib_device *eoib = netdev->priv;
313 struct eoib_header *eoib_hdr;
314 struct ethhdr *ethhdr;
315 struct ib_address_vector *av;
316 size_t zlen;
317
318 /* Sanity checks */
319 assert ( iob_len ( iobuf ) >= sizeof ( *ethhdr ) );
320 assert ( iob_headroom ( iobuf ) >= sizeof ( *eoib_hdr ) );
321
322 /* Look up destination address vector */
323 ethhdr = iobuf->data;
324 av = eoib_tx_av ( eoib, ethhdr->h_dest );
325
326 /* Prepend EoIB header */
327 eoib_hdr = iob_push ( iobuf, sizeof ( *eoib_hdr ) );
328 eoib_hdr->magic = htons ( EOIB_MAGIC );
329 eoib_hdr->reserved = 0;
330
331 /* Pad buffer to minimum Ethernet frame size */
332 zlen = ( sizeof ( *eoib_hdr ) + ETH_ZLEN );
333 assert ( zlen <= IOB_ZLEN );
334 if ( iob_len ( iobuf ) < zlen )
335 iob_pad ( iobuf, zlen );
336
337 /* If we have no unicast address then send as a broadcast,
338 * with a duplicate sent to the gateway if applicable.
339 */
340 if ( ! av ) {
341 av = &eoib->broadcast;
342 if ( eoib_has_gateway ( eoib ) )
343 eoib->duplicate ( eoib, iobuf );
344 }
345
346 /* Post send work queue entry */
347 return ib_post_send ( eoib->ibdev, eoib->qp, av, iobuf );
348}
349
350/**
351 * Handle EoIB send completion
352 *
353 * @v ibdev Infiniband device
354 * @v qp Queue pair
355 * @v iobuf I/O buffer
356 * @v rc Completion status code
357 */
358static void eoib_complete_send ( struct ib_device *ibdev __unused,
359 struct ib_queue_pair *qp,
360 struct io_buffer *iobuf, int rc ) {
361 struct eoib_device *eoib = ib_qp_get_ownerdata ( qp );
362
363 netdev_tx_complete_err ( eoib->netdev, iobuf, rc );
364}
365
366/**
367 * Handle EoIB receive completion
368 *
369 * @v ibdev Infiniband device
370 * @v qp Queue pair
371 * @v dest Destination address vector, or NULL
372 * @v source Source address vector, or NULL
373 * @v iobuf I/O buffer
374 * @v rc Completion status code
375 */
377 struct ib_queue_pair *qp,
379 struct ib_address_vector *source,
380 struct io_buffer *iobuf, int rc ) {
381 struct eoib_device *eoib = ib_qp_get_ownerdata ( qp );
382 struct net_device *netdev = eoib->netdev;
383 struct eoib_header *eoib_hdr;
384 struct ethhdr *ethhdr;
385
386 /* Record errors */
387 if ( rc != 0 ) {
388 netdev_rx_err ( netdev, iobuf, rc );
389 return;
390 }
391
392 /* Sanity check */
393 if ( iob_len ( iobuf ) < ( sizeof ( *eoib_hdr ) + sizeof ( *ethhdr ) )){
394 DBGC ( eoib, "EoIB %s received packet too short to "
395 "contain EoIB and Ethernet headers\n", eoib->name );
396 DBGC_HD ( eoib, iobuf->data, iob_len ( iobuf ) );
397 netdev_rx_err ( netdev, iobuf, -EIO );
398 return;
399 }
400 if ( ! source ) {
401 DBGC ( eoib, "EoIB %s received packet without address "
402 "vector\n", eoib->name );
403 netdev_rx_err ( netdev, iobuf, -ENOTTY );
404 return;
405 }
406
407 /* Strip EoIB header */
408 iob_pull ( iobuf, sizeof ( *eoib_hdr ) );
409
410 /* Update neighbour cache entry, if any */
411 ethhdr = iobuf->data;
412 eoib_rx_av ( eoib, ethhdr->h_source, source );
413
414 /* Hand off to network layer */
415 netdev_rx ( netdev, iobuf );
416}
417
418/** EoIB completion operations */
420 .complete_send = eoib_complete_send,
421 .complete_recv = eoib_complete_recv,
422};
423
424/** EoIB queue pair operations */
426 .alloc_iob = alloc_iob,
427};
428
429/**
430 * Poll EoIB network device
431 *
432 * @v netdev Network device
433 */
434static void eoib_poll ( struct net_device *netdev ) {
435 struct eoib_device *eoib = netdev->priv;
436 struct ib_device *ibdev = eoib->ibdev;
437
438 /* Poll Infiniband device */
439 ib_poll_eq ( ibdev );
440
441 /* Poll the retry timers (required for EoIB multicast join) */
442 retry_poll();
443}
444
445/**
446 * Handle EoIB broadcast multicast group join completion
447 *
448 * @v membership Multicast group membership
449 * @v rc Status code
450 */
451static void eoib_join_complete ( struct ib_mc_membership *membership, int rc ) {
452 struct eoib_device *eoib =
454
455 /* Record join status as link status */
456 netdev_link_err ( eoib->netdev, rc );
457}
458
459/**
460 * Join EoIB broadcast multicast group
461 *
462 * @v eoib EoIB device
463 * @ret rc Return status code
464 */
465static int eoib_join_broadcast_group ( struct eoib_device *eoib ) {
466 int rc;
467
468 /* Join multicast group */
469 if ( ( rc = ib_mcast_join ( eoib->ibdev, eoib->qp,
470 &eoib->membership, &eoib->broadcast,
471 eoib->mask, eoib_join_complete ) ) != 0 ) {
472 DBGC ( eoib, "EoIB %s could not join broadcast group: %s\n",
473 eoib->name, strerror ( rc ) );
474 return rc;
475 }
476
477 return 0;
478}
479
480/**
481 * Leave EoIB broadcast multicast group
482 *
483 * @v eoib EoIB device
484 */
485static void eoib_leave_broadcast_group ( struct eoib_device *eoib ) {
486
487 /* Leave multicast group */
488 ib_mcast_leave ( eoib->ibdev, eoib->qp, &eoib->membership );
489}
490
491/**
492 * Handle link status change
493 *
494 * @v eoib EoIB device
495 */
496static void eoib_link_state_changed ( struct eoib_device *eoib ) {
497 struct net_device *netdev = eoib->netdev;
498 struct ib_device *ibdev = eoib->ibdev;
499 int rc;
500
501 /* Leave existing broadcast group */
502 if ( eoib->qp )
504
505 /* Update broadcast GID based on potentially-new partition key */
506 eoib->broadcast.gid.words[2] = htons ( ibdev->pkey | IB_PKEY_FULL );
507
508 /* Set net device link state to reflect Infiniband link state */
509 rc = ib_link_rc ( ibdev );
511
512 /* Join new broadcast group */
513 if ( ib_is_open ( ibdev ) && ib_link_ok ( ibdev ) && eoib->qp &&
514 ( ( rc = eoib_join_broadcast_group ( eoib ) ) != 0 ) ) {
515 DBGC ( eoib, "EoIB %s could not rejoin broadcast group: "
516 "%s\n", eoib->name, strerror ( rc ) );
518 return;
519 }
520}
521
522/**
523 * Open EoIB network device
524 *
525 * @v netdev Network device
526 * @ret rc Return status code
527 */
528static int eoib_open ( struct net_device *netdev ) {
529 struct eoib_device *eoib = netdev->priv;
530 struct ib_device *ibdev = eoib->ibdev;
531 int rc;
532
533 /* Open IB device */
534 if ( ( rc = ib_open ( ibdev ) ) != 0 ) {
535 DBGC ( eoib, "EoIB %s could not open %s: %s\n",
536 eoib->name, ibdev->name, strerror ( rc ) );
537 goto err_ib_open;
538 }
539
540 /* Allocate completion queue */
541 if ( ( rc = ib_create_cq ( ibdev, EOIB_NUM_CQES, &eoib_cq_op,
542 &eoib->cq ) ) != 0 ) {
543 DBGC ( eoib, "EoIB %s could not create completion queue: %s\n",
544 eoib->name, strerror ( rc ) );
545 goto err_create_cq;
546 }
547
548 /* Allocate queue pair */
549 if ( ( rc = ib_create_qp ( ibdev, IB_QPT_UD, EOIB_NUM_SEND_WQES,
550 eoib->cq, EOIB_NUM_RECV_WQES, eoib->cq,
551 &eoib_qp_op, netdev->name, &eoib->qp ) )!=0){
552 DBGC ( eoib, "EoIB %s could not create queue pair: %s\n",
553 eoib->name, strerror ( rc ) );
554 goto err_create_qp;
555 }
556 ib_qp_set_ownerdata ( eoib->qp, eoib );
557
558 /* Fill receive rings */
559 ib_refill_recv ( ibdev, eoib->qp );
560
561 /* Fake a link status change to join the broadcast group */
563
564 return 0;
565
566 ib_destroy_qp ( ibdev, eoib->qp );
567 eoib->qp = NULL;
568 err_create_qp:
569 ib_destroy_cq ( ibdev, eoib->cq );
570 eoib->cq = NULL;
571 err_create_cq:
572 ib_close ( ibdev );
573 err_ib_open:
574 return rc;
575}
576
577/**
578 * Close EoIB network device
579 *
580 * @v netdev Network device
581 */
582static void eoib_close ( struct net_device *netdev ) {
583 struct eoib_device *eoib = netdev->priv;
584 struct ib_device *ibdev = eoib->ibdev;
585
586 /* Flush peer cache */
587 eoib_flush_peers ( eoib );
588
589 /* Leave broadcast group */
591
592 /* Tear down the queues */
593 ib_destroy_qp ( ibdev, eoib->qp );
594 eoib->qp = NULL;
595 ib_destroy_cq ( ibdev, eoib->cq );
596 eoib->cq = NULL;
597
598 /* Close IB device */
599 ib_close ( ibdev );
600}
601
602/** EoIB network device operations */
604 .open = eoib_open,
605 .close = eoib_close,
606 .transmit = eoib_transmit,
607 .poll = eoib_poll,
608};
609
610/**
611 * Create EoIB device
612 *
613 * @v ibdev Infiniband device
614 * @v hw_addr Ethernet MAC
615 * @v broadcast Broadcast address vector
616 * @v name Interface name (or NULL to use default)
617 * @ret rc Return status code
618 */
619int eoib_create ( struct ib_device *ibdev, const uint8_t *hw_addr,
620 struct ib_address_vector *broadcast, const char *name ) {
621 struct net_device *netdev;
622 struct eoib_device *eoib;
623 int rc;
624
625 /* Allocate network device */
626 netdev = alloc_etherdev ( sizeof ( *eoib ) );
627 if ( ! netdev ) {
628 rc = -ENOMEM;
629 goto err_alloc;
630 }
632 eoib = netdev->priv;
633 netdev->dev = ibdev->dev;
634 eoib->netdev = netdev;
635 eoib->ibdev = ibdev_get ( ibdev );
636 memcpy ( &eoib->broadcast, broadcast, sizeof ( eoib->broadcast ) );
637 INIT_LIST_HEAD ( &eoib->peers );
638
639 /* Set MAC address */
640 memcpy ( netdev->hw_addr, hw_addr, ETH_ALEN );
641
642 /* Set interface name, if applicable */
643 if ( name )
644 snprintf ( netdev->name, sizeof ( netdev->name ), "%s", name );
645 eoib->name = netdev->name;
646
647 /* Add to list of EoIB devices */
648 list_add_tail ( &eoib->list, &eoib_devices );
649
650 /* Register network device */
651 if ( ( rc = register_netdev ( netdev ) ) != 0 )
652 goto err_register;
653
654 DBGC ( eoib, "EoIB %s created for %s MAC %s\n",
655 eoib->name, ibdev->name, eth_ntoa ( hw_addr ) );
656 DBGC ( eoib, "EoIB %s broadcast GID " IB_GID_FMT "\n",
657 eoib->name, IB_GID_ARGS ( &broadcast->gid ) );
658 return 0;
659
661 err_register:
662 list_del ( &eoib->list );
663 ibdev_put ( ibdev );
665 netdev_put ( netdev );
666 err_alloc:
667 return rc;
668}
669
670/**
671 * Find EoIB device
672 *
673 * @v ibdev Infiniband device
674 * @v hw_addr Original Ethernet MAC
675 * @ret eoib EoIB device
676 */
678 const uint8_t *hw_addr ) {
679 struct eoib_device *eoib;
680
681 list_for_each_entry ( eoib, &eoib_devices, list ) {
682 if ( ( eoib->ibdev == ibdev ) &&
683 ( memcmp ( eoib->netdev->hw_addr, hw_addr,
684 ETH_ALEN ) == 0 ) )
685 return eoib;
686 }
687 return NULL;
688}
689
690/**
691 * Remove EoIB device
692 *
693 * @v eoib EoIB device
694 */
695void eoib_destroy ( struct eoib_device *eoib ) {
696 struct net_device *netdev = eoib->netdev;
697
698 /* Unregister network device */
700
701 /* Remove from list of network devices */
702 list_del ( &eoib->list );
703
704 /* Drop reference to Infiniband device */
705 ibdev_put ( eoib->ibdev );
706
707 /* Free network device */
708 DBGC ( eoib, "EoIB %s destroyed\n", eoib->name );
710 netdev_put ( netdev );
711}
712
713/**
714 * Probe EoIB device
715 *
716 * @v ibdev Infiniband device
717 * @ret rc Return status code
718 */
719static int eoib_probe ( struct ib_device *ibdev __unused ) {
720
721 /* EoIB devices are not created automatically */
722 return 0;
723}
724
725/**
726 * Handle device or link status change
727 *
728 * @v ibdev Infiniband device
729 */
730static void eoib_notify ( struct ib_device *ibdev ) {
731 struct eoib_device *eoib;
732
733 /* Handle link status change for any attached EoIB devices */
734 list_for_each_entry ( eoib, &eoib_devices, list ) {
735 if ( eoib->ibdev != ibdev )
736 continue;
738 }
739}
740
741/**
742 * Remove EoIB device
743 *
744 * @v ibdev Infiniband device
745 */
746static void eoib_remove ( struct ib_device *ibdev ) {
747 struct eoib_device *eoib;
748 struct eoib_device *tmp;
749
750 /* Remove any attached EoIB devices */
751 list_for_each_entry_safe ( eoib, tmp, &eoib_devices, list ) {
752 if ( eoib->ibdev != ibdev )
753 continue;
754 eoib_destroy ( eoib );
755 }
756}
757
758/** EoIB driver */
759struct ib_driver eoib_driver __ib_driver = {
760 .name = "EoIB",
761 .probe = eoib_probe,
762 .notify = eoib_notify,
763 .remove = eoib_remove,
764};
765
766/****************************************************************************
767 *
768 * EoIB heartbeat packets
769 *
770 ****************************************************************************
771 */
772
773/**
774 * Silently ignore incoming EoIB heartbeat packets
775 *
776 * @v iobuf I/O buffer
777 * @v netdev Network device
778 * @v ll_source Link-layer source address
779 * @v flags Packet flags
780 * @ret rc Return status code
781 */
782static int eoib_heartbeat_rx ( struct io_buffer *iobuf,
783 struct net_device *netdev __unused,
784 const void *ll_dest __unused,
785 const void *ll_source __unused,
786 unsigned int flags __unused ) {
787 free_iob ( iobuf );
788 return 0;
789}
790
791/**
792 * Transcribe EoIB heartbeat address
793 *
794 * @v net_addr EoIB heartbeat address
795 * @ret string "<EoIB>"
796 *
797 * This operation is meaningless for the EoIB heartbeat protocol.
798 */
799static const char * eoib_heartbeat_ntoa ( const void *net_addr __unused ) {
800 return "<EoIB>";
801}
802
803/** EoIB heartbeat network protocol */
804struct net_protocol eoib_heartbeat_protocol __net_protocol = {
805 .name = "EoIB",
806 .net_proto = htons ( EOIB_MAGIC ),
807 .rx = eoib_heartbeat_rx,
808 .ntoa = eoib_heartbeat_ntoa,
809};
810
811/****************************************************************************
812 *
813 * EoIB gateway
814 *
815 ****************************************************************************
816 *
817 * Some dubious EoIB implementations require all broadcast traffic to
818 * be sent twice: once to the actual broadcast group, and once as a
819 * unicast to the EoIB-to-Ethernet gateway. This somewhat curious
820 * design arises since the EoIB-to-Ethernet gateway hardware lacks the
821 * ability to attach a queue pair to a multicast GID (or LID), and so
822 * cannot receive traffic sent to the broadcast group.
823 *
824 */
825
826/**
827 * Transmit duplicate packet to the EoIB gateway
828 *
829 * @v eoib EoIB device
830 * @v original Original I/O buffer
831 */
832static void eoib_duplicate ( struct eoib_device *eoib,
833 struct io_buffer *original ) {
834 struct net_device *netdev = eoib->netdev;
835 struct ib_device *ibdev = eoib->ibdev;
836 struct ib_address_vector *av = &eoib->gateway;
837 size_t len = iob_len ( original );
838 struct io_buffer *copy;
839 int rc;
840
841 /* Create copy of I/O buffer */
842 copy = alloc_iob ( len );
843 if ( ! copy ) {
844 rc = -ENOMEM;
845 goto err_alloc;
846 }
847 memcpy ( iob_put ( copy, len ), original->data, len );
848
849 /* Append to network device's transmit queue */
850 list_add_tail ( &copy->list, &original->list );
851
852 /* Resolve path to gateway */
853 if ( ( rc = ib_resolve_path ( ibdev, av ) ) != 0 ) {
854 DBGC ( eoib, "EoIB %s no path to gateway: %s\n",
855 eoib->name, strerror ( rc ) );
856 goto err_path;
857 }
858
859 /* Force use of GRH even for local destinations */
860 av->gid_present = 1;
861
862 /* Post send work queue entry */
863 if ( ( rc = ib_post_send ( eoib->ibdev, eoib->qp, av, copy ) ) != 0 )
864 goto err_post_send;
865
866 return;
867
868 err_post_send:
869 err_path:
870 list_del ( &copy->list );
871 err_alloc:
872 netdev_tx_err ( netdev, copy, rc );
873}
874
875/**
876 * Set EoIB gateway
877 *
878 * @v eoib EoIB device
879 * @v av Address vector, or NULL to clear gateway
880 */
881void eoib_set_gateway ( struct eoib_device *eoib,
882 struct ib_address_vector *av ) {
883
884 if ( av ) {
885 DBGC ( eoib, "EoIB %s using gateway " IB_GID_FMT "\n",
886 eoib->name, IB_GID_ARGS ( &av->gid ) );
887 memcpy ( &eoib->gateway, av, sizeof ( eoib->gateway ) );
889 } else {
890 DBGC ( eoib, "EoIB %s not using gateway\n", eoib->name );
891 eoib->duplicate = NULL;
892 }
893}
#define NULL
NULL pointer (VOID *)
Definition Base.h:322
u8 gid[16]
Definition CIB_PRM.h:3
__be32 qpn
Definition CIB_PRM.h:1
struct arbelprm_rc_send_wqe rc
Definition arbel.h:3
struct arbelprm_qp_db_record qp
Definition arbel.h:2
unsigned char uint8_t
Definition stdint.h:10
if(len >=6 *4) __asm__ __volatile__("movsl" if(len >=5 *4) __asm__ __volatile__("movsl" if(len >=4 *4) __asm__ __volatile__("movsl" if(len >=3 *4) __asm__ __volatile__("movsl" if(len >=2 *4) __asm__ __volatile__("movsl" if(len >=1 *4) __asm__ __volatile__("movsl" if((len % 4) >=2) __asm__ __volatile__("movsw" if((len % 2) >=1) __asm__ __volatile__("movsb" retur dest)
Definition string.h:151
#define assert(condition)
Assert a condition at run-time.
Definition assert.h:50
const char * name
Definition ath9k_hw.c:1986
ring len
Length.
Definition dwmac.h:226
uint8_t flags
Flags.
Definition ena.h:7
uint8_t mac[ETH_ALEN]
MAC address.
Definition ena.h:13
static int eoib_heartbeat_rx(struct io_buffer *iobuf, struct net_device *netdev __unused, const void *ll_dest __unused, const void *ll_source __unused, unsigned int flags __unused)
Silently ignore incoming EoIB heartbeat packets.
Definition eoib.c:782
static void eoib_complete_send(struct ib_device *ibdev __unused, struct ib_queue_pair *qp, struct io_buffer *iobuf, int rc)
Handle EoIB send completion.
Definition eoib.c:358
int eoib_create(struct ib_device *ibdev, const uint8_t *hw_addr, struct ib_address_vector *broadcast, const char *name)
Create EoIB device.
Definition eoib.c:619
static unsigned int eoib_discard(void)
Discard some entries from the peer cache.
Definition eoib.c:151
#define EINPROGRESS_JOINING
Link status for "broadcast join in progress".
Definition eoib.c:56
static void eoib_rx_av(struct eoib_device *eoib, const uint8_t *mac, const struct ib_address_vector *av)
Record source address vector.
Definition eoib.c:251
static void eoib_duplicate(struct eoib_device *eoib, struct io_buffer *original)
Transmit duplicate packet to the EoIB gateway.
Definition eoib.c:832
static void eoib_complete_recv(struct ib_device *ibdev __unused, struct ib_queue_pair *qp, struct ib_address_vector *dest __unused, struct ib_address_vector *source, struct io_buffer *iobuf, int rc)
Handle EoIB receive completion.
Definition eoib.c:376
#define EOIB_NUM_CQES
Number of EoIB completion queue entries.
Definition eoib.c:53
static const char * eoib_heartbeat_ntoa(const void *net_addr __unused)
Transcribe EoIB heartbeat address.
Definition eoib.c:799
static void eoib_link_state_changed(struct eoib_device *eoib)
Handle link status change.
Definition eoib.c:496
static void eoib_join_complete(struct ib_mc_membership *membership, int rc)
Handle EoIB broadcast multicast group join completion.
Definition eoib.c:451
static int eoib_transmit(struct net_device *netdev, struct io_buffer *iobuf)
Transmit packet via EoIB network device.
Definition eoib.c:310
static int eoib_probe(struct ib_device *ibdev __unused)
Probe EoIB device.
Definition eoib.c:719
struct eoib_device * eoib_find(struct ib_device *ibdev, const uint8_t *hw_addr)
Find EoIB device.
Definition eoib.c:677
static int eoib_join_broadcast_group(struct eoib_device *eoib)
Join EoIB broadcast multicast group.
Definition eoib.c:465
static struct eoib_peer * eoib_create_peer(struct eoib_device *eoib, const uint8_t *mac)
Create EoIB peer cache entry.
Definition eoib.c:118
static void eoib_poll(struct net_device *netdev)
Poll EoIB network device.
Definition eoib.c:434
static void eoib_leave_broadcast_group(struct eoib_device *eoib)
Leave EoIB broadcast multicast group.
Definition eoib.c:485
static struct eoib_peer * eoib_find_peer(struct eoib_device *eoib, const uint8_t *mac)
Find EoIB peer cache entry.
Definition eoib.c:94
static void eoib_flush_peers(struct eoib_device *eoib)
Flush EoIB peer cache.
Definition eoib.c:136
static void eoib_close(struct net_device *netdev)
Close EoIB network device.
Definition eoib.c:582
static struct ib_queue_pair_operations eoib_qp_op
EoIB queue pair operations.
Definition eoib.c:425
static void eoib_notify(struct ib_device *ibdev)
Handle device or link status change.
Definition eoib.c:730
static void eoib_remove(struct ib_device *ibdev)
Remove EoIB device.
Definition eoib.c:746
static struct net_device_operations eoib_operations
EoIB network device operations.
Definition eoib.c:68
#define EOIB_NUM_SEND_WQES
Number of EoIB send work queue entries.
Definition eoib.c:47
#define EINFO_EINPROGRESS_JOINING
Definition eoib.c:57
void eoib_set_gateway(struct eoib_device *eoib, struct ib_address_vector *av)
Set EoIB gateway.
Definition eoib.c:881
void eoib_destroy(struct eoib_device *eoib)
Remove EoIB device.
Definition eoib.c:695
#define EOIB_NUM_RECV_WQES
Number of EoIB receive work queue entries.
Definition eoib.c:50
static int eoib_open(struct net_device *netdev)
Open EoIB network device.
Definition eoib.c:528
static struct ib_address_vector * eoib_tx_av(struct eoib_device *eoib, const uint8_t *mac)
Find destination address vector.
Definition eoib.c:189
static struct ib_completion_queue_operations eoib_cq_op
EoIB completion operations.
Definition eoib.c:419
Ethernet over Infiniband.
static int eoib_has_gateway(struct eoib_device *eoib)
Check if EoIB device uses a gateway.
Definition eoib.h:71
#define EOIB_MAGIC
EoIB magic signature.
Definition eoib.h:27
Error codes.
Error message tables.
#define __errortab
Definition errortab.h:22
#define __einfo_errortab(einfo)
Definition errortab.h:24
struct net_device * alloc_etherdev(size_t priv_size)
Allocate Ethernet device.
Definition ethernet.c:265
const char * eth_ntoa(const void *ll_addr)
Transcribe Ethernet address.
Definition ethernet.c:176
Ethernet protocol.
static int is_multicast_ether_addr(const void *addr)
Check if Ethernet address is a multicast address.
Definition ethernet.h:38
static struct net_device * netdev
Definition gdbudp.c:53
#define __unused
Declare a variable or data structure as unused.
Definition compiler.h:573
#define CACHE_EXPENSIVE
Items with a high replacement cost.
Definition malloc.h:115
#define DBGCP(...)
Definition compiler.h:539
#define DBGC_HD(...)
Definition compiler.h:507
#define DBGC(...)
Definition compiler.h:505
#define FILE_LICENCE(_licence)
Declare a particular licence as applying to a file.
Definition compiler.h:896
#define ENOMEM
Not enough space.
Definition errno.h:535
#define EIO
Input/output error.
Definition errno.h:434
#define ENOTTY
Inappropriate I/O control operation.
Definition errno.h:595
int ib_mcast_join(struct ib_device *ibdev, struct ib_queue_pair *qp, struct ib_mc_membership *membership, struct ib_address_vector *av, unsigned int mask, void(*complete)(struct ib_mc_membership *membership, int rc))
Join multicast group.
Definition ib_mcast.c:152
void ib_mcast_leave(struct ib_device *ibdev, struct ib_queue_pair *qp, struct ib_mc_membership *membership)
Leave multicast group.
Definition ib_mcast.c:209
Infiniband multicast groups.
#define IB_GID_ARGS(gid)
Infiniband Global Identifier debug message arguments.
Definition ib_packet.h:49
#define IB_GID_FMT
Infiniband Global Identifier debug message format.
Definition ib_packet.h:46
int ib_resolve_path(struct ib_device *ibdev, struct ib_address_vector *av)
Resolve path.
Definition ib_pathrec.c:249
Infiniband path records.
#define ETH_ZLEN
Definition if_ether.h:11
#define ETH_ALEN
Definition if_ether.h:9
#define htons(value)
Definition byteswap.h:136
String functions.
void * memcpy(void *dest, const void *src, size_t len) __nonnull
void ib_refill_recv(struct ib_device *ibdev, struct ib_queue_pair *qp)
Refill receive work queue.
Definition infiniband.c:556
void ib_destroy_cq(struct ib_device *ibdev, struct ib_completion_queue *cq)
Destroy completion queue.
Definition infiniband.c:145
int ib_create_cq(struct ib_device *ibdev, unsigned int num_cqes, struct ib_completion_queue_operations *op, struct ib_completion_queue **new_cq)
Create completion queue.
Definition infiniband.c:98
int ib_open(struct ib_device *ibdev)
Open port.
Definition infiniband.c:652
int ib_link_rc(struct ib_device *ibdev)
Get link state.
Definition infiniband.c:594
int ib_post_send(struct ib_device *ibdev, struct ib_queue_pair *qp, struct ib_address_vector *dest, struct io_buffer *iobuf)
Post send work queue entry.
Definition infiniband.c:416
void ib_poll_eq(struct ib_device *ibdev)
Poll event queue.
Definition infiniband.c:878
void ib_close(struct ib_device *ibdev)
Close port.
Definition infiniband.c:716
void ib_destroy_qp(struct ib_device *ibdev, struct ib_queue_pair *qp)
Destroy queue pair.
Definition infiniband.c:314
int ib_create_qp(struct ib_device *ibdev, enum ib_queue_pair_type type, unsigned int num_send_wqes, struct ib_completion_queue *send_cq, unsigned int num_recv_wqes, struct ib_completion_queue *recv_cq, struct ib_queue_pair_operations *op, const char *name, struct ib_queue_pair **new_qp)
Create queue pair.
Definition infiniband.c:199
Infiniband protocol.
#define IB_PKEY_FULL
Infiniband partition key full membership flag.
Definition infiniband.h:43
static __always_inline int ib_link_ok(struct ib_device *ibdev)
Check link state of Infiniband device.
Definition infiniband.h:566
#define __ib_driver
Declare an Infiniband driver.
Definition infiniband.h:497
static __always_inline void * ib_qp_get_ownerdata(struct ib_queue_pair *qp)
Get Infiniband queue pair owner-private data.
Definition infiniband.h:665
static __always_inline struct ib_device * ibdev_get(struct ib_device *ibdev)
Get reference to Infiniband device.
Definition infiniband.h:588
static __always_inline void ib_qp_set_ownerdata(struct ib_queue_pair *qp, void *priv)
Set Infiniband queue pair owner-private data.
Definition infiniband.h:654
@ IB_QPT_UD
Definition infiniband.h:142
static int ib_is_open(struct ib_device *ibdev)
Check whether or not Infiniband device is open.
Definition infiniband.h:577
static __always_inline void ibdev_put(struct ib_device *ibdev)
Drop reference to Infiniband device.
Definition infiniband.h:599
void iob_pad(struct io_buffer *iobuf, size_t min_len)
Pad I/O buffer.
Definition iobpad.c:50
void free_iob(struct io_buffer *iobuf)
Free I/O buffer.
Definition iobuf.c:153
struct io_buffer * alloc_iob(size_t len)
Allocate I/O buffer.
Definition iobuf.c:131
I/O buffers.
static size_t iob_headroom(struct io_buffer *iobuf)
Calculate available space at start of an I/O buffer.
Definition iobuf.h:170
#define iob_push(iobuf, len)
Definition iobuf.h:89
#define IOB_ZLEN
Minimum I/O buffer length and alignment.
Definition iobuf.h:29
#define iob_put(iobuf, len)
Definition iobuf.h:125
static size_t iob_len(struct io_buffer *iobuf)
Calculate length of data in an I/O buffer.
Definition iobuf.h:160
#define iob_pull(iobuf, len)
Definition iobuf.h:107
unsigned long tmp
Definition linux_pci.h:65
#define list_for_each_entry_safe(pos, tmp, head, member)
Iterate over entries in a list, safe against deletion of the current entry.
Definition list.h:459
#define list_add_tail(new, head)
Add a new entry to the tail of a list.
Definition list.h:94
#define list_for_each_entry(pos, head, member)
Iterate over entries in a list.
Definition list.h:432
#define list_del(list)
Delete an entry from a list.
Definition list.h:120
#define INIT_LIST_HEAD(list)
Initialise a list head.
Definition list.h:46
#define LIST_HEAD(list)
Declare a static list head.
Definition list.h:38
#define list_for_each_entry_reverse(pos, head, member)
Iterate over entries in a list in reverse order.
Definition list.h:445
#define list_add(new, head)
Add a new entry to the head of a list.
Definition list.h:70
void * zalloc(size_t size)
Allocate cleared memory.
Definition malloc.c:662
Dynamic memory allocation.
#define __cache_discarder(cost)
Declare a cache discarder.
Definition malloc.h:106
struct mschapv2_challenge peer
Peer challenge.
Definition mschapv2.h:1
void netdev_link_err(struct net_device *netdev, int rc)
Mark network device as having a specific link state.
Definition netdevice.c:208
void netdev_rx(struct net_device *netdev, struct io_buffer *iobuf)
Add packet to receive queue.
Definition netdevice.c:549
void netdev_tx_err(struct net_device *netdev, struct io_buffer *iobuf, int rc)
Discard transmitted packet.
Definition netdevice.c:441
void unregister_netdev(struct net_device *netdev)
Unregister network device.
Definition netdevice.c:942
void netdev_tx_complete_err(struct net_device *netdev, struct io_buffer *iobuf, int rc)
Complete network transmission.
Definition netdevice.c:471
void netdev_rx_err(struct net_device *netdev, struct io_buffer *iobuf, int rc)
Discard received packet.
Definition netdevice.c:587
int register_netdev(struct net_device *netdev)
Register network device.
Definition netdevice.c:760
Network device management.
#define for_each_netdev(netdev)
Iterate over all network devices.
Definition netdevice.h:547
static void netdev_init(struct net_device *netdev, struct net_device_operations *op)
Initialise a network device.
Definition netdevice.h:519
static void netdev_nullify(struct net_device *netdev)
Stop using a network device.
Definition netdevice.h:532
static void netdev_put(struct net_device *netdev)
Drop reference to network device.
Definition netdevice.h:576
#define __net_protocol
Declare a network-layer protocol.
Definition netdevice.h:474
static void(* free)(struct refcnt *refcnt)
Definition refcnt.h:55
void retry_poll(void)
Poll the retry timer list.
Definition retry.c:198
#define container_of(ptr, type, field)
Get containing structure.
Definition stddef.h:36
char * strerror(int errno)
Retrieve string representation of error number.
Definition strerror.c:79
int memcmp(const void *first, const void *second, size_t len)
Compare memory regions.
Definition string.c:115
A cache discarder.
Definition malloc.h:93
An EoIB device.
Definition eoib.h:30
struct list_head peers
Peer cache.
Definition eoib.h:50
unsigned int mask
Multicast group additional component mask.
Definition eoib.h:62
void(* duplicate)(struct eoib_device *eoib, struct io_buffer *original)
Send duplicate packet to gateway (or NULL)
Definition eoib.h:57
struct ib_address_vector broadcast
Broadcast address.
Definition eoib.h:40
struct ib_queue_pair * qp
Queue pair.
Definition eoib.h:45
struct ib_device * ibdev
Underlying Infiniband device.
Definition eoib.h:36
struct ib_address_vector gateway
Gateway (if any)
Definition eoib.h:60
const char * name
Name.
Definition eoib.h:32
struct list_head list
List of EoIB devices.
Definition eoib.h:38
struct ib_mc_membership membership
Broadcast group membership.
Definition eoib.h:47
struct ib_completion_queue * cq
Completion queue.
Definition eoib.h:43
struct net_device * netdev
Network device.
Definition eoib.h:34
An EoIB header.
Definition eoib.h:19
uint16_t reserved
Reserved.
Definition eoib.h:23
uint16_t magic
Signature.
Definition eoib.h:21
An EoIB peer cache entry.
Definition eoib.c:78
struct ib_address_vector av
Infiniband address vector.
Definition eoib.c:84
struct list_head list
List of EoIB peer cache entries.
Definition eoib.c:80
uint8_t mac[ETH_ALEN]
Ethernet MAC.
Definition eoib.c:82
An Ethernet link-layer header.
Definition if_ether.h:32
uint8_t h_dest[ETH_ALEN]
Destination MAC address.
Definition if_ether.h:34
uint8_t h_source[ETH_ALEN]
Source MAC address.
Definition if_ether.h:36
An Infiniband Address Vector.
Definition infiniband.h:73
unsigned long qkey
Queue key.
Definition infiniband.h:80
unsigned int gid_present
GID is present.
Definition infiniband.h:91
unsigned int lid
Local ID.
Definition infiniband.h:82
union ib_gid gid
GID, if present.
Definition infiniband.h:93
unsigned long qpn
Queue Pair Number.
Definition infiniband.h:75
Infiniband completion queue operations.
Definition infiniband.h:195
An Infiniband device.
Definition infiniband.h:399
char name[IBDEV_NAME_LEN]
Name of this Infiniband device.
Definition infiniband.h:409
uint16_t pkey
Partition key.
Definition infiniband.h:450
struct device * dev
Underlying device.
Definition infiniband.h:411
An Infiniband upper-layer driver.
Definition infiniband.h:472
An Infiniband multicast group membership.
Definition ib_mcast.h:17
Infiniband queue pair operations.
Definition infiniband.h:148
An Infiniband Queue Pair.
Definition infiniband.h:158
A persistent I/O buffer.
Definition iobuf.h:38
void * data
Start of data.
Definition iobuf.h:53
struct list_head list
List of which this buffer is a member.
Definition iobuf.h:45
A doubly-linked list entry (or list head)
Definition list.h:19
Network device operations.
Definition netdevice.h:214
A network device.
Definition netdevice.h:353
uint8_t hw_addr[MAX_HW_ADDR_LEN]
Hardware address.
Definition netdevice.h:382
A network-layer protocol.
Definition netdevice.h:65
An Infiniband Global Identifier.
Definition ib_packet.h:34
uint16_t words[8]
Definition ib_packet.h:36
int snprintf(char *buf, size_t size, const char *fmt,...)
Write a formatted string to a buffer.
Definition vsprintf.c:383