iPXE
infiniband.c
Go to the documentation of this file.
00001 /*
00002  * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
00003  *
00004  * This program is free software; you can redistribute it and/or
00005  * modify it under the terms of the GNU General Public License as
00006  * published by the Free Software Foundation; either version 2 of the
00007  * License, or any later version.
00008  *
00009  * This program is distributed in the hope that it will be useful, but
00010  * WITHOUT ANY WARRANTY; without even the implied warranty of
00011  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00012  * General Public License for more details.
00013  *
00014  * You should have received a copy of the GNU General Public License
00015  * along with this program; if not, write to the Free Software
00016  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
00017  * 02110-1301, USA.
00018  *
00019  * You can also choose to distribute this program under the terms of
00020  * the Unmodified Binary Distribution Licence (as given in the file
00021  * COPYING.UBDL), provided that you have satisfied its requirements.
00022  */
00023 
00024 FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
00025 
00026 #include <stdint.h>
00027 #include <stdlib.h>
00028 #include <stdio.h>
00029 #include <string.h>
00030 #include <unistd.h>
00031 #include <byteswap.h>
00032 #include <errno.h>
00033 #include <assert.h>
00034 #include <ipxe/list.h>
00035 #include <ipxe/errortab.h>
00036 #include <ipxe/if_arp.h>
00037 #include <ipxe/netdevice.h>
00038 #include <ipxe/iobuf.h>
00039 #include <ipxe/process.h>
00040 #include <ipxe/profile.h>
00041 #include <ipxe/infiniband.h>
00042 #include <ipxe/ib_mi.h>
00043 #include <ipxe/ib_sma.h>
00044 
00045 /** @file
00046  *
00047  * Infiniband protocol
00048  *
00049  */
00050 
00051 /** List of Infiniband devices */
00052 struct list_head ib_devices = LIST_HEAD_INIT ( ib_devices );
00053 
00054 /** List of open Infiniband devices, in reverse order of opening */
00055 static struct list_head open_ib_devices = LIST_HEAD_INIT ( open_ib_devices );
00056 
00057 /** Infiniband device index */
00058 static unsigned int ibdev_index = 0;
00059 
00060 /** Post send work queue entry profiler */
00061 static struct profiler ib_post_send_profiler __profiler =
00062         { .name = "ib.post_send" };
00063 
00064 /** Post receive work queue entry profiler */
00065 static struct profiler ib_post_recv_profiler __profiler =
00066         { .name = "ib.post_recv" };
00067 
00068 /* Disambiguate the various possible EINPROGRESSes */
00069 #define EINPROGRESS_INIT __einfo_error ( EINFO_EINPROGRESS_INIT )
00070 #define EINFO_EINPROGRESS_INIT __einfo_uniqify \
00071         ( EINFO_EINPROGRESS, 0x01, "Initialising" )
00072 #define EINPROGRESS_ARMED __einfo_error ( EINFO_EINPROGRESS_ARMED )
00073 #define EINFO_EINPROGRESS_ARMED __einfo_uniqify \
00074         ( EINFO_EINPROGRESS, 0x02, "Armed" )
00075 
00076 /** Human-readable message for the link statuses */
00077 struct errortab infiniband_errors[] __errortab = {
00078         __einfo_errortab ( EINFO_EINPROGRESS_INIT ),
00079         __einfo_errortab ( EINFO_EINPROGRESS_ARMED ),
00080 };
00081 
00082 /***************************************************************************
00083  *
00084  * Completion queues
00085  *
00086  ***************************************************************************
00087  */
00088 
00089 /**
00090  * Create completion queue
00091  *
00092  * @v ibdev             Infiniband device
00093  * @v num_cqes          Number of completion queue entries
00094  * @v op                Completion queue operations
00095  * @v new_cq            New completion queue to fill in
00096  * @ret rc              Return status code
00097  */
00098 int ib_create_cq ( struct ib_device *ibdev, unsigned int num_cqes,
00099                    struct ib_completion_queue_operations *op,
00100                    struct ib_completion_queue **new_cq ) {
00101         struct ib_completion_queue *cq;
00102         int rc;
00103 
00104         DBGC ( ibdev, "IBDEV %s creating completion queue\n", ibdev->name );
00105 
00106         /* Allocate and initialise data structure */
00107         cq = zalloc ( sizeof ( *cq ) );
00108         if ( ! cq ) {
00109                 rc = -ENOMEM;
00110                 goto err_alloc_cq;
00111         }
00112         cq->ibdev = ibdev;
00113         list_add_tail ( &cq->list, &ibdev->cqs );
00114         cq->num_cqes = num_cqes;
00115         INIT_LIST_HEAD ( &cq->work_queues );
00116         cq->op = op;
00117 
00118         /* Perform device-specific initialisation and get CQN */
00119         if ( ( rc = ibdev->op->create_cq ( ibdev, cq ) ) != 0 ) {
00120                 DBGC ( ibdev, "IBDEV %s could not initialise completion "
00121                        "queue: %s\n", ibdev->name, strerror ( rc ) );
00122                 goto err_dev_create_cq;
00123         }
00124 
00125         DBGC ( ibdev, "IBDEV %s created %d-entry completion queue %p (%p) "
00126                "with CQN %#lx\n", ibdev->name, num_cqes, cq,
00127                ib_cq_get_drvdata ( cq ), cq->cqn );
00128         *new_cq = cq;
00129         return 0;
00130 
00131         ibdev->op->destroy_cq ( ibdev, cq );
00132  err_dev_create_cq:
00133         list_del ( &cq->list );
00134         free ( cq );
00135  err_alloc_cq:
00136         return rc;
00137 }
00138 
00139 /**
00140  * Destroy completion queue
00141  *
00142  * @v ibdev             Infiniband device
00143  * @v cq                Completion queue
00144  */
00145 void ib_destroy_cq ( struct ib_device *ibdev,
00146                      struct ib_completion_queue *cq ) {
00147         DBGC ( ibdev, "IBDEV %s destroying completion queue %#lx\n",
00148                ibdev->name, cq->cqn );
00149         assert ( list_empty ( &cq->work_queues ) );
00150         ibdev->op->destroy_cq ( ibdev, cq );
00151         list_del ( &cq->list );
00152         free ( cq );
00153 }
00154 
00155 /**
00156  * Poll completion queue
00157  *
00158  * @v ibdev             Infiniband device
00159  * @v cq                Completion queue
00160  */
00161 void ib_poll_cq ( struct ib_device *ibdev,
00162                   struct ib_completion_queue *cq ) {
00163         struct ib_work_queue *wq;
00164 
00165         /* Poll completion queue */
00166         ibdev->op->poll_cq ( ibdev, cq );
00167 
00168         /* Refill receive work queues */
00169         list_for_each_entry ( wq, &cq->work_queues, list ) {
00170                 if ( ! wq->is_send )
00171                         ib_refill_recv ( ibdev, wq->qp );
00172         }
00173 }
00174 
00175 /***************************************************************************
00176  *
00177  * Work queues
00178  *
00179  ***************************************************************************
00180  */
00181 
00182 /**
00183  * Create queue pair
00184  *
00185  * @v ibdev             Infiniband device
00186  * @v type              Queue pair type
00187  * @v num_send_wqes     Number of send work queue entries
00188  * @v send_cq           Send completion queue
00189  * @v num_recv_wqes     Number of receive work queue entries
00190  * @v recv_cq           Receive completion queue
00191  * @v op                Queue pair operations
00192  * @v name              Queue pair name
00193  * @v new_qp            New queue pair to fill in
00194  * @ret rc              Return status code
00195  *
00196  * The queue pair will be left in the INIT state; you must call
00197  * ib_modify_qp() before it is ready to use for sending and receiving.
00198  */
00199 int ib_create_qp ( struct ib_device *ibdev, enum ib_queue_pair_type type,
00200                    unsigned int num_send_wqes,
00201                    struct ib_completion_queue *send_cq,
00202                    unsigned int num_recv_wqes,
00203                    struct ib_completion_queue *recv_cq,
00204                    struct ib_queue_pair_operations *op, const char *name,
00205                    struct ib_queue_pair **new_qp ) {
00206         struct ib_queue_pair *qp;
00207         size_t total_size;
00208         int rc;
00209 
00210         DBGC ( ibdev, "IBDEV %s creating queue pair\n", ibdev->name );
00211 
00212         /* Allocate and initialise data structure */
00213         total_size = ( sizeof ( *qp ) +
00214                        ( num_send_wqes * sizeof ( qp->send.iobufs[0] ) ) +
00215                        ( num_recv_wqes * sizeof ( qp->recv.iobufs[0] ) ) );
00216         qp = zalloc ( total_size );
00217         if ( ! qp ) {
00218                 rc = -ENOMEM;
00219                 goto err_alloc_qp;
00220         }
00221         qp->ibdev = ibdev;
00222         list_add_tail ( &qp->list, &ibdev->qps );
00223         qp->type = type;
00224         qp->send.qp = qp;
00225         qp->send.is_send = 1;
00226         qp->send.cq = send_cq;
00227         list_add_tail ( &qp->send.list, &send_cq->work_queues );
00228         qp->send.psn = ( random() & 0xffffffUL );
00229         qp->send.num_wqes = num_send_wqes;
00230         qp->send.iobufs = ( ( ( void * ) qp ) + sizeof ( *qp ) );
00231         qp->recv.qp = qp;
00232         qp->recv.cq = recv_cq;
00233         list_add_tail ( &qp->recv.list, &recv_cq->work_queues );
00234         qp->recv.psn = ( random() & 0xffffffUL );
00235         qp->recv.num_wqes = num_recv_wqes;
00236         qp->recv.iobufs = ( ( ( void * ) qp ) + sizeof ( *qp ) +
00237                             ( num_send_wqes * sizeof ( qp->send.iobufs[0] ) ));
00238         INIT_LIST_HEAD ( &qp->mgids );
00239         qp->op = op;
00240         qp->name = name;
00241 
00242         /* Perform device-specific initialisation and get QPN */
00243         if ( ( rc = ibdev->op->create_qp ( ibdev, qp ) ) != 0 ) {
00244                 DBGC ( ibdev, "IBDEV %s could not initialise queue pair: "
00245                        "%s\n", ibdev->name, strerror ( rc ) );
00246                 goto err_dev_create_qp;
00247         }
00248         DBGC ( ibdev, "IBDEV %s created queue pair %p (%p) with QPN %#lx\n",
00249                ibdev->name, qp, ib_qp_get_drvdata ( qp ), qp->qpn );
00250         DBGC ( ibdev, "IBDEV %s QPN %#lx has %d send entries at [%p,%p)\n",
00251                ibdev->name, qp->qpn, num_send_wqes, qp->send.iobufs,
00252                qp->recv.iobufs );
00253         DBGC ( ibdev, "IBDEV %s QPN %#lx has %d receive entries at [%p,%p)\n",
00254                ibdev->name, qp->qpn, num_recv_wqes, qp->recv.iobufs,
00255                ( ( ( void * ) qp ) + total_size ) );
00256 
00257         /* Calculate externally-visible QPN */
00258         switch ( type ) {
00259         case IB_QPT_SMI:
00260                 qp->ext_qpn = IB_QPN_SMI;
00261                 break;
00262         case IB_QPT_GSI:
00263                 qp->ext_qpn = IB_QPN_GSI;
00264                 break;
00265         default:
00266                 qp->ext_qpn = qp->qpn;
00267                 break;
00268         }
00269         if ( qp->ext_qpn != qp->qpn ) {
00270                 DBGC ( ibdev, "IBDEV %s QPN %#lx has external QPN %#lx\n",
00271                        ibdev->name, qp->qpn, qp->ext_qpn );
00272         }
00273 
00274         *new_qp = qp;
00275         return 0;
00276 
00277         ibdev->op->destroy_qp ( ibdev, qp );
00278  err_dev_create_qp:
00279         list_del ( &qp->send.list );
00280         list_del ( &qp->recv.list );
00281         list_del ( &qp->list );
00282         free ( qp );
00283  err_alloc_qp:
00284         return rc;
00285 }
00286 
00287 /**
00288  * Modify queue pair
00289  *
00290  * @v ibdev             Infiniband device
00291  * @v qp                Queue pair
00292  * @ret rc              Return status code
00293  */
00294 int ib_modify_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp ) {
00295         int rc;
00296 
00297         DBGC ( ibdev, "IBDEV %s modifying QPN %#lx\n", ibdev->name, qp->qpn );
00298 
00299         if ( ( rc = ibdev->op->modify_qp ( ibdev, qp ) ) != 0 ) {
00300                 DBGC ( ibdev, "IBDEV %s could not modify QPN %#lx: %s\n",
00301                        ibdev->name, qp->qpn, strerror ( rc ) );
00302                 return rc;
00303         }
00304 
00305         return 0;
00306 }
00307 
00308 /**
00309  * Destroy queue pair
00310  *
00311  * @v ibdev             Infiniband device
00312  * @v qp                Queue pair
00313  */
00314 void ib_destroy_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp ) {
00315         struct io_buffer *iobuf;
00316         unsigned int i;
00317 
00318         DBGC ( ibdev, "IBDEV %s destroying QPN %#lx\n",
00319                ibdev->name, qp->qpn );
00320 
00321         assert ( list_empty ( &qp->mgids ) );
00322 
00323         /* Perform device-specific destruction */
00324         ibdev->op->destroy_qp ( ibdev, qp );
00325 
00326         /* Complete any remaining I/O buffers with errors */
00327         for ( i = 0 ; i < qp->send.num_wqes ; i++ ) {
00328                 if ( ( iobuf = qp->send.iobufs[i] ) != NULL )
00329                         ib_complete_send ( ibdev, qp, iobuf, -ECANCELED );
00330         }
00331         for ( i = 0 ; i < qp->recv.num_wqes ; i++ ) {
00332                 if ( ( iobuf = qp->recv.iobufs[i] ) != NULL ) {
00333                         ib_complete_recv ( ibdev, qp, NULL, NULL, iobuf,
00334                                            -ECANCELED );
00335                 }
00336         }
00337 
00338         /* Remove work queues from completion queue */
00339         list_del ( &qp->send.list );
00340         list_del ( &qp->recv.list );
00341 
00342         /* Free QP */
00343         list_del ( &qp->list );
00344         free ( qp );
00345 }
00346 
00347 /**
00348  * Find queue pair by QPN
00349  *
00350  * @v ibdev             Infiniband device
00351  * @v qpn               Queue pair number
00352  * @ret qp              Queue pair, or NULL
00353  */
00354 struct ib_queue_pair * ib_find_qp_qpn ( struct ib_device *ibdev,
00355                                         unsigned long qpn ) {
00356         struct ib_queue_pair *qp;
00357 
00358         list_for_each_entry ( qp, &ibdev->qps, list ) {
00359                 if ( ( qpn == qp->qpn ) || ( qpn == qp->ext_qpn ) )
00360                         return qp;
00361         }
00362         return NULL;
00363 }
00364 
00365 /**
00366  * Find queue pair by multicast GID
00367  *
00368  * @v ibdev             Infiniband device
00369  * @v gid               Multicast GID
00370  * @ret qp              Queue pair, or NULL
00371  */
00372 struct ib_queue_pair * ib_find_qp_mgid ( struct ib_device *ibdev,
00373                                          union ib_gid *gid ) {
00374         struct ib_queue_pair *qp;
00375         struct ib_multicast_gid *mgid;
00376 
00377         list_for_each_entry ( qp, &ibdev->qps, list ) {
00378                 list_for_each_entry ( mgid, &qp->mgids, list ) {
00379                         if ( memcmp ( &mgid->gid, gid,
00380                                       sizeof ( mgid->gid ) ) == 0 ) {
00381                                 return qp;
00382                         }
00383                 }
00384         }
00385         return NULL;
00386 }
00387 
00388 /**
00389  * Find work queue belonging to completion queue
00390  *
00391  * @v cq                Completion queue
00392  * @v qpn               Queue pair number
00393  * @v is_send           Find send work queue (rather than receive)
00394  * @ret wq              Work queue, or NULL if not found
00395  */
00396 struct ib_work_queue * ib_find_wq ( struct ib_completion_queue *cq,
00397                                     unsigned long qpn, int is_send ) {
00398         struct ib_work_queue *wq;
00399 
00400         list_for_each_entry ( wq, &cq->work_queues, list ) {
00401                 if ( ( wq->qp->qpn == qpn ) && ( wq->is_send == is_send ) )
00402                         return wq;
00403         }
00404         return NULL;
00405 }
00406 
00407 /**
00408  * Post send work queue entry
00409  *
00410  * @v ibdev             Infiniband device
00411  * @v qp                Queue pair
00412  * @v dest              Destination address vector
00413  * @v iobuf             I/O buffer
00414  * @ret rc              Return status code
00415  */
00416 int ib_post_send ( struct ib_device *ibdev, struct ib_queue_pair *qp,
00417                    struct ib_address_vector *dest,
00418                    struct io_buffer *iobuf ) {
00419         struct ib_address_vector dest_copy;
00420         int rc;
00421 
00422         /* Start profiling */
00423         profile_start ( &ib_post_send_profiler );
00424 
00425         /* Check queue fill level */
00426         if ( qp->send.fill >= qp->send.num_wqes ) {
00427                 DBGC ( ibdev, "IBDEV %s QPN %#lx send queue full\n",
00428                        ibdev->name, qp->qpn );
00429                 return -ENOBUFS;
00430         }
00431 
00432         /* Use default address vector if none specified */
00433         if ( ! dest )
00434                 dest = &qp->av;
00435 
00436         /* Make modifiable copy of address vector */
00437         memcpy ( &dest_copy, dest, sizeof ( dest_copy ) );
00438         dest = &dest_copy;
00439 
00440         /* Fill in optional parameters in address vector */
00441         if ( ! dest->qkey )
00442                 dest->qkey = qp->qkey;
00443         if ( ! dest->rate )
00444                 dest->rate = IB_RATE_2_5;
00445 
00446         /* Post to hardware */
00447         if ( ( rc = ibdev->op->post_send ( ibdev, qp, dest, iobuf ) ) != 0 ) {
00448                 DBGC ( ibdev, "IBDEV %s QPN %#lx could not post send WQE: "
00449                        "%s\n", ibdev->name, qp->qpn, strerror ( rc ) );
00450                 return rc;
00451         }
00452 
00453         /* Increase fill level */
00454         qp->send.fill++;
00455 
00456         /* Stop profiling */
00457         profile_stop ( &ib_post_send_profiler );
00458 
00459         return 0;
00460 }
00461 
00462 /**
00463  * Post receive work queue entry
00464  *
00465  * @v ibdev             Infiniband device
00466  * @v qp                Queue pair
00467  * @v iobuf             I/O buffer
00468  * @ret rc              Return status code
00469  */
00470 int ib_post_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp,
00471                    struct io_buffer *iobuf ) {
00472         int rc;
00473 
00474         /* Start profiling */
00475         profile_start ( &ib_post_recv_profiler );
00476 
00477         /* Check packet length */
00478         if ( iob_tailroom ( iobuf ) < IB_MAX_PAYLOAD_SIZE ) {
00479                 DBGC ( ibdev, "IBDEV %s QPN %#lx wrong RX buffer size (%zd)\n",
00480                        ibdev->name, qp->qpn, iob_tailroom ( iobuf ) );
00481                 return -EINVAL;
00482         }
00483 
00484         /* Check queue fill level */
00485         if ( qp->recv.fill >= qp->recv.num_wqes ) {
00486                 DBGC ( ibdev, "IBDEV %s QPN %#lx receive queue full\n",
00487                        ibdev->name, qp->qpn );
00488                 return -ENOBUFS;
00489         }
00490 
00491         /* Post to hardware */
00492         if ( ( rc = ibdev->op->post_recv ( ibdev, qp, iobuf ) ) != 0 ) {
00493                 DBGC ( ibdev, "IBDEV %s QPN %#lx could not post receive WQE: "
00494                        "%s\n", ibdev->name, qp->qpn, strerror ( rc ) );
00495                 return rc;
00496         }
00497 
00498         /* Increase fill level */
00499         qp->recv.fill++;
00500 
00501         /* Stop profiling */
00502         profile_stop ( &ib_post_recv_profiler );
00503 
00504         return 0;
00505 }
00506 
00507 /**
00508  * Complete send work queue entry
00509  *
00510  * @v ibdev             Infiniband device
00511  * @v qp                Queue pair
00512  * @v iobuf             I/O buffer
00513  * @v rc                Completion status code
00514  */
00515 void ib_complete_send ( struct ib_device *ibdev, struct ib_queue_pair *qp,
00516                         struct io_buffer *iobuf, int rc ) {
00517 
00518         if ( qp->send.cq->op->complete_send ) {
00519                 qp->send.cq->op->complete_send ( ibdev, qp, iobuf, rc );
00520         } else {
00521                 free_iob ( iobuf );
00522         }
00523         qp->send.fill--;
00524 }
00525 
00526 /**
00527  * Complete receive work queue entry
00528  *
00529  * @v ibdev             Infiniband device
00530  * @v qp                Queue pair
00531  * @v dest              Destination address vector, or NULL
00532  * @v source            Source address vector, or NULL
00533  * @v iobuf             I/O buffer
00534  * @v rc                Completion status code
00535  */
00536 void ib_complete_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp,
00537                         struct ib_address_vector *dest,
00538                         struct ib_address_vector *source,
00539                         struct io_buffer *iobuf, int rc ) {
00540 
00541         if ( qp->recv.cq->op->complete_recv ) {
00542                 qp->recv.cq->op->complete_recv ( ibdev, qp, dest, source,
00543                                                  iobuf, rc );
00544         } else {
00545                 free_iob ( iobuf );
00546         }
00547         qp->recv.fill--;
00548 }
00549 
00550 /**
00551  * Refill receive work queue
00552  *
00553  * @v ibdev             Infiniband device
00554  * @v qp                Queue pair
00555  */
00556 void ib_refill_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp ) {
00557         struct io_buffer *iobuf;
00558         int rc;
00559 
00560         /* Keep filling while unfilled entries remain */
00561         while ( qp->recv.fill < qp->recv.num_wqes ) {
00562 
00563                 /* Allocate I/O buffer */
00564                 iobuf = qp->op->alloc_iob ( IB_MAX_PAYLOAD_SIZE );
00565                 if ( ! iobuf ) {
00566                         /* Non-fatal; we will refill on next attempt */
00567                         return;
00568                 }
00569 
00570                 /* Post I/O buffer */
00571                 if ( ( rc = ib_post_recv ( ibdev, qp, iobuf ) ) != 0 ) {
00572                         DBGC ( ibdev, "IBDEV %s could not refill: %s\n",
00573                                ibdev->name, strerror ( rc ) );
00574                         free_iob ( iobuf );
00575                         /* Give up */
00576                         return;
00577                 }
00578         }
00579 }
00580 
00581 /***************************************************************************
00582  *
00583  * Link control
00584  *
00585  ***************************************************************************
00586  */
00587 
00588 /**
00589  * Get link state
00590  *
00591  * @v ibdev             Infiniband device
00592  * @ret rc              Link status code
00593  */
00594 int ib_link_rc ( struct ib_device *ibdev ) {
00595         switch ( ibdev->port_state ) {
00596         case IB_PORT_STATE_DOWN:        return -ENOTCONN;
00597         case IB_PORT_STATE_INIT:        return -EINPROGRESS_INIT;
00598         case IB_PORT_STATE_ARMED:       return -EINPROGRESS_ARMED;
00599         case IB_PORT_STATE_ACTIVE:      return 0;
00600         default:                        return -EINVAL;
00601         }
00602 }
00603 
00604 /**
00605  * Textual representation of Infiniband link state
00606  *
00607  * @v ibdev             Infiniband device
00608  * @ret link_text       Link state text
00609  */
00610 static const char * ib_link_state_text ( struct ib_device *ibdev ) {
00611         switch ( ibdev->port_state ) {
00612         case IB_PORT_STATE_DOWN:        return "DOWN";
00613         case IB_PORT_STATE_INIT:        return "INIT";
00614         case IB_PORT_STATE_ARMED:       return "ARMED";
00615         case IB_PORT_STATE_ACTIVE:      return "ACTIVE";
00616         default:                        return "UNKNOWN";
00617         }
00618 }
00619 
00620 /**
00621  * Notify drivers of Infiniband device or link state change
00622  *
00623  * @v ibdev             Infiniband device
00624  */
00625 static void ib_notify ( struct ib_device *ibdev ) {
00626         struct ib_driver *driver;
00627 
00628         for_each_table_entry ( driver, IB_DRIVERS )
00629                 driver->notify ( ibdev );
00630 }
00631 
00632 /**
00633  * Notify of Infiniband link state change
00634  *
00635  * @v ibdev             Infiniband device
00636  */
00637 void ib_link_state_changed ( struct ib_device *ibdev ) {
00638 
00639         DBGC ( ibdev, "IBDEV %s link state is %s\n",
00640                ibdev->name, ib_link_state_text ( ibdev ) );
00641 
00642         /* Notify drivers of link state change */
00643         ib_notify ( ibdev );
00644 }
00645 
00646 /**
00647  * Open port
00648  *
00649  * @v ibdev             Infiniband device
00650  * @ret rc              Return status code
00651  */
00652 int ib_open ( struct ib_device *ibdev ) {
00653         int rc;
00654 
00655         /* Increment device open request counter */
00656         if ( ibdev->open_count++ > 0 ) {
00657                 /* Device was already open; do nothing */
00658                 return 0;
00659         }
00660 
00661         /* Open device */
00662         if ( ( rc = ibdev->op->open ( ibdev ) ) != 0 ) {
00663                 DBGC ( ibdev, "IBDEV %s could not open: %s\n",
00664                        ibdev->name, strerror ( rc ) );
00665                 goto err_open;
00666         }
00667 
00668         /* Create subnet management interface */
00669         if ( ( rc = ib_create_mi ( ibdev, IB_QPT_SMI, &ibdev->smi ) ) != 0 ) {
00670                 DBGC ( ibdev, "IBDEV %s could not create SMI: %s\n",
00671                        ibdev->name, strerror ( rc ) );
00672                 goto err_create_smi;
00673         }
00674 
00675         /* Create subnet management agent */
00676         if ( ( rc = ib_create_sma ( ibdev, ibdev->smi ) ) != 0 ) {
00677                 DBGC ( ibdev, "IBDEV %s could not create SMA: %s\n",
00678                        ibdev->name, strerror ( rc ) );
00679                 goto err_create_sma;
00680         }
00681 
00682         /* Create general services interface */
00683         if ( ( rc = ib_create_mi ( ibdev, IB_QPT_GSI, &ibdev->gsi ) ) != 0 ) {
00684                 DBGC ( ibdev, "IBDEV %s could not create GSI: %s\n",
00685                        ibdev->name, strerror ( rc ) );
00686                 goto err_create_gsi;
00687         }
00688 
00689         /* Add to head of open devices list */
00690         list_add ( &ibdev->open_list, &open_ib_devices );
00691 
00692         /* Notify drivers of device state change */
00693         ib_notify ( ibdev );
00694 
00695         assert ( ibdev->open_count == 1 );
00696         return 0;
00697 
00698         ib_destroy_mi ( ibdev, ibdev->gsi );
00699  err_create_gsi:
00700         ib_destroy_sma ( ibdev, ibdev->smi );
00701  err_create_sma:
00702         ib_destroy_mi ( ibdev, ibdev->smi );
00703  err_create_smi:
00704         ibdev->op->close ( ibdev );
00705  err_open:
00706         assert ( ibdev->open_count == 1 );
00707         ibdev->open_count = 0;
00708         return rc;
00709 }
00710 
00711 /**
00712  * Close port
00713  *
00714  * @v ibdev             Infiniband device
00715  */
00716 void ib_close ( struct ib_device *ibdev ) {
00717 
00718         /* Decrement device open request counter */
00719         ibdev->open_count--;
00720 
00721         /* Close device if this was the last remaining requested opening */
00722         if ( ibdev->open_count == 0 ) {
00723                 ib_notify ( ibdev );
00724                 list_del ( &ibdev->open_list );
00725                 ib_destroy_mi ( ibdev, ibdev->gsi );
00726                 ib_destroy_sma ( ibdev, ibdev->smi );
00727                 ib_destroy_mi ( ibdev, ibdev->smi );
00728                 ibdev->op->close ( ibdev );
00729                 ibdev->port_state = IB_PORT_STATE_DOWN;
00730         }
00731 }
00732 
00733 /***************************************************************************
00734  *
00735  * Multicast
00736  *
00737  ***************************************************************************
00738  */
00739 
00740 /**
00741  * Attach to multicast group
00742  *
00743  * @v ibdev             Infiniband device
00744  * @v qp                Queue pair
00745  * @v gid               Multicast GID
00746  * @ret rc              Return status code
00747  *
00748  * Note that this function handles only the local device's attachment
00749  * to the multicast GID; it does not issue the relevant MADs to join
00750  * the multicast group on the subnet.
00751  */
00752 int ib_mcast_attach ( struct ib_device *ibdev, struct ib_queue_pair *qp,
00753                       union ib_gid *gid ) {
00754         struct ib_multicast_gid *mgid;
00755         int rc;
00756 
00757         /* Sanity check */
00758         assert ( qp != NULL );
00759 
00760         /* Add to software multicast GID list */
00761         mgid = zalloc ( sizeof ( *mgid ) );
00762         if ( ! mgid ) {
00763                 rc = -ENOMEM;
00764                 goto err_alloc_mgid;
00765         }
00766         memcpy ( &mgid->gid, gid, sizeof ( mgid->gid ) );
00767         list_add_tail ( &mgid->list, &qp->mgids );
00768 
00769         /* Add to hardware multicast GID list */
00770         if ( ( rc = ibdev->op->mcast_attach ( ibdev, qp, gid ) ) != 0 )
00771                 goto err_dev_mcast_attach;
00772 
00773         return 0;
00774 
00775  err_dev_mcast_attach:
00776         list_del ( &mgid->list );
00777         free ( mgid );
00778  err_alloc_mgid:
00779         return rc;
00780 }
00781 
00782 /**
00783  * Detach from multicast group
00784  *
00785  * @v ibdev             Infiniband device
00786  * @v qp                Queue pair
00787  * @v gid               Multicast GID
00788  */
00789 void ib_mcast_detach ( struct ib_device *ibdev, struct ib_queue_pair *qp,
00790                        union ib_gid *gid ) {
00791         struct ib_multicast_gid *mgid;
00792 
00793         /* Sanity check */
00794         assert ( qp != NULL );
00795 
00796         /* Remove from hardware multicast GID list */
00797         ibdev->op->mcast_detach ( ibdev, qp, gid );
00798 
00799         /* Remove from software multicast GID list */
00800         list_for_each_entry ( mgid, &qp->mgids, list ) {
00801                 if ( memcmp ( &mgid->gid, gid, sizeof ( mgid->gid ) ) == 0 ) {
00802                         list_del ( &mgid->list );
00803                         free ( mgid );
00804                         break;
00805                 }
00806         }
00807 }
00808 
00809 /***************************************************************************
00810  *
00811  * Miscellaneous
00812  *
00813  ***************************************************************************
00814  */
00815 
00816 /**
00817  * Count Infiniband HCA ports
00818  *
00819  * @v ibdev             Infiniband device
00820  * @ret num_ports       Number of ports
00821  */
00822 int ib_count_ports ( struct ib_device *ibdev ) {
00823         struct ib_device *tmp;
00824         int num_ports = 0;
00825 
00826         /* Search for IB devices with the same physical device to
00827          * identify port count.
00828          */
00829         for_each_ibdev ( tmp ) {
00830                 if ( tmp->dev == ibdev->dev )
00831                         num_ports++;
00832         }
00833         return num_ports;
00834 }
00835 
00836 /**
00837  * Set port information
00838  *
00839  * @v ibdev             Infiniband device
00840  * @v mad               Set port information MAD
00841  */
00842 int ib_set_port_info ( struct ib_device *ibdev, union ib_mad *mad ) {
00843         int rc;
00844 
00845         /* Adapters with embedded SMAs do not need to support this method */
00846         if ( ! ibdev->op->set_port_info ) {
00847                 DBGC ( ibdev, "IBDEV %s does not support setting port "
00848                        "information\n", ibdev->name );
00849                 return -ENOTSUP;
00850         }
00851 
00852         if ( ( rc = ibdev->op->set_port_info ( ibdev, mad ) ) != 0 ) {
00853                 DBGC ( ibdev, "IBDEV %s could not set port information: %s\n",
00854                        ibdev->name, strerror ( rc ) );
00855                 return rc;
00856         }
00857 
00858         return 0;
00859 };
00860 
00861 /**
00862  * Set partition key table
00863  *
00864  * @v ibdev             Infiniband device
00865  * @v mad               Set partition key table MAD
00866  */
00867 int ib_set_pkey_table ( struct ib_device *ibdev, union ib_mad *mad ) {
00868         int rc;
00869 
00870         /* Adapters with embedded SMAs do not need to support this method */
00871         if ( ! ibdev->op->set_pkey_table ) {
00872                 DBGC ( ibdev, "IBDEV %s does not support setting partition "
00873                        "key table\n", ibdev->name );
00874                 return -ENOTSUP;
00875         }
00876 
00877         if ( ( rc = ibdev->op->set_pkey_table ( ibdev, mad ) ) != 0 ) {
00878                 DBGC ( ibdev, "IBDEV %s could not set partition key table: "
00879                        "%s\n", ibdev->name, strerror ( rc ) );
00880                 return rc;
00881         }
00882 
00883         return 0;
00884 };
00885 
00886 /***************************************************************************
00887  *
00888  * Event queues
00889  *
00890  ***************************************************************************
00891  */
00892 
00893 /**
00894  * Poll event queue
00895  *
00896  * @v ibdev             Infiniband device
00897  */
00898 void ib_poll_eq ( struct ib_device *ibdev ) {
00899         struct ib_completion_queue *cq;
00900 
00901         /* Poll device's event queue */
00902         ibdev->op->poll_eq ( ibdev );
00903 
00904         /* Poll all completion queues */
00905         list_for_each_entry ( cq, &ibdev->cqs, list )
00906                 ib_poll_cq ( ibdev, cq );
00907 }
00908 
00909 /**
00910  * Single-step the Infiniband event queue
00911  *
00912  * @v process           Infiniband event queue process
00913  */
00914 static void ib_step ( struct process *process __unused ) {
00915         struct ib_device *ibdev;
00916 
00917         list_for_each_entry ( ibdev, &open_ib_devices, open_list )
00918                 ib_poll_eq ( ibdev );
00919 }
00920 
00921 /** Infiniband event queue process */
00922 PERMANENT_PROCESS ( ib_process, ib_step );
00923 
00924 /***************************************************************************
00925  *
00926  * Infiniband device creation/destruction
00927  *
00928  ***************************************************************************
00929  */
00930 
00931 /**
00932  * Allocate Infiniband device
00933  *
00934  * @v priv_size         Size of driver private data area
00935  * @ret ibdev           Infiniband device, or NULL
00936  */
00937 struct ib_device * alloc_ibdev ( size_t priv_size ) {
00938         struct ib_device *ibdev;
00939         void *drv_priv;
00940         size_t total_len;
00941 
00942         total_len = ( sizeof ( *ibdev ) + priv_size );
00943         ibdev = zalloc ( total_len );
00944         if ( ibdev ) {
00945                 drv_priv = ( ( ( void * ) ibdev ) + sizeof ( *ibdev ) );
00946                 ib_set_drvdata ( ibdev, drv_priv );
00947                 INIT_LIST_HEAD ( &ibdev->list );
00948                 INIT_LIST_HEAD ( &ibdev->open_list );
00949                 INIT_LIST_HEAD ( &ibdev->cqs );
00950                 INIT_LIST_HEAD ( &ibdev->qps );
00951                 ibdev->port_state = IB_PORT_STATE_DOWN;
00952                 ibdev->lid = IB_LID_NONE;
00953                 ibdev->pkey = IB_PKEY_DEFAULT;
00954         }
00955         return ibdev;
00956 }
00957 
00958 /**
00959  * Register Infiniband device
00960  *
00961  * @v ibdev             Infiniband device
00962  * @ret rc              Return status code
00963  */
00964 int register_ibdev ( struct ib_device *ibdev ) {
00965         struct ib_driver *driver;
00966         int rc;
00967 
00968         /* Record device index and create device name */
00969         if ( ibdev->name[0] == '\0' ) {
00970                 snprintf ( ibdev->name, sizeof ( ibdev->name ), "inf%d",
00971                            ibdev_index );
00972         }
00973         ibdev->index = ++ibdev_index;
00974 
00975         /* Add to device list */
00976         ibdev_get ( ibdev );
00977         list_add_tail ( &ibdev->list, &ib_devices );
00978         DBGC ( ibdev, "IBDEV %s registered (phys %s)\n", ibdev->name,
00979                ibdev->dev->name );
00980 
00981         /* Probe device */
00982         for_each_table_entry ( driver, IB_DRIVERS ) {
00983                 if ( ( rc = driver->probe ( ibdev ) ) != 0 ) {
00984                         DBGC ( ibdev, "IBDEV %s could not add %s device: %s\n",
00985                                ibdev->name, driver->name, strerror ( rc ) );
00986                         goto err_probe;
00987                 }
00988         }
00989 
00990         return 0;
00991 
00992  err_probe:
00993         for_each_table_entry_continue_reverse ( driver, IB_DRIVERS )
00994                 driver->remove ( ibdev );
00995         list_del ( &ibdev->list );
00996         ibdev_put ( ibdev );
00997         return rc;
00998 }
00999 
01000 /**
01001  * Unregister Infiniband device
01002  *
01003  * @v ibdev             Infiniband device
01004  */
01005 void unregister_ibdev ( struct ib_device *ibdev ) {
01006         struct ib_driver *driver;
01007 
01008         /* Remove device */
01009         for_each_table_entry_reverse ( driver, IB_DRIVERS )
01010                 driver->remove ( ibdev );
01011 
01012         /* Remove from device list */
01013         list_del ( &ibdev->list );
01014         ibdev_put ( ibdev );
01015         DBGC ( ibdev, "IBDEV %s unregistered\n", ibdev->name );
01016 
01017         /* Reset device index if no devices remain */
01018         if ( list_empty ( &ib_devices ) )
01019                 ibdev_index = 0;
01020 }
01021 
01022 /**
01023  * Find Infiniband device by GID
01024  *
01025  * @v gid               GID
01026  * @ret ibdev           Infiniband device, or NULL
01027  */
01028 struct ib_device * find_ibdev ( union ib_gid *gid ) {
01029         struct ib_device *ibdev;
01030 
01031         for_each_ibdev ( ibdev ) {
01032                 if ( memcmp ( gid, &ibdev->gid, sizeof ( *gid ) ) == 0 )
01033                         return ibdev;
01034         }
01035         return NULL;
01036 }
01037 
01038 /**
01039  * Get most recently opened Infiniband device
01040  *
01041  * @ret ibdev           Most recently opened Infiniband device, or NULL
01042  */
01043 struct ib_device * last_opened_ibdev ( void ) {
01044         struct ib_device *ibdev;
01045 
01046         ibdev = list_first_entry ( &open_ib_devices, struct ib_device,
01047                                    open_list );
01048         if ( ! ibdev )
01049                 return NULL;
01050 
01051         assert ( ibdev->open_count != 0 );
01052         return ibdev;
01053 }
01054 
01055 /* Drag in objects via register_ibdev()
       *
       * NOTE(review): presumably this ties the requirement below to
       * register_ibdev() being linked in -- confirm against the
       * definition of REQUIRING_SYMBOL().
       */
01056 REQUIRING_SYMBOL ( register_ibdev );
01057 
01058 /* Drag in Infiniband configuration (the config_infiniband object) */
01059 REQUIRE_OBJECT ( config_infiniband );