iPXE
ib_cm.c
Go to the documentation of this file.
00001 /*
00002  * Copyright (C) 2009 Michael Brown <mbrown@fensystems.co.uk>.
00003  *
00004  * This program is free software; you can redistribute it and/or
00005  * modify it under the terms of the GNU General Public License as
00006  * published by the Free Software Foundation; either version 2 of the
00007  * License, or any later version.
00008  *
00009  * This program is distributed in the hope that it will be useful, but
00010  * WITHOUT ANY WARRANTY; without even the implied warranty of
00011  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00012  * General Public License for more details.
00013  *
00014  * You should have received a copy of the GNU General Public License
00015  * along with this program; if not, write to the Free Software
00016  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
00017  * 02110-1301, USA.
00018  *
00019  * You can also choose to distribute this program under the terms of
00020  * the Unmodified Binary Distribution Licence (as given in the file
00021  * COPYING.UBDL), provided that you have satisfied its requirements.
00022  */
00023 
00024 FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
00025 
00026 #include <stdint.h>
00027 #include <stdlib.h>
00028 #include <string.h>
00029 #include <byteswap.h>
00030 #include <errno.h>
00031 #include <assert.h>
00032 #include <ipxe/infiniband.h>
00033 #include <ipxe/ib_mi.h>
00034 #include <ipxe/ib_pathrec.h>
00035 #include <ipxe/ib_cm.h>
00036 
00037 /**
00038  * @file
00039  *
00040  * Infiniband communication management
00041  *
00042  */
00043 
/**
 * List of connections
 *
 * Connections are added by ib_create_conn(), removed by
 * ib_destroy_conn(), and searched by ib_cm_find().
 */
static LIST_HEAD ( ib_cm_conns );
00046 
00047 /**
00048  * Find connection by local communication ID
00049  *
00050  * @v local_id          Local communication ID
00051  * @ret conn            Connection, or NULL
00052  */
00053 static struct ib_connection * ib_cm_find ( uint32_t local_id ) {
00054         struct ib_connection *conn;
00055 
00056         list_for_each_entry ( conn, &ib_cm_conns, list ) {
00057                 if ( conn->local_id == local_id )
00058                         return conn;
00059         }
00060         return NULL;
00061 }
00062 
00063 /**
00064  * Send "ready to use" response
00065  *
00066  * @v ibdev             Infiniband device
00067  * @v mi                Management interface
00068  * @v tid               Transaction identifier
00069  * @v av                Address vector
00070  * @v local_id          Local communication ID
00071  * @v remote_id         Remote communication ID
00072  * @ret rc              Return status code
00073  */
00074 static int ib_cm_send_rtu ( struct ib_device *ibdev,
00075                             struct ib_mad_interface *mi,
00076                             struct ib_mad_tid *tid,
00077                             struct ib_address_vector *av,
00078                             uint32_t local_id, uint32_t remote_id ) {
00079         union ib_mad mad;
00080         struct ib_cm_ready_to_use *rtu = &mad.cm.cm_data.ready_to_use;
00081         int rc;
00082 
00083         /* Construct "ready to use" response */
00084         memset ( &mad, 0, sizeof ( mad ) );
00085         mad.hdr.mgmt_class = IB_MGMT_CLASS_CM;
00086         mad.hdr.class_version = IB_CM_CLASS_VERSION;
00087         mad.hdr.method = IB_MGMT_METHOD_SEND;
00088         memcpy ( &mad.hdr.tid, tid, sizeof ( mad.hdr.tid ) );
00089         mad.hdr.attr_id = htons ( IB_CM_ATTR_READY_TO_USE );
00090         rtu->local_id = htonl ( local_id );
00091         rtu->remote_id = htonl ( remote_id );
00092         if ( ( rc = ib_mi_send ( ibdev, mi, &mad, av ) ) != 0 ) {
00093                 DBGC ( local_id, "CM %08x could not send RTU: %s\n",
00094                        local_id, strerror ( rc ) );
00095                 return rc;
00096         }
00097 
00098         return 0;
00099 }
00100 
00101 /**
00102  * Handle duplicate connection replies
00103  *
00104  * @v ibdev             Infiniband device
00105  * @v mi                Management interface
00106  * @v mad               Received MAD
00107  * @v av                Source address vector
00108  * @ret rc              Return status code
00109  *
00110  * If a "ready to use" MAD is lost, the peer may resend the connection
00111  * reply.  We have to respond to these with duplicate "ready to use"
00112  * MADs, otherwise the peer may time out and drop the connection.
00113  */
00114 static void ib_cm_recv_rep ( struct ib_device *ibdev,
00115                              struct ib_mad_interface *mi,
00116                              union ib_mad *mad,
00117                              struct ib_address_vector *av ) {
00118         struct ib_cm_connect_reply *rep = &mad->cm.cm_data.connect_reply;
00119         struct ib_connection *conn;
00120         uint32_t local_id = ntohl ( rep->remote_id );
00121         int rc;
00122 
00123         /* Identify connection */
00124         conn = ib_cm_find ( local_id );
00125         if ( conn ) {
00126                 /* Try to send "ready to use" reply */
00127                 if ( ( rc = ib_cm_send_rtu ( ibdev, mi, &mad->hdr.tid, av,
00128                                              conn->local_id,
00129                                              conn->remote_id ) ) != 0 ) {
00130                         /* Ignore errors; the remote end will retry */
00131                 }
00132         } else {
00133                 DBGC ( local_id, "CM %08x unexpected REP\n", local_id );
00134         }
00135 }
00136 
00137 /**
00138  * Send reply to disconnection request
00139  *
00140  * @v ibdev             Infiniband device
00141  * @v mi                Management interface
00142  * @v tid               Transaction identifier
00143  * @v av                Address vector
00144  * @v local_id          Local communication ID
00145  * @v remote_id         Remote communication ID
00146  * @ret rc              Return status code
00147  */
00148 static int ib_cm_send_drep ( struct ib_device *ibdev,
00149                              struct ib_mad_interface *mi,
00150                              struct ib_mad_tid *tid,
00151                              struct ib_address_vector *av,
00152                              uint32_t local_id, uint32_t remote_id ) {
00153         union ib_mad mad;
00154         struct ib_cm_disconnect_reply *drep = &mad.cm.cm_data.disconnect_reply;
00155         int rc;
00156 
00157         /* Construct reply to disconnection request */
00158         memset ( &mad, 0, sizeof ( mad ) );
00159         mad.hdr.mgmt_class = IB_MGMT_CLASS_CM;
00160         mad.hdr.class_version = IB_CM_CLASS_VERSION;
00161         mad.hdr.method = IB_MGMT_METHOD_SEND;
00162         memcpy ( &mad.hdr.tid, tid, sizeof ( mad.hdr.tid ) );
00163         mad.hdr.attr_id = htons ( IB_CM_ATTR_DISCONNECT_REPLY );
00164         drep->local_id = htonl ( local_id );
00165         drep->remote_id = htonl ( remote_id );
00166         if ( ( rc = ib_mi_send ( ibdev, mi, &mad, av ) ) != 0 ) {
00167                 DBGC ( local_id, "CM %08x could not send DREP: %s\n",
00168                        local_id, strerror ( rc ) );
00169                 return rc;
00170         }
00171 
00172         return 0;
00173 }
00174 
00175 /**
00176  * Handle disconnection requests
00177  *
00178  * @v ibdev             Infiniband device
00179  * @v mi                Management interface
00180  * @v mad               Received MAD
00181  * @v av                Source address vector
00182  * @ret rc              Return status code
00183  */
00184 static void ib_cm_recv_dreq ( struct ib_device *ibdev,
00185                               struct ib_mad_interface *mi,
00186                               union ib_mad *mad,
00187                               struct ib_address_vector *av ) {
00188         struct ib_cm_disconnect_request *dreq =
00189                 &mad->cm.cm_data.disconnect_request;
00190         struct ib_connection *conn;
00191         uint32_t local_id = ntohl ( dreq->remote_id );
00192         uint32_t remote_id = ntohl ( dreq->local_id );
00193         int rc;
00194 
00195         /* Identify connection */
00196         conn = ib_cm_find ( local_id );
00197         if ( conn ) {
00198                 /* Notify upper layer */
00199                 conn->op->changed ( ibdev, conn->qp, conn, -ENOTCONN,
00200                                     &dreq->private_data,
00201                                     sizeof ( dreq->private_data ) );
00202         } else {
00203                 DBGC ( local_id, "CM %08x unexpected DREQ\n", local_id );
00204         }
00205 
00206         /* Send reply */
00207         if ( ( rc = ib_cm_send_drep ( ibdev, mi, &mad->hdr.tid, av, local_id,
00208                                       remote_id ) ) != 0 ) {
00209                 /* Ignore errors; the remote end will retry */
00210         }
00211 };
00212 
00213 /** Communication management agents */
00214 struct ib_mad_agent ib_cm_agent[] __ib_mad_agent = {
00215         {
00216                 .mgmt_class = IB_MGMT_CLASS_CM,
00217                 .class_version = IB_CM_CLASS_VERSION,
00218                 .attr_id = htons ( IB_CM_ATTR_CONNECT_REPLY ),
00219                 .handle = ib_cm_recv_rep,
00220         },
00221         {
00222                 .mgmt_class = IB_MGMT_CLASS_CM,
00223                 .class_version = IB_CM_CLASS_VERSION,
00224                 .attr_id = htons ( IB_CM_ATTR_DISCONNECT_REQUEST ),
00225                 .handle = ib_cm_recv_dreq,
00226         },
00227 };
00228 
00229 /**
00230  * Convert connection rejection reason to return status code
00231  *
00232  * @v reason            Rejection reason (in network byte order)
00233  * @ret rc              Return status code
00234  */
00235 static int ib_cm_rejection_reason_to_rc ( uint16_t reason ) {
00236         switch ( reason ) {
00237         case htons ( IB_CM_REJECT_BAD_SERVICE_ID ) :
00238                 return -ENODEV;
00239         case htons ( IB_CM_REJECT_STALE_CONN ) :
00240                 return -EALREADY;
00241         case htons ( IB_CM_REJECT_CONSUMER ) :
00242                 return -ENOTTY;
00243         default:
00244                 return -EPERM;
00245         }
00246 }
00247 
00248 /**
00249  * Handle connection request transaction completion
00250  *
00251  * @v ibdev             Infiniband device
00252  * @v mi                Management interface
00253  * @v madx              Management transaction
00254  * @v rc                Status code
00255  * @v mad               Received MAD (or NULL on error)
00256  * @v av                Source address vector (or NULL on error)
00257  */
00258 static void ib_cm_req_complete ( struct ib_device *ibdev,
00259                                  struct ib_mad_interface *mi,
00260                                  struct ib_mad_transaction *madx,
00261                                  int rc, union ib_mad *mad,
00262                                  struct ib_address_vector *av ) {
00263         struct ib_connection *conn = ib_madx_get_ownerdata ( madx );
00264         struct ib_queue_pair *qp = conn->qp;
00265         struct ib_cm_common *common = &mad->cm.cm_data.common;
00266         struct ib_cm_connect_reply *rep = &mad->cm.cm_data.connect_reply;
00267         struct ib_cm_connect_reject *rej = &mad->cm.cm_data.connect_reject;
00268         uint32_t local_id = conn->local_id;
00269         void *private_data = NULL;
00270         size_t private_data_len = 0;
00271 
00272         /* Report failures */
00273         if ( ( rc == 0 ) && ( mad->hdr.status != htons ( IB_MGMT_STATUS_OK ) ))
00274                 rc = -EIO;
00275         if ( rc != 0 ) {
00276                 DBGC ( local_id, "CM %08x connection request failed: %s\n",
00277                        local_id, strerror ( rc ) );
00278                 goto out;
00279         }
00280 
00281         /* Record remote communication ID */
00282         conn->remote_id = ntohl ( common->local_id );
00283 
00284         /* Handle response */
00285         switch ( mad->hdr.attr_id ) {
00286 
00287         case htons ( IB_CM_ATTR_CONNECT_REPLY ) :
00288                 /* Extract fields */
00289                 qp->av.qpn = ( ntohl ( rep->local_qpn ) >> 8 );
00290                 qp->send.psn = ( ntohl ( rep->starting_psn ) >> 8 );
00291                 private_data = &rep->private_data;
00292                 private_data_len = sizeof ( rep->private_data );
00293                 DBGC ( local_id, "CM %08x connected to QPN %#lx PSN %#x\n",
00294                        local_id, qp->av.qpn, qp->send.psn );
00295 
00296                 /* Modify queue pair */
00297                 if ( ( rc = ib_modify_qp ( ibdev, qp ) ) != 0 ) {
00298                         DBGC ( local_id, "CM %08x could not modify queue "
00299                                "pair: %s\n", local_id, strerror ( rc ) );
00300                         goto out;
00301                 }
00302 
00303                 /* Send "ready to use" reply */
00304                 if ( ( rc = ib_cm_send_rtu ( ibdev, mi, &mad->hdr.tid, av,
00305                                              conn->local_id,
00306                                              conn->remote_id ) ) != 0 ) {
00307                         /* Treat as non-fatal */
00308                         rc = 0;
00309                 }
00310                 break;
00311 
00312         case htons ( IB_CM_ATTR_CONNECT_REJECT ) :
00313                 /* Extract fields */
00314                 DBGC ( local_id, "CM %08x connection rejected (reason %d)\n",
00315                        local_id, ntohs ( rej->reason ) );
00316                 /* Private data is valid only for a Consumer Reject */
00317                 if ( rej->reason == htons ( IB_CM_REJECT_CONSUMER ) ) {
00318                         private_data = &rej->private_data;
00319                         private_data_len = sizeof ( rej->private_data );
00320                 }
00321                 rc = ib_cm_rejection_reason_to_rc ( rej->reason );
00322                 break;
00323 
00324         default:
00325                 DBGC ( local_id, "CM %08x unexpected response (attribute "
00326                        "%04x)\n", local_id, ntohs ( mad->hdr.attr_id ) );
00327                 rc = -ENOTSUP;
00328                 break;
00329         }
00330 
00331  out:
00332         /* Destroy the completed transaction */
00333         ib_destroy_madx ( ibdev, ibdev->gsi, madx );
00334         conn->madx = NULL;
00335 
00336         /* Hand off to the upper completion handler */
00337         conn->op->changed ( ibdev, qp, conn, rc, private_data,
00338                             private_data_len );
00339 }
00340 
/**
 * Connection request operations
 *
 * Supplied to ib_create_madx() when the connection request is sent,
 * so that ib_cm_req_complete() is called when the transaction
 * completes.
 */
static struct ib_mad_transaction_operations ib_cm_req_op = {
        .complete = ib_cm_req_complete,
};
00345 
00346 /**
00347  * Handle connection path transaction completion
00348  *
00349  * @v ibdev             Infiniband device
00350  * @v path              Path
00351  * @v rc                Status code
00352  * @v av                Address vector, or NULL on error
00353  */
00354 static void ib_cm_path_complete ( struct ib_device *ibdev,
00355                                   struct ib_path *path, int rc,
00356                                   struct ib_address_vector *av ) {
00357         struct ib_connection *conn = ib_path_get_ownerdata ( path );
00358         struct ib_queue_pair *qp = conn->qp;
00359         union ib_mad mad;
00360         struct ib_cm_connect_request *req = &mad.cm.cm_data.connect_request;
00361         uint32_t local_id = conn->local_id;
00362         size_t private_data_len;
00363 
00364         /* Report failures */
00365         if ( rc != 0 ) {
00366                 DBGC ( local_id, "CM %08x path lookup failed: %s\n",
00367                        local_id, strerror ( rc ) );
00368                 conn->op->changed ( ibdev, qp, conn, rc, NULL, 0 );
00369                 goto out;
00370         }
00371 
00372         /* Update queue pair peer path */
00373         memcpy ( &qp->av, av, sizeof ( qp->av ) );
00374 
00375         /* Construct connection request */
00376         memset ( &mad, 0, sizeof ( mad ) );
00377         mad.hdr.mgmt_class = IB_MGMT_CLASS_CM;
00378         mad.hdr.class_version = IB_CM_CLASS_VERSION;
00379         mad.hdr.method = IB_MGMT_METHOD_SEND;
00380         mad.hdr.attr_id = htons ( IB_CM_ATTR_CONNECT_REQUEST );
00381         req->local_id = htonl ( conn->local_id );
00382         memcpy ( &req->service_id, &conn->service_id,
00383                  sizeof ( req->service_id ) );
00384         memcpy ( &req->local_ca, &ibdev->node_guid, sizeof ( req->local_ca ) );
00385         req->local_qpn__responder_resources = htonl ( ( qp->qpn << 8 ) | 1 );
00386         req->local_eecn__initiator_depth = htonl ( ( 0 << 8 ) | 1 );
00387         req->remote_eecn__remote_timeout__service_type__ee_flow_ctrl =
00388                 htonl ( ( 0x14 << 3 ) | ( IB_CM_TRANSPORT_RC << 1 ) |
00389                         ( 0 << 0 ) );
00390         req->starting_psn__local_timeout__retry_count =
00391                 htonl ( ( qp->recv.psn << 8 ) | ( 0x14 << 3 ) |
00392                         ( 0x07 << 0 ) );
00393         req->pkey = htons ( ibdev->pkey );
00394         req->payload_mtu__rdc_exists__rnr_retry =
00395                 ( ( IB_MTU_2048 << 4 ) | ( 1 << 3 ) | ( 0x07 << 0 ) );
00396         req->max_cm_retries__srq = ( ( 0x0f << 4 ) | ( 0 << 3 ) );
00397         req->primary.local_lid = htons ( ibdev->lid );
00398         req->primary.remote_lid = htons ( conn->qp->av.lid );
00399         memcpy ( &req->primary.local_gid, &ibdev->gid,
00400                  sizeof ( req->primary.local_gid ) );
00401         memcpy ( &req->primary.remote_gid, &conn->qp->av.gid,
00402                  sizeof ( req->primary.remote_gid ) );
00403         req->primary.flow_label__rate =
00404                 htonl ( ( 0 << 12 ) | ( conn->qp->av.rate << 0 ) );
00405         req->primary.hop_limit = 0;
00406         req->primary.sl__subnet_local =
00407                 ( ( conn->qp->av.sl << 4 ) | ( 1 << 3 ) );
00408         req->primary.local_ack_timeout = ( 0x13 << 3 );
00409         private_data_len = conn->private_data_len;
00410         if ( private_data_len > sizeof ( req->private_data ) )
00411                 private_data_len = sizeof ( req->private_data );
00412         memcpy ( &req->private_data, &conn->private_data, private_data_len );
00413 
00414         /* Create connection request */
00415         av->qpn = IB_QPN_GSI;
00416         av->qkey = IB_QKEY_GSI;
00417         conn->madx = ib_create_madx ( ibdev, ibdev->gsi, &mad, av,
00418                                       &ib_cm_req_op );
00419         if ( ! conn->madx ) {
00420                 DBGC ( local_id, "CM %08x could not create connection "
00421                        "request\n", local_id );
00422                 conn->op->changed ( ibdev, qp, conn, rc, NULL, 0 );
00423                 goto out;
00424         }
00425         ib_madx_set_ownerdata ( conn->madx, conn );
00426 
00427  out:
00428         /* Destroy the completed transaction */
00429         ib_destroy_path ( ibdev, path );
00430         conn->path = NULL;
00431 }
00432 
/**
 * Connection path operations
 *
 * Supplied to ib_create_path() by ib_create_conn(), so that
 * ib_cm_path_complete() is called when the path lookup completes.
 */
static struct ib_path_operations ib_cm_path_op = {
        .complete = ib_cm_path_complete,
};
00437 
00438 /**
00439  * Create connection to remote QP
00440  *
00441  * @v ibdev             Infiniband device
00442  * @v qp                Queue pair
00443  * @v dgid              Target GID
00444  * @v service_id        Target service ID
00445  * @v private_data      Connection request private data
00446  * @v private_data_len  Length of connection request private data
00447  * @v op                Connection operations
00448  * @ret conn            Connection
00449  */
00450 struct ib_connection *
00451 ib_create_conn ( struct ib_device *ibdev, struct ib_queue_pair *qp,
00452                  union ib_gid *dgid, union ib_guid *service_id,
00453                  void *private_data, size_t private_data_len,
00454                  struct ib_connection_operations *op ) {
00455         struct ib_connection *conn;
00456         uint32_t local_id;
00457 
00458         /* Allocate and initialise request */
00459         conn = zalloc ( sizeof ( *conn ) + private_data_len );
00460         if ( ! conn )
00461                 goto err_alloc_conn;
00462         conn->ibdev = ibdev;
00463         conn->qp = qp;
00464         memset ( &qp->av, 0, sizeof ( qp->av ) );
00465         qp->av.gid_present = 1;
00466         memcpy ( &qp->av.gid, dgid, sizeof ( qp->av.gid ) );
00467         conn->local_id = local_id = random();
00468         memcpy ( &conn->service_id, service_id, sizeof ( conn->service_id ) );
00469         conn->op = op;
00470         conn->private_data_len = private_data_len;
00471         memcpy ( &conn->private_data, private_data, private_data_len );
00472 
00473         /* Create path */
00474         conn->path = ib_create_path ( ibdev, &qp->av, &ib_cm_path_op );
00475         if ( ! conn->path )
00476                 goto err_create_path;
00477         ib_path_set_ownerdata ( conn->path, conn );
00478 
00479         /* Add to list of connections */
00480         list_add ( &conn->list, &ib_cm_conns );
00481 
00482         DBGC ( local_id, "CM %08x created for IBDEV %s QPN %#lx\n",
00483                local_id, ibdev->name, qp->qpn );
00484         DBGC ( local_id, "CM %08x connecting to " IB_GID_FMT " "
00485                IB_GUID_FMT "\n", local_id, IB_GID_ARGS ( dgid ),
00486                IB_GUID_ARGS ( service_id ) );
00487 
00488         return conn;
00489 
00490         ib_destroy_path ( ibdev, conn->path );
00491  err_create_path:
00492         free ( conn );
00493  err_alloc_conn:
00494         return NULL;
00495 }
00496 
00497 /**
00498  * Destroy connection to remote QP
00499  *
00500  * @v ibdev             Infiniband device
00501  * @v qp                Queue pair
00502  * @v conn              Connection
00503  */
00504 void ib_destroy_conn ( struct ib_device *ibdev,
00505                        struct ib_queue_pair *qp __unused,
00506                        struct ib_connection *conn ) {
00507 
00508         list_del ( &conn->list );
00509         if ( conn->madx )
00510                 ib_destroy_madx ( ibdev, ibdev->gsi, conn->madx );
00511         if ( conn->path )
00512                 ib_destroy_path ( ibdev, conn->path );
00513         free ( conn );
00514 }