iPXE
tcp.c
Go to the documentation of this file.
1 #include <string.h>
2 #include <stdlib.h>
3 #include <stdio.h>
4 #include <assert.h>
5 #include <errno.h>
6 #include <byteswap.h>
7 #include <ipxe/timer.h>
8 #include <ipxe/iobuf.h>
9 #include <ipxe/malloc.h>
10 #include <ipxe/init.h>
11 #include <ipxe/retry.h>
12 #include <ipxe/refcnt.h>
13 #include <ipxe/pending.h>
14 #include <ipxe/xfer.h>
15 #include <ipxe/open.h>
16 #include <ipxe/uri.h>
17 #include <ipxe/netdevice.h>
18 #include <ipxe/profile.h>
19 #include <ipxe/process.h>
20 #include <ipxe/job.h>
21 #include <ipxe/tcpip.h>
22 #include <ipxe/tcp.h>
23 
24 /** @file
25  *
26  * TCP protocol
27  *
28  */
29 
30 FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
31 FILE_SECBOOT ( PERMITTED );
32 
33 /** A TCP connection */
35  /** Reference counter */
36  struct refcnt refcnt;
37  /** List of TCP connections */
38  struct list_head list;
39 
40  /** Flags */
41  unsigned int flags;
42 
43  /** Data transfer interface */
44  struct interface xfer;
45 
46  /** Remote socket address */
48  /** Local port */
49  unsigned int local_port;
50  /** Maximum segment size */
51  size_t mss;
52 
53  /** Current TCP state */
54  unsigned int tcp_state;
55  /** Previous TCP state
56  *
57  * Maintained only for debug messages
58  */
59  unsigned int prev_tcp_state;
60  /** Current sequence number
61  *
62  * Equivalent to SND.UNA in RFC 793 terminology.
63  */
65  /** Unacknowledged sequence count
66  *
67  * Equivalent to (SND.NXT-SND.UNA) in RFC 793 terminology.
68  */
70  /** Send window
71  *
72  * Equivalent to SND.WND in RFC 793 terminology
73  */
75  /** Current acknowledgement number
76  *
77  * Equivalent to RCV.NXT in RFC 793 terminology.
78  */
80  /** Receive window
81  *
82  * Equivalent to RCV.WND in RFC 793 terminology.
83  */
85  /** Received timestamp value
86  *
87  * Updated when a packet is received; copied to ts_recent when
88  * the window is advanced.
89  */
91  /** Most recent received timestamp that advanced the window
92  *
93  * Equivalent to TS.Recent in RFC 1323 terminology.
94  */
96  /** Send window scale
97  *
98  * Equivalent to Snd.Wind.Scale in RFC 1323 terminology
99  */
101  /** Receive window scale
102  *
103  * Equivalent to Rcv.Wind.Scale in RFC 1323 terminology
104  */
106 
107  /** Selective acknowledgement list (in host-endian order) */
109 
110  /** Transmit queue */
112  /** Receive queue */
114  /** Transmission process */
115  struct process process;
116  /** Retransmission timer */
118  /** Keepalive timer */
120  /** Shutdown (TIME_WAIT) timer */
122 
123  /** Pending operations for SYN and FIN */
125  /** Pending operations for transmit queue */
127 };
128 
/**
 * TCP flags
 *
 * Bit flags stored in the @c flags field of a TCP connection.
 */
enum tcp_flags {
	/** TCP data transfer interface has been closed */
	TCP_XFER_CLOSED = 0x0001,
	/** TCP timestamps are enabled */
	TCP_TS_ENABLED = 0x0002,
	/** TCP acknowledgement is pending */
	TCP_ACK_PENDING = 0x0004,
	/** TCP selective acknowledgement is enabled */
	TCP_SACK_ENABLED = 0x0008,
};
140 
/** TCP internal header
 *
 * This is the header that replaces the TCP header for packets
 * enqueued on the receive queue.
 */
struct tcp_rx_queued_header {
	/** SEQ value, in host-endian order
	 *
	 * This represents the SEQ value at the time the packet is
	 * enqueued, and so excludes the SYN, if present.
	 */
	uint32_t seq;
	/** Next SEQ value, in host-endian order */
	uint32_t nxt;
	/** Flags
	 *
	 * Only FIN is valid within this flags byte; all other flags
	 * have already been processed by the time the packet is
	 * enqueued.
	 */
	uint8_t flags;
	/** Reserved
	 *
	 * NOTE(review): the array length was reconstructed so that
	 * the structure occupies a whole number of dwords; confirm
	 * against the upstream definition.
	 */
	uint8_t reserved[3];
};
165 
/**
 * List of registered TCP connections
 *
 * Connections are added by tcp_open() and removed by tcp_close();
 * tcp_demux() walks this list to match a local port number.
 */
static LIST_HEAD ( tcp_conns );

/* TCP statistics
 *
 * NOTE(review): the declaration that this comment documented is not
 * present in this listing; restore it from the upstream file.
 */

/** Transmit profiler (used by tcp_xmit_sack()) */
static struct profiler tcp_tx_profiler __profiler = { .name = "tcp.tx" };

/** Receive profiler */
static struct profiler tcp_rx_profiler __profiler = { .name = "tcp.rx" };

/** Data transfer profiler */
static struct profiler tcp_xfer_profiler __profiler = { .name = "tcp.xfer" };
182 
183 /* Forward declarations */
186 static void tcp_expired ( struct retry_timer *timer, int over );
187 static void tcp_keepalive_expired ( struct retry_timer *timer, int over );
188 static void tcp_wait_expired ( struct retry_timer *timer, int over );
189 static struct tcp_connection * tcp_demux ( unsigned int local_port );
190 static int tcp_rx_ack ( struct tcp_connection *tcp, uint32_t ack,
191  uint32_t win );
192 
193 /**
194  * Name TCP state
195  *
196  * @v state TCP state
197  * @ret name Name of TCP state
198  */
199 static inline __attribute__ (( always_inline )) const char *
200 tcp_state ( int state ) {
201  switch ( state ) {
202  case TCP_CLOSED: return "CLOSED";
203  case TCP_LISTEN: return "LISTEN";
204  case TCP_SYN_SENT: return "SYN_SENT";
205  case TCP_SYN_RCVD: return "SYN_RCVD";
206  case TCP_ESTABLISHED: return "ESTABLISHED";
207  case TCP_FIN_WAIT_1: return "FIN_WAIT_1";
208  case TCP_FIN_WAIT_2: return "FIN_WAIT_2";
209  case TCP_CLOSING_OR_LAST_ACK: return "CLOSING/LAST_ACK";
210  case TCP_TIME_WAIT: return "TIME_WAIT";
211  case TCP_CLOSE_WAIT: return "CLOSE_WAIT";
212  default: return "INVALID";
213  }
214 }
215 
216 /**
217  * Dump TCP state transition
218  *
219  * @v tcp TCP connection
220  */
221 static inline __attribute__ (( always_inline )) void
223 
224  if ( tcp->tcp_state != tcp->prev_tcp_state ) {
225  DBGC ( tcp, "TCP %p transitioned from %s to %s\n", tcp,
226  tcp_state ( tcp->prev_tcp_state ),
227  tcp_state ( tcp->tcp_state ) );
228  }
229  tcp->prev_tcp_state = tcp->tcp_state;
230 }
231 
232 /**
233  * Dump TCP flags
234  *
235  * @v flags TCP flags
236  */
237 static inline __attribute__ (( always_inline )) void
238 tcp_dump_flags ( struct tcp_connection *tcp, unsigned int flags ) {
239  if ( flags & TCP_RST )
240  DBGC2 ( tcp, " RST" );
241  if ( flags & TCP_SYN )
242  DBGC2 ( tcp, " SYN" );
243  if ( flags & TCP_PSH )
244  DBGC2 ( tcp, " PSH" );
245  if ( flags & TCP_FIN )
246  DBGC2 ( tcp, " FIN" );
247  if ( flags & TCP_ACK )
248  DBGC2 ( tcp, " ACK" );
249 }
250 
251 /***************************************************************************
252  *
253  * Open and close
254  *
255  ***************************************************************************
256  */
257 
/**
 * Check if local TCP port is available
 *
 * @v port		Local port number
 * @ret port		Local port number, or negative error
 *
 * A port is unavailable if an existing connection is already bound
 * to it.
 */
static int tcp_port_available ( int port ) {

	/* Refuse any port already claimed by a registered connection */
	if ( tcp_demux ( port ) )
		return -EADDRINUSE;

	return port;
}
268 
269 /**
270  * Open a TCP connection
271  *
272  * @v xfer Data transfer interface
273  * @v peer Peer socket address
274  * @v local Local socket address, or NULL
275  * @ret rc Return status code
276  */
277 static int tcp_open ( struct interface *xfer, struct sockaddr *peer,
278  struct sockaddr *local ) {
279  struct sockaddr_tcpip *st_peer = ( struct sockaddr_tcpip * ) peer;
280  struct sockaddr_tcpip *st_local = ( struct sockaddr_tcpip * ) local;
281  struct tcp_connection *tcp;
282  size_t mtu;
283  int port;
284  int rc;
285 
286  /* Allocate and initialise structure */
287  tcp = zalloc ( sizeof ( *tcp ) );
288  if ( ! tcp )
289  return -ENOMEM;
290  DBGC ( tcp, "TCP %p allocated\n", tcp );
291  ref_init ( &tcp->refcnt, NULL );
292  intf_init ( &tcp->xfer, &tcp_xfer_desc, &tcp->refcnt );
294  timer_init ( &tcp->timer, tcp_expired, &tcp->refcnt );
295  timer_init ( &tcp->keepalive, tcp_keepalive_expired, &tcp->refcnt );
296  timer_init ( &tcp->wait, tcp_wait_expired, &tcp->refcnt );
297  tcp->prev_tcp_state = TCP_CLOSED;
298  tcp->tcp_state = TCP_STATE_SENT ( TCP_SYN );
299  tcp_dump_state ( tcp );
300  tcp->snd_seq = random();
301  INIT_LIST_HEAD ( &tcp->tx_queue );
302  INIT_LIST_HEAD ( &tcp->rx_queue );
303  memcpy ( &tcp->peer, st_peer, sizeof ( tcp->peer ) );
304 
305  /* Calculate MSS */
306  mtu = tcpip_mtu ( &tcp->peer );
307  if ( ! mtu ) {
308  DBGC ( tcp, "TCP %p has no route to %s\n",
309  tcp, sock_ntoa ( peer ) );
310  rc = -ENETUNREACH;
311  goto err;
312  }
313  tcp->mss = ( mtu - sizeof ( struct tcp_header ) );
314 
315  /* Bind to local port */
316  port = tcpip_bind ( st_local, tcp_port_available );
317  if ( port < 0 ) {
318  rc = port;
319  DBGC ( tcp, "TCP %p could not bind: %s\n",
320  tcp, strerror ( rc ) );
321  goto err;
322  }
323  tcp->local_port = port;
324  DBGC ( tcp, "TCP %p bound to port %d\n", tcp, tcp->local_port );
325 
326  /* Start timer to initiate SYN */
327  start_timer_nodelay ( &tcp->timer );
328 
329  /* Add a pending operation for the SYN */
330  pending_get ( &tcp->pending_flags );
331 
332  /* Attach parent interface, transfer reference to connection
333  * list and return
334  */
335  intf_plug_plug ( &tcp->xfer, xfer );
336  list_add ( &tcp->list, &tcp_conns );
337  return 0;
338 
339  err:
340  ref_put ( &tcp->refcnt );
341  return rc;
342 }
343 
344 /**
345  * Close TCP connection
346  *
347  * @v tcp TCP connection
348  * @v rc Reason for close
349  *
350  * Closes the data transfer interface. If the TCP state machine is in
351  * a suitable state, the connection will be deleted.
352  */
353 static void tcp_close ( struct tcp_connection *tcp, int rc ) {
354  struct io_buffer *iobuf;
355  struct io_buffer *tmp;
356 
357  /* Close data transfer interface */
358  intf_shutdown ( &tcp->xfer, rc );
359  tcp->flags |= TCP_XFER_CLOSED;
360 
361  /* If we are in CLOSED, or have otherwise not yet received a
362  * SYN (i.e. we are in LISTEN or SYN_SENT), just delete the
363  * connection.
364  */
365  if ( ! ( tcp->tcp_state & TCP_STATE_RCVD ( TCP_SYN ) ) ) {
366 
367  /* Transition to CLOSED for the sake of debugging messages */
368  tcp->tcp_state = TCP_CLOSED;
369  tcp_dump_state ( tcp );
370 
371  /* Free any unprocessed I/O buffers */
372  list_for_each_entry_safe ( iobuf, tmp, &tcp->rx_queue, list ) {
373  list_del ( &iobuf->list );
374  free_iob ( iobuf );
375  }
376 
377  /* Free any unsent I/O buffers */
378  list_for_each_entry_safe ( iobuf, tmp, &tcp->tx_queue, list ) {
379  list_del ( &iobuf->list );
380  free_iob ( iobuf );
381  pending_put ( &tcp->pending_data );
382  }
383  assert ( ! is_pending ( &tcp->pending_data ) );
384 
385  /* Remove pending operations for SYN and FIN, if applicable */
386  pending_put ( &tcp->pending_flags );
387  pending_put ( &tcp->pending_flags );
388 
389  /* Remove from list and drop reference */
390  process_del ( &tcp->process );
391  stop_timer ( &tcp->timer );
392  stop_timer ( &tcp->keepalive );
393  stop_timer ( &tcp->wait );
394  list_del ( &tcp->list );
395  ref_put ( &tcp->refcnt );
396  DBGC ( tcp, "TCP %p connection deleted\n", tcp );
397  return;
398  }
399 
400  /* If we have not had our SYN acknowledged (i.e. we are in
401  * SYN_RCVD), pretend that it has been acknowledged so that we
402  * can send a FIN without breaking things.
403  */
404  if ( ! ( tcp->tcp_state & TCP_STATE_ACKED ( TCP_SYN ) ) )
405  tcp_rx_ack ( tcp, ( tcp->snd_seq + 1 ), 0 );
406 
407  /* Stop keepalive timer */
408  stop_timer ( &tcp->keepalive );
409 
410  /* If we have no data remaining to send, start sending FIN */
411  if ( list_empty ( &tcp->tx_queue ) &&
412  ! ( tcp->tcp_state & TCP_STATE_SENT ( TCP_FIN ) ) ) {
413 
414  tcp->tcp_state |= TCP_STATE_SENT ( TCP_FIN );
415  tcp_dump_state ( tcp );
416  process_add ( &tcp->process );
417 
418  /* Add a pending operation for the FIN */
419  pending_get ( &tcp->pending_flags );
420  }
421 }
422 
423 /***************************************************************************
424  *
425  * Transmit data path
426  *
427  ***************************************************************************
428  */
429 
430 /**
431  * Calculate transmission window
432  *
433  * @v tcp TCP connection
434  * @ret len Maximum length that can be sent in a single packet
435  */
436 static size_t tcp_xmit_win ( struct tcp_connection *tcp ) {
437  size_t len;
438 
439  /* Not ready if we're not in a suitable connection state */
440  if ( ! TCP_CAN_SEND_DATA ( tcp->tcp_state ) )
441  return 0;
442 
443  /* Length is the minimum of the receiver's window and the path MTU */
444  len = tcp->snd_win;
445  if ( len > TCP_PATH_MTU )
446  len = TCP_PATH_MTU;
447 
448  return len;
449 }
450 
451 /**
452  * Check data-transfer flow control window
453  *
454  * @v tcp TCP connection
455  * @ret len Length of window
456  */
457 static size_t tcp_xfer_window ( struct tcp_connection *tcp ) {
458 
459  /* Not ready if data queue is non-empty. This imposes a limit
460  * of only one unACKed packet in the TX queue at any time; we
461  * do this to conserve memory usage.
462  */
463  if ( ! list_empty ( &tcp->tx_queue ) )
464  return 0;
465 
466  /* Return TCP window length */
467  return tcp_xmit_win ( tcp );
468 }
469 
470 /**
471  * Find selective acknowledgement block
472  *
473  * @v tcp TCP connection
474  * @v seq SEQ value in SACK block (in host-endian order)
475  * @v sack SACK block to fill in (in host-endian order)
476  * @ret len Length of SACK block
477  */
479  struct tcp_sack_block *sack ) {
480  struct io_buffer *iobuf;
481  struct tcp_rx_queued_header *tcpqhdr;
482  uint32_t left = tcp->rcv_ack;
483  uint32_t right = left;
484 
485  /* Find highest block which does not start after SEQ */
486  list_for_each_entry ( iobuf, &tcp->rx_queue, list ) {
487  tcpqhdr = iobuf->data;
488  if ( tcp_cmp ( tcpqhdr->seq, right ) > 0 ) {
489  if ( tcp_cmp ( tcpqhdr->seq, seq ) > 0 )
490  break;
491  left = tcpqhdr->seq;
492  }
493  if ( tcp_cmp ( tcpqhdr->nxt, right ) > 0 )
494  right = tcpqhdr->nxt;
495  }
496 
497  /* Fail if this block does not contain SEQ */
498  if ( tcp_cmp ( right, seq ) < 0 )
499  return 0;
500 
501  /* Populate SACK block */
502  sack->left = left;
503  sack->right = right;
504  return ( right - left );
505 }
506 
507 /**
508  * Update TCP selective acknowledgement list
509  *
510  * @v tcp TCP connection
511  * @v seq SEQ value in first SACK block (in host-endian order)
512  * @ret count Number of SACK blocks
513  */
514 static unsigned int tcp_sack ( struct tcp_connection *tcp, uint32_t seq ) {
515  struct tcp_sack_block sack[TCP_SACK_MAX];
516  unsigned int old = 0;
517  unsigned int new = 0;
518  unsigned int i;
519  uint32_t len;
520 
521  /* Populate first new SACK block */
522  len = tcp_sack_block ( tcp, seq, &sack[0] );
523  if ( len )
524  new++;
525 
526  /* Populate remaining new SACK blocks based on old SACK blocks */
527  for ( old = 0 ; old < TCP_SACK_MAX ; old++ ) {
528 
529  /* Stop if we run out of space in the new list */
530  if ( new == TCP_SACK_MAX )
531  break;
532 
533  /* Skip empty old SACK blocks */
534  if ( tcp->sack[old].left == tcp->sack[old].right )
535  continue;
536 
537  /* Populate new SACK block */
538  len = tcp_sack_block ( tcp, tcp->sack[old].left, &sack[new] );
539  if ( len == 0 )
540  continue;
541 
542  /* Eliminate duplicates */
543  for ( i = 0 ; i < new ; i++ ) {
544  if ( sack[i].left == sack[new].left ) {
545  new--;
546  break;
547  }
548  }
549  new++;
550  }
551 
552  /* Update SACK list */
553  memset ( tcp->sack, 0, sizeof ( tcp->sack ) );
554  memcpy ( tcp->sack, sack, ( new * sizeof ( tcp->sack[0] ) ) );
555  return new;
556 }
557 
558 /**
559  * Process TCP transmit queue
560  *
561  * @v tcp TCP connection
562  * @v max_len Maximum length to process
563  * @v dest I/O buffer to fill with data, or NULL
564  * @v remove Remove data from queue
565  * @ret len Length of data processed
566  *
567  * This processes at most @c max_len bytes from the TCP connection's
568  * transmit queue. Data will be copied into the @c dest I/O buffer
569  * (if provided) and, if @c remove is true, removed from the transmit
570  * queue.
571  */
572 static size_t tcp_process_tx_queue ( struct tcp_connection *tcp, size_t max_len,
573  struct io_buffer *dest, int remove ) {
574  struct io_buffer *iobuf;
575  struct io_buffer *tmp;
576  size_t frag_len;
577  size_t len = 0;
578 
579  list_for_each_entry_safe ( iobuf, tmp, &tcp->tx_queue, list ) {
580  frag_len = iob_len ( iobuf );
581  if ( frag_len > max_len )
582  frag_len = max_len;
583  if ( dest ) {
584  memcpy ( iob_put ( dest, frag_len ), iobuf->data,
585  frag_len );
586  }
587  if ( remove ) {
588  iob_pull ( iobuf, frag_len );
589  if ( ! iob_len ( iobuf ) ) {
590  list_del ( &iobuf->list );
591  free_iob ( iobuf );
592  pending_put ( &tcp->pending_data );
593  }
594  }
595  len += frag_len;
596  max_len -= frag_len;
597  }
598  return len;
599 }
600 
601 /**
602  * Transmit any outstanding data (with selective acknowledgement)
603  *
604  * @v tcp TCP connection
605  * @v sack_seq SEQ for first selective acknowledgement (if any)
606  *
607  * Transmits any outstanding data on the connection.
608  *
609  * Note that even if an error is returned, the retransmission timer
610  * will have been started if necessary, and so the stack will
611  * eventually attempt to retransmit the failed packet.
612  */
613 static void tcp_xmit_sack ( struct tcp_connection *tcp, uint32_t sack_seq ) {
614  struct io_buffer *iobuf;
615  struct tcp_header *tcphdr;
616  struct tcp_mss_option *mssopt;
621  struct tcp_sack_block *sack;
622  void *payload;
623  unsigned int flags;
624  unsigned int sack_count;
625  unsigned int i;
626  size_t len = 0;
627  size_t sack_len;
628  uint32_t seq_len;
629  uint32_t max_rcv_win;
630  uint32_t max_representable_win;
631  int rc;
632 
633  /* Start profiling */
634  profile_start ( &tcp_tx_profiler );
635 
636  /* If retransmission timer is already running, do nothing */
637  if ( timer_running ( &tcp->timer ) )
638  return;
639 
640  /* Calculate both the actual (payload) and sequence space
641  * lengths that we wish to transmit.
642  */
643  if ( TCP_CAN_SEND_DATA ( tcp->tcp_state ) ) {
644  len = tcp_process_tx_queue ( tcp, tcp_xmit_win ( tcp ),
645  NULL, 0 );
646  }
647  seq_len = len;
648  flags = TCP_FLAGS_SENDING ( tcp->tcp_state );
649  if ( flags & ( TCP_SYN | TCP_FIN ) ) {
650  /* SYN or FIN consume one byte, and we can never send both */
651  assert ( ! ( ( flags & TCP_SYN ) && ( flags & TCP_FIN ) ) );
652  seq_len++;
653  }
654  tcp->snd_sent = seq_len;
655 
656  /* If we have nothing to transmit, stop now */
657  if ( ( seq_len == 0 ) && ! ( tcp->flags & TCP_ACK_PENDING ) )
658  return;
659 
660  /* If we are transmitting anything that requires
661  * acknowledgement (i.e. consumes sequence space), start the
662  * retransmission timer. Do this before attempting to
663  * allocate the I/O buffer, in case allocation itself fails.
664  */
665  if ( seq_len )
666  start_timer ( &tcp->timer );
667 
668  /* Allocate I/O buffer */
669  iobuf = alloc_iob ( len + TCP_MAX_HEADER_LEN );
670  if ( ! iobuf ) {
671  DBGC ( tcp, "TCP %p could not allocate iobuf for %08x..%08x "
672  "%08x\n", tcp, tcp->snd_seq, ( tcp->snd_seq + seq_len ),
673  tcp->rcv_ack );
674  return;
675  }
676  iob_reserve ( iobuf, TCP_MAX_HEADER_LEN );
677 
678  /* Fill data payload from transmit queue */
679  tcp_process_tx_queue ( tcp, len, iobuf, 0 );
680 
681  /* Expand receive window if possible */
682  max_rcv_win = xfer_window ( &tcp->xfer );
683  if ( max_rcv_win > TCP_MAX_WINDOW_SIZE )
684  max_rcv_win = TCP_MAX_WINDOW_SIZE;
685  max_representable_win = ( 0xffff << tcp->rcv_win_scale );
686  if ( max_rcv_win > max_representable_win )
687  max_rcv_win = max_representable_win;
688  max_rcv_win &= ~0x03; /* Keep everything dword-aligned */
689  if ( tcp->rcv_win < max_rcv_win )
690  tcp->rcv_win = max_rcv_win;
691 
692  /* Fill up the TCP header */
693  payload = iobuf->data;
694  if ( flags & TCP_SYN ) {
695  mssopt = iob_push ( iobuf, sizeof ( *mssopt ) );
696  mssopt->kind = TCP_OPTION_MSS;
697  mssopt->length = sizeof ( *mssopt );
698  mssopt->mss = htons ( tcp->mss );
699  wsopt = iob_push ( iobuf, sizeof ( *wsopt ) );
700  wsopt->nop = TCP_OPTION_NOP;
701  wsopt->wsopt.kind = TCP_OPTION_WS;
702  wsopt->wsopt.length = sizeof ( wsopt->wsopt );
704  spopt = iob_push ( iobuf, sizeof ( *spopt ) );
705  memset ( spopt->nop, TCP_OPTION_NOP, sizeof ( spopt->nop ) );
707  spopt->spopt.length = sizeof ( spopt->spopt );
708  }
709  if ( ( flags & TCP_SYN ) || ( tcp->flags & TCP_TS_ENABLED ) ) {
710  tsopt = iob_push ( iobuf, sizeof ( *tsopt ) );
711  memset ( tsopt->nop, TCP_OPTION_NOP, sizeof ( tsopt->nop ) );
712  tsopt->tsopt.kind = TCP_OPTION_TS;
713  tsopt->tsopt.length = sizeof ( tsopt->tsopt );
714  tsopt->tsopt.tsval = htonl ( currticks() );
715  tsopt->tsopt.tsecr = htonl ( tcp->ts_recent );
716  }
717  if ( ( tcp->flags & TCP_SACK_ENABLED ) &&
718  ( ! list_empty ( &tcp->rx_queue ) ) &&
719  ( ( sack_count = tcp_sack ( tcp, sack_seq ) ) != 0 ) ) {
720  sack_len = ( sack_count * sizeof ( *sack ) );
721  sackopt = iob_push ( iobuf, ( sizeof ( *sackopt ) + sack_len ));
722  memset ( sackopt->nop, TCP_OPTION_NOP, sizeof ( sackopt->nop ));
723  sackopt->sackopt.kind = TCP_OPTION_SACK;
724  sackopt->sackopt.length =
725  ( sizeof ( sackopt->sackopt ) + sack_len );
726  sack = ( ( ( void * ) sackopt ) + sizeof ( *sackopt ) );
727  for ( i = 0 ; i < sack_count ; i++, sack++ ) {
728  sack->left = htonl ( tcp->sack[i].left );
729  sack->right = htonl ( tcp->sack[i].right );
730  }
731  }
732  if ( len != 0 )
733  flags |= TCP_PSH;
734  tcphdr = iob_push ( iobuf, sizeof ( *tcphdr ) );
735  memset ( tcphdr, 0, sizeof ( *tcphdr ) );
736  tcphdr->src = htons ( tcp->local_port );
737  tcphdr->dest = tcp->peer.st_port;
738  tcphdr->seq = htonl ( tcp->snd_seq );
739  tcphdr->ack = htonl ( tcp->rcv_ack );
740  tcphdr->hlen = ( ( payload - iobuf->data ) << 2 );
741  tcphdr->flags = flags;
742  tcphdr->win = htons ( tcp->rcv_win >> tcp->rcv_win_scale );
743  tcphdr->csum = tcpip_chksum ( iobuf->data, iob_len ( iobuf ) );
744 
745  /* Dump header */
746  DBGC2 ( tcp, "TCP %p TX %d->%d %08x..%08x %08x %4zd",
747  tcp, ntohs ( tcphdr->src ), ntohs ( tcphdr->dest ),
748  ntohl ( tcphdr->seq ), ( ntohl ( tcphdr->seq ) + seq_len ),
749  ntohl ( tcphdr->ack ), len );
750  tcp_dump_flags ( tcp, tcphdr->flags );
751  DBGC2 ( tcp, "\n" );
752 
753  /* Transmit packet */
754  if ( ( rc = tcpip_tx ( iobuf, &tcp_protocol, NULL, &tcp->peer, NULL,
755  &tcphdr->csum ) ) != 0 ) {
756  DBGC ( tcp, "TCP %p could not transmit %08x..%08x %08x: %s\n",
757  tcp, tcp->snd_seq, ( tcp->snd_seq + tcp->snd_sent ),
758  tcp->rcv_ack, strerror ( rc ) );
759  return;
760  }
761 
762  /* Clear ACK-pending flag */
763  tcp->flags &= ~TCP_ACK_PENDING;
764 
765  profile_stop ( &tcp_tx_profiler );
766 }
767 
768 /**
769  * Transmit any outstanding data
770  *
771  * @v tcp TCP connection
772  */
773 static void tcp_xmit ( struct tcp_connection *tcp ) {
774 
775  /* Transmit without an explicit first SACK */
776  tcp_xmit_sack ( tcp, tcp->rcv_ack );
777 }
778 
779 /** TCP process descriptor */
780 static struct process_descriptor tcp_process_desc =
782 
783 /**
784  * Retransmission timer expired
785  *
786  * @v timer Retransmission timer
787  * @v over Failure indicator
788  */
789 static void tcp_expired ( struct retry_timer *timer, int over ) {
790  struct tcp_connection *tcp =
791  container_of ( timer, struct tcp_connection, timer );
792 
793  DBGC ( tcp, "TCP %p timer %s in %s for %08x..%08x %08x\n", tcp,
794  ( over ? "expired" : "fired" ), tcp_state ( tcp->tcp_state ),
795  tcp->snd_seq, ( tcp->snd_seq + tcp->snd_sent ), tcp->rcv_ack );
796 
797  assert ( ( tcp->tcp_state == TCP_SYN_SENT ) ||
798  ( tcp->tcp_state == TCP_SYN_RCVD ) ||
799  ( tcp->tcp_state == TCP_ESTABLISHED ) ||
800  ( tcp->tcp_state == TCP_FIN_WAIT_1 ) ||
801  ( tcp->tcp_state == TCP_CLOSE_WAIT ) ||
802  ( tcp->tcp_state == TCP_CLOSING_OR_LAST_ACK ) );
803 
804  if ( over ) {
805  /* If we have finally timed out and given up,
806  * terminate the connection
807  */
808  tcp->tcp_state = TCP_CLOSED;
809  tcp_dump_state ( tcp );
810  tcp_close ( tcp, -ETIMEDOUT );
811  } else {
812  /* Otherwise, retransmit the packet */
813  tcp_xmit ( tcp );
814  }
815 }
816 
817 /**
818  * Keepalive timer expired
819  *
820  * @v timer Keepalive timer
821  * @v over Failure indicator
822  */
824  int over __unused ) {
825  struct tcp_connection *tcp =
827 
828  DBGC ( tcp, "TCP %p sending keepalive\n", tcp );
829 
830  /* Reset keepalive timer */
832 
833  /* Send keepalive. We do this only to preserve or restore
834  * state in intermediate devices (e.g. firewall NAT tables);
835  * we don't actually care about eliciting a response to verify
836  * that the peer is still alive. We therefore send just a
837  * pure ACK, to keep our transmit path simple.
838  */
839  tcp->flags |= TCP_ACK_PENDING;
840  tcp_xmit ( tcp );
841 }
842 
843 /**
844  * Shutdown timer expired
845  *
846  * @v timer Shutdown timer
847  * @v over Failure indicator
848  */
849 static void tcp_wait_expired ( struct retry_timer *timer, int over __unused ) {
850  struct tcp_connection *tcp =
851  container_of ( timer, struct tcp_connection, wait );
852 
853  assert ( tcp->tcp_state == TCP_TIME_WAIT );
854 
855  DBGC ( tcp, "TCP %p wait complete in %s for %08x..%08x %08x\n", tcp,
856  tcp_state ( tcp->tcp_state ), tcp->snd_seq,
857  ( tcp->snd_seq + tcp->snd_sent ), tcp->rcv_ack );
858 
859  tcp->tcp_state = TCP_CLOSED;
860  tcp_dump_state ( tcp );
861  tcp_close ( tcp, 0 );
862 }
863 
864 /**
865  * Send RST response to incoming packet
866  *
867  * @v in_tcphdr TCP header of incoming packet
868  * @ret rc Return status code
869  */
870 static int tcp_xmit_reset ( struct tcp_connection *tcp,
871  struct sockaddr_tcpip *st_dest,
872  struct tcp_header *in_tcphdr ) {
873  struct io_buffer *iobuf;
874  struct tcp_header *tcphdr;
875  int rc;
876 
877  /* Allocate space for dataless TX buffer */
878  iobuf = alloc_iob ( TCP_MAX_HEADER_LEN );
879  if ( ! iobuf ) {
880  DBGC ( tcp, "TCP %p could not allocate iobuf for RST "
881  "%08x..%08x %08x\n", tcp, ntohl ( in_tcphdr->ack ),
882  ntohl ( in_tcphdr->ack ), ntohl ( in_tcphdr->seq ) );
883  return -ENOMEM;
884  }
885  iob_reserve ( iobuf, TCP_MAX_HEADER_LEN );
886 
887  /* Construct RST response */
888  tcphdr = iob_push ( iobuf, sizeof ( *tcphdr ) );
889  memset ( tcphdr, 0, sizeof ( *tcphdr ) );
890  tcphdr->src = in_tcphdr->dest;
891  tcphdr->dest = in_tcphdr->src;
892  tcphdr->seq = in_tcphdr->ack;
893  tcphdr->ack = in_tcphdr->seq;
894  tcphdr->hlen = ( ( sizeof ( *tcphdr ) / 4 ) << 4 );
895  tcphdr->flags = ( TCP_RST | TCP_ACK );
896  tcphdr->win = htons ( 0 );
897  tcphdr->csum = tcpip_chksum ( iobuf->data, iob_len ( iobuf ) );
898 
899  /* Dump header */
900  DBGC2 ( tcp, "TCP %p TX %d->%d %08x..%08x %08x %4d",
901  tcp, ntohs ( tcphdr->src ), ntohs ( tcphdr->dest ),
902  ntohl ( tcphdr->seq ), ( ntohl ( tcphdr->seq ) ),
903  ntohl ( tcphdr->ack ), 0 );
904  tcp_dump_flags ( tcp, tcphdr->flags );
905  DBGC2 ( tcp, "\n" );
906 
907  /* Transmit packet */
908  if ( ( rc = tcpip_tx ( iobuf, &tcp_protocol, NULL, st_dest,
909  NULL, &tcphdr->csum ) ) != 0 ) {
910  DBGC ( tcp, "TCP %p could not transmit RST %08x..%08x %08x: "
911  "%s\n", tcp, ntohl ( in_tcphdr->ack ),
912  ntohl ( in_tcphdr->ack ), ntohl ( in_tcphdr->seq ),
913  strerror ( rc ) );
914  return rc;
915  }
916 
917  return 0;
918 }
919 
920 /***************************************************************************
921  *
922  * Receive data path
923  *
924  ***************************************************************************
925  */
926 
927 /**
928  * Identify TCP connection by local port number
929  *
930  * @v local_port Local port
931  * @ret tcp TCP connection, or NULL
932  */
933 static struct tcp_connection * tcp_demux ( unsigned int local_port ) {
934  struct tcp_connection *tcp;
935 
936  list_for_each_entry ( tcp, &tcp_conns, list ) {
937  if ( tcp->local_port == local_port )
938  return tcp;
939  }
940  return NULL;
941 }
942 
943 /**
944  * Parse TCP received options
945  *
946  * @v tcp TCP connection (may be NULL)
947  * @v tcphdr TCP header
948  * @v hlen TCP header length
949  * @v options Options structure to fill in
950  * @ret rc Return status code
951  */
952 static int tcp_rx_opts ( struct tcp_connection *tcp,
953  const struct tcp_header *tcphdr, size_t hlen,
954  struct tcp_options *options ) {
955  const void *data = ( ( ( void * ) tcphdr ) + sizeof ( *tcphdr ) );
956  const void *end = ( ( ( void * ) tcphdr ) + hlen );
957  const struct tcp_option *option;
958  unsigned int kind;
959  size_t remaining;
960  size_t min;
961 
962  /* Sanity check */
963  assert ( hlen >= sizeof ( *tcphdr ) );
964 
965  /* Parse options */
966  memset ( options, 0, sizeof ( *options ) );
967  while ( ( remaining = ( end - data ) ) ) {
968 
969  /* Extract option code */
970  option = data;
971  kind = option->kind;
972 
973  /* Handle single-byte options */
974  if ( kind == TCP_OPTION_END )
975  break;
976  if ( kind == TCP_OPTION_NOP ) {
977  data++;
978  continue;
979  }
980 
981  /* Handle multi-byte options */
982  min = sizeof ( *option );
983  switch ( kind ) {
984  case TCP_OPTION_MSS:
985  /* Ignore received MSS */
986  break;
987  case TCP_OPTION_WS:
988  options->wsopt = data;
989  min = sizeof ( *options->wsopt );
990  break;
992  options->spopt = data;
993  min = sizeof ( *options->spopt );
994  break;
995  case TCP_OPTION_SACK:
996  /* Ignore received SACKs */
997  break;
998  case TCP_OPTION_TS:
999  options->tsopt = data;
1000  min = sizeof ( *options->tsopt );
1001  break;
1002  default:
1003  DBGC ( tcp, "TCP %p received unknown option %d\n",
1004  tcp, kind );
1005  break;
1006  }
1007  if ( remaining < min ) {
1008  DBGC ( tcp, "TCP %p received truncated option %d\n",
1009  tcp, kind );
1010  return -EINVAL;
1011  }
1012  if ( option->length < min ) {
1013  DBGC ( tcp, "TCP %p received underlength option %d\n",
1014  tcp, kind );
1015  return -EINVAL;
1016  }
1017  if ( option->length > remaining ) {
1018  DBGC ( tcp, "TCP %p received overlength option %d\n",
1019  tcp, kind );
1020  return -EINVAL;
1021  }
1022  data += option->length;
1023  }
1024 
1025  return 0;
1026 }
1027 
1028 /**
1029  * Consume received sequence space
1030  *
1031  * @v tcp TCP connection
1032  * @v seq_len Sequence space length to consume
1033  */
1034 static void tcp_rx_seq ( struct tcp_connection *tcp, uint32_t seq_len ) {
1035  unsigned int sack;
1036 
1037  /* Sanity check */
1038  assert ( seq_len > 0 );
1039 
1040  /* Update acknowledgement number */
1041  tcp->rcv_ack += seq_len;
1042 
1043  /* Update window */
1044  if ( tcp->rcv_win > seq_len ) {
1045  tcp->rcv_win -= seq_len;
1046  } else {
1047  tcp->rcv_win = 0;
1048  }
1049 
1050  /* Update timestamp */
1051  tcp->ts_recent = tcp->ts_val;
1052 
1053  /* Update SACK list */
1054  for ( sack = 0 ; sack < TCP_SACK_MAX ; sack++ ) {
1055  if ( tcp->sack[sack].left == tcp->sack[sack].right )
1056  continue;
1057  if ( tcp_cmp ( tcp->sack[sack].left, tcp->rcv_ack ) < 0 )
1058  tcp->sack[sack].left = tcp->rcv_ack;
1059  if ( tcp_cmp ( tcp->sack[sack].right, tcp->rcv_ack ) < 0 )
1060  tcp->sack[sack].right = tcp->rcv_ack;
1061  }
1062 
1063  /* Mark ACK as pending */
1064  tcp->flags |= TCP_ACK_PENDING;
1065 }
1066 
1067 /**
1068  * Handle TCP received SYN
1069  *
1070  * @v tcp TCP connection
1071  * @v seq SEQ value (in host-endian order)
1072  * @v options TCP options
1073  * @ret rc Return status code
1074  */
1075 static int tcp_rx_syn ( struct tcp_connection *tcp, uint32_t seq,
1076  struct tcp_options *options ) {
1077 
1078  /* Synchronise sequence numbers on first SYN */
1079  if ( ! ( tcp->tcp_state & TCP_STATE_RCVD ( TCP_SYN ) ) ) {
1080  tcp->rcv_ack = seq;
1081  if ( options->tsopt )
1082  tcp->flags |= TCP_TS_ENABLED;
1083  if ( options->spopt )
1084  tcp->flags |= TCP_SACK_ENABLED;
1085  if ( options->wsopt ) {
1086  tcp->snd_win_scale = options->wsopt->scale;
1088  }
1089  DBGC ( tcp, "TCP %p using %stimestamps, %sSACK, TX window "
1090  "x%d, RX window x%d\n", tcp,
1091  ( ( tcp->flags & TCP_TS_ENABLED ) ? "" : "no " ),
1092  ( ( tcp->flags & TCP_SACK_ENABLED ) ? "" : "no " ),
1093  ( 1 << tcp->snd_win_scale ),
1094  ( 1 << tcp->rcv_win_scale ) );
1095  }
1096 
1097  /* Ignore duplicate SYN */
1098  if ( seq != tcp->rcv_ack )
1099  return 0;
1100 
1101  /* Acknowledge SYN */
1102  tcp_rx_seq ( tcp, 1 );
1103 
1104  /* Mark SYN as received and start sending ACKs with each packet */
1105  tcp->tcp_state |= ( TCP_STATE_SENT ( TCP_ACK ) |
1106  TCP_STATE_RCVD ( TCP_SYN ) );
1107 
1108  return 0;
1109 }
1110 
1111 /**
1112  * Handle TCP received ACK
1113  *
1114  * @v tcp TCP connection
1115  * @v ack ACK value (in host-endian order)
1116  * @v win WIN value (in host-endian order)
1117  * @ret rc Return status code
1118  */
1119 static int tcp_rx_ack ( struct tcp_connection *tcp, uint32_t ack,
1120  uint32_t win ) {
1121  uint32_t ack_len = ( ack - tcp->snd_seq );
1122  size_t len;
1123  unsigned int acked_flags;
1124 
1125  /* Check for out-of-range or old duplicate ACKs */
1126  if ( ack_len > tcp->snd_sent ) {
1127  DBGC ( tcp, "TCP %p received ACK for %08x..%08x, "
1128  "sent only %08x..%08x\n", tcp, tcp->snd_seq,
1129  ( tcp->snd_seq + ack_len ), tcp->snd_seq,
1130  ( tcp->snd_seq + tcp->snd_sent ) );
1131 
1132  if ( TCP_HAS_BEEN_ESTABLISHED ( tcp->tcp_state ) ) {
1133  /* Just ignore what might be old duplicate ACKs */
1134  return 0;
1135  } else {
1136  /* Send RST if an out-of-range ACK is received
1137  * on a not-yet-established connection, as per
1138  * RFC 793.
1139  */
1140  return -EINVAL;
1141  }
1142  }
1143 
1144  /* Update window size */
1145  tcp->snd_win = win;
1146 
1147  /* Hold off (or start) the keepalive timer, if applicable */
1148  if ( ! ( tcp->tcp_state & TCP_STATE_SENT ( TCP_FIN ) ) )
1150 
1151  /* Ignore ACKs that don't actually acknowledge any new data.
1152  * (In particular, do not stop the retransmission timer; this
1153  * avoids creating a sorceror's apprentice syndrome when a
1154  * duplicate ACK is received and we still have data in our
1155  * transmit queue.)
1156  */
1157  if ( ack_len == 0 )
1158  return 0;
1159 
1160  /* Stop the retransmission timer */
1161  stop_timer ( &tcp->timer );
1162 
1163  /* Determine acknowledged flags and data length */
1164  len = ack_len;
1165  acked_flags = ( TCP_FLAGS_SENDING ( tcp->tcp_state ) &
1166  ( TCP_SYN | TCP_FIN ) );
1167  if ( acked_flags ) {
1168  len--;
1169  pending_put ( &tcp->pending_flags );
1170  }
1171 
1172  /* Update SEQ and sent counters */
1173  tcp->snd_seq = ack;
1174  tcp->snd_sent = 0;
1175 
1176  /* Remove any acknowledged data from transmit queue */
1177  tcp_process_tx_queue ( tcp, len, NULL, 1 );
1178 
1179  /* Mark SYN/FIN as acknowledged if applicable. */
1180  if ( acked_flags )
1181  tcp->tcp_state |= TCP_STATE_ACKED ( acked_flags );
1182 
1183  /* Start sending FIN if we've had all possible data ACKed */
1184  if ( list_empty ( &tcp->tx_queue ) &&
1185  ( tcp->flags & TCP_XFER_CLOSED ) &&
1186  ! ( tcp->tcp_state & TCP_STATE_SENT ( TCP_FIN ) ) ) {
1187  tcp->tcp_state |= TCP_STATE_SENT ( TCP_FIN );
1188  pending_get ( &tcp->pending_flags );
1189  }
1190 
1191  return 0;
1192 }
1193 
1194 /**
1195  * Handle TCP received data
1196  *
1197  * @v tcp TCP connection
1198  * @v seq SEQ value (in host-endian order)
1199  * @v iobuf I/O buffer
1200  * @ret rc Return status code
1201  *
1202  * This function takes ownership of the I/O buffer.
1203  */
1204 static int tcp_rx_data ( struct tcp_connection *tcp, uint32_t seq,
1205  struct io_buffer *iobuf ) {
1206  uint32_t already_rcvd;
1207  uint32_t len;
1208  int rc;
1209 
1210  /* Ignore duplicate or out-of-order data */
1211  already_rcvd = ( tcp->rcv_ack - seq );
1212  len = iob_len ( iobuf );
1213  if ( already_rcvd >= len ) {
1214  free_iob ( iobuf );
1215  return 0;
1216  }
1217  iob_pull ( iobuf, already_rcvd );
1218  len -= already_rcvd;
1219 
1220  /* Acknowledge new data */
1221  tcp_rx_seq ( tcp, len );
1222 
1223  /* Update statistics */
1225 
1226  /* Deliver data to application */
1227  profile_start ( &tcp_xfer_profiler );
1228  if ( ( rc = xfer_deliver_iob ( &tcp->xfer, iobuf ) ) != 0 ) {
1229  DBGC ( tcp, "TCP %p could not deliver %08x..%08x: %s\n",
1230  tcp, seq, ( seq + len ), strerror ( rc ) );
1231  return rc;
1232  }
1233  profile_stop ( &tcp_xfer_profiler );
1234 
1235  return 0;
1236 }
1237 
1238 /**
1239  * Handle TCP received FIN
1240  *
1241  * @v tcp TCP connection
1242  * @v seq SEQ value (in host-endian order)
1243  * @ret rc Return status code
1244  */
1245 static int tcp_rx_fin ( struct tcp_connection *tcp, uint32_t seq ) {
1246 
1247  /* Ignore duplicate or out-of-order FIN */
1248  if ( seq != tcp->rcv_ack )
1249  return 0;
1250 
1251  /* Acknowledge FIN */
1252  tcp_rx_seq ( tcp, 1 );
1253 
1254  /* Mark FIN as received */
1255  tcp->tcp_state |= TCP_STATE_RCVD ( TCP_FIN );
1256 
1257  /* Close connection */
1258  tcp_close ( tcp, 0 );
1259 
1260  return 0;
1261 }
1262 
1263 /**
1264  * Handle TCP received RST
1265  *
1266  * @v tcp TCP connection
1267  * @v seq SEQ value (in host-endian order)
1268  * @ret rc Return status code
1269  */
1270 static int tcp_rx_rst ( struct tcp_connection *tcp, uint32_t seq ) {
1271 
1272  /* Accept RST only if it falls within the window. If we have
1273  * not yet received a SYN, then we have no window to test
1274  * against, so fall back to checking that our SYN has been
1275  * ACKed.
1276  */
1277  if ( tcp->tcp_state & TCP_STATE_RCVD ( TCP_SYN ) ) {
1278  if ( ! tcp_in_window ( seq, tcp->rcv_ack, tcp->rcv_win ) )
1279  return 0;
1280  } else {
1281  if ( ! ( tcp->tcp_state & TCP_STATE_ACKED ( TCP_SYN ) ) )
1282  return 0;
1283  }
1284 
1285  /* Abort connection */
1286  tcp->tcp_state = TCP_CLOSED;
1287  tcp_dump_state ( tcp );
1288  tcp_close ( tcp, -ECONNRESET );
1289 
1290  DBGC ( tcp, "TCP %p connection reset by peer\n", tcp );
1291  return -ECONNRESET;
1292 }
1293 
1294 /**
1295  * Enqueue received TCP packet
1296  *
1297  * @v tcp TCP connection
1298  * @v seq SEQ value (in host-endian order)
1299  * @v flags TCP flags
1300  * @v iobuf I/O buffer
1301  */
1302 static void tcp_rx_enqueue ( struct tcp_connection *tcp, uint32_t seq,
1303  uint8_t flags, struct io_buffer *iobuf ) {
1304  struct tcp_rx_queued_header *tcpqhdr;
1305  struct io_buffer *queued;
1306  size_t len;
1307  uint32_t seq_len;
1308  uint32_t nxt;
1309  uint32_t gap;
1310 
1311  /* Calculate remaining flags and sequence length. Note that
1312  * SYN, if present, has already been processed by this point.
1313  */
1314  flags &= TCP_FIN;
1315  len = iob_len ( iobuf );
1316  seq_len = ( len + ( flags ? 1 : 0 ) );
1317  nxt = ( seq + seq_len );
1318 
1319  /* Discard immediately (to save memory) if:
1320  *
1321  * a) we have not yet received a SYN (and so have no defined
1322  * receive window), or
1323  * b) the packet lies entirely outside the receive window, or
1324  * c) there is no further content to process.
1325  */
1326  if ( ( ! ( tcp->tcp_state & TCP_STATE_RCVD ( TCP_SYN ) ) ) ||
1327  ( tcp_cmp ( seq, tcp->rcv_ack + tcp->rcv_win ) >= 0 ) ||
1328  ( tcp_cmp ( nxt, tcp->rcv_ack ) <= 0 ) ||
1329  ( seq_len == 0 ) ) {
1330  free_iob ( iobuf );
1331  return;
1332  }
1333 
1334  /* Add internal header */
1335  tcpqhdr = iob_push ( iobuf, sizeof ( *tcpqhdr ) );
1336  tcpqhdr->seq = seq;
1337  tcpqhdr->nxt = nxt;
1338  tcpqhdr->flags = flags;
1339 
1340  /* Add to RX queue */
1341  gap = tcp->rcv_ack;
1342  list_for_each_entry ( queued, &tcp->rx_queue, list ) {
1343  tcpqhdr = queued->data;
1344  if ( tcp_cmp ( seq, tcpqhdr->seq ) < 0 )
1345  break;
1346  gap = tcpqhdr->nxt;
1347  }
1348  list_add_tail ( &iobuf->list, &queued->list );
1349 
1350  /* Update statistics */
1351  if ( seq != gap )
1353 }
1354 
1355 /**
1356  * Process receive queue
1357  *
1358  * @v tcp TCP connection
1359  */
1360 static void tcp_process_rx_queue ( struct tcp_connection *tcp ) {
1361  struct io_buffer *iobuf;
1362  struct tcp_rx_queued_header *tcpqhdr;
1363  uint32_t seq;
1364  unsigned int flags;
1365  size_t len;
1366 
1367  /* Process all applicable received buffers. Note that we
1368  * cannot use list_for_each_entry() to iterate over the RX
1369  * queue, since tcp_discard() may remove packets from the RX
1370  * queue while we are processing.
1371  */
1372  while ( ( iobuf = list_first_entry ( &tcp->rx_queue, struct io_buffer,
1373  list ) ) ) {
1374 
1375  /* Stop processing when we hit the first gap */
1376  tcpqhdr = iobuf->data;
1377  if ( tcp_cmp ( tcpqhdr->seq, tcp->rcv_ack ) > 0 )
1378  break;
1379 
1380  /* Strip internal header and remove from RX queue */
1381  list_del ( &iobuf->list );
1382  seq = tcpqhdr->seq;
1383  flags = tcpqhdr->flags;
1384  iob_pull ( iobuf, sizeof ( *tcpqhdr ) );
1385  len = iob_len ( iobuf );
1386 
1387  /* Handle new data, if any */
1388  tcp_rx_data ( tcp, seq, iob_disown ( iobuf ) );
1389  seq += len;
1390 
1391  /* Handle FIN, if present */
1392  if ( flags & TCP_FIN ) {
1393  tcp_rx_fin ( tcp, seq );
1394  seq++;
1395  }
1396  }
1397 }
1398 
1399 /**
1400  * Process received packet
1401  *
1402  * @v iobuf I/O buffer
1403  * @v netdev Network device
1404  * @v st_src Partially-filled source address
1405  * @v st_dest Partially-filled destination address
1406  * @v pshdr_csum Pseudo-header checksum
1407  * @ret rc Return status code
1408  */
1409 static int tcp_rx ( struct io_buffer *iobuf,
1410  struct net_device *netdev __unused,
1411  struct sockaddr_tcpip *st_src,
1412  struct sockaddr_tcpip *st_dest __unused,
1413  uint16_t pshdr_csum ) {
1414  struct tcp_header *tcphdr = iobuf->data;
1415  struct tcp_connection *tcp;
1416  struct tcp_options options;
1417  size_t hlen;
1418  uint16_t csum;
1419  uint32_t seq;
1420  uint32_t ack;
1421  uint16_t raw_win;
1422  uint32_t win;
1423  unsigned int flags;
1424  size_t len;
1425  uint32_t seq_len;
1426  size_t old_xfer_window;
1427  int rc;
1428 
1429  /* Start profiling */
1430  profile_start ( &tcp_rx_profiler );
1431 
1432  /* Sanity check packet */
1433  if ( iob_len ( iobuf ) < sizeof ( *tcphdr ) ) {
1434  DBG ( "TCP packet too short at %zd bytes (min %zd bytes)\n",
1435  iob_len ( iobuf ), sizeof ( *tcphdr ) );
1436  rc = -EINVAL;
1437  goto discard;
1438  }
1439  hlen = ( ( tcphdr->hlen & TCP_MASK_HLEN ) / 16 ) * 4;
1440  if ( hlen < sizeof ( *tcphdr ) ) {
1441  DBG ( "TCP header too short at %zd bytes (min %zd bytes)\n",
1442  hlen, sizeof ( *tcphdr ) );
1443  rc = -EINVAL;
1444  goto discard;
1445  }
1446  if ( hlen > iob_len ( iobuf ) ) {
1447  DBG ( "TCP header too long at %zd bytes (max %zd bytes)\n",
1448  hlen, iob_len ( iobuf ) );
1449  rc = -EINVAL;
1450  goto discard;
1451  }
1452  csum = tcpip_continue_chksum ( pshdr_csum, iobuf->data,
1453  iob_len ( iobuf ) );
1454  if ( csum != 0 ) {
1455  DBG ( "TCP checksum incorrect (is %04x including checksum "
1456  "field, should be 0000)\n", csum );
1457  rc = -EINVAL;
1458  goto discard;
1459  }
1460 
1461  /* Parse parameters from header and strip header */
1462  tcp = tcp_demux ( ntohs ( tcphdr->dest ) );
1463  seq = ntohl ( tcphdr->seq );
1464  ack = ntohl ( tcphdr->ack );
1465  raw_win = ntohs ( tcphdr->win );
1466  flags = tcphdr->flags;
1467  if ( ( rc = tcp_rx_opts ( tcp, tcphdr, hlen, &options ) ) != 0 )
1468  goto discard;
1469  if ( tcp && options.tsopt )
1470  tcp->ts_val = ntohl ( options.tsopt->tsval );
1471  iob_pull ( iobuf, hlen );
1472  len = iob_len ( iobuf );
1473  seq_len = ( len + ( ( flags & TCP_SYN ) ? 1 : 0 ) +
1474  ( ( flags & TCP_FIN ) ? 1 : 0 ) );
1475 
1476  /* Update statistics */
1477  tcp_stats.in_segs++;
1478  tcp_stats.in_octets += len;
1479 
1480  /* Dump header */
1481  DBGC2 ( tcp, "TCP %p RX %d<-%d %08x %08x..%08x %4zd",
1482  tcp, ntohs ( tcphdr->dest ), ntohs ( tcphdr->src ),
1483  ntohl ( tcphdr->ack ), ntohl ( tcphdr->seq ),
1484  ( ntohl ( tcphdr->seq ) + seq_len ), len );
1485  tcp_dump_flags ( tcp, tcphdr->flags );
1486  DBGC2 ( tcp, "\n" );
1487 
1488  /* If no connection was found, silently drop packet */
1489  if ( ! tcp ) {
1490  rc = -ENOTCONN;
1491  goto discard;
1492  }
1493 
1494  /* Record old data-transfer window */
1495  old_xfer_window = tcp_xfer_window ( tcp );
1496 
1497  /* Handle ACK, if present */
1498  if ( flags & TCP_ACK ) {
1499  win = ( raw_win << tcp->snd_win_scale );
1500  if ( ( rc = tcp_rx_ack ( tcp, ack, win ) ) != 0 ) {
1501  tcp_xmit_reset ( tcp, st_src, tcphdr );
1502  goto discard;
1503  }
1504  }
1505 
1506  /* Force an ACK if this packet is out of order */
1507  if ( ( tcp->tcp_state & TCP_STATE_RCVD ( TCP_SYN ) ) &&
1508  ( seq != tcp->rcv_ack ) ) {
1509  tcp->flags |= TCP_ACK_PENDING;
1510  }
1511 
1512  /* Handle SYN, if present */
1513  if ( flags & TCP_SYN ) {
1514  tcp_rx_syn ( tcp, seq, &options );
1515  seq++;
1516  }
1517 
1518  /* Handle RST, if present */
1519  if ( flags & TCP_RST ) {
1520  if ( ( rc = tcp_rx_rst ( tcp, seq ) ) != 0 )
1521  goto discard;
1522  }
1523 
1524  /* Enqueue received data */
1525  tcp_rx_enqueue ( tcp, seq, flags, iob_disown ( iobuf ) );
1526 
1527  /* Process receive queue */
1528  tcp_process_rx_queue ( tcp );
1529 
1530  /* Dump out any state change as a result of the received packet */
1531  tcp_dump_state ( tcp );
1532 
1533  /* Schedule transmission of ACK (and any pending data). If we
1534  * have received any out-of-order packets (i.e. if the receive
1535  * queue remains non-empty after processing) then send the ACK
1536  * immediately in order to trigger Fast Retransmission.
1537  */
1538  if ( list_empty ( &tcp->rx_queue ) ) {
1539  process_add ( &tcp->process );
1540  } else {
1541  tcp_xmit_sack ( tcp, seq );
1542  }
1543 
1544  /* If this packet was the last we expect to receive, set up
1545  * timer to expire and cause the connection to be freed.
1546  */
1547  if ( TCP_CLOSED_GRACEFULLY ( tcp->tcp_state ) ) {
1548  stop_timer ( &tcp->wait );
1549  start_timer_fixed ( &tcp->wait, ( 2 * TCP_MSL ) );
1550  }
1551 
1552  /* Notify application if window has changed */
1553  if ( tcp_xfer_window ( tcp ) != old_xfer_window )
1554  xfer_window_changed ( &tcp->xfer );
1555 
1556  profile_stop ( &tcp_rx_profiler );
1557  return 0;
1558 
1559  discard:
1560  /* Free received packet */
1561  free_iob ( iobuf );
1562  return rc;
1563 }
1564 
1565 /** TCP protocol */
1566 struct tcpip_protocol tcp_protocol __tcpip_protocol = {
1567  .name = "TCP",
1568  .rx = tcp_rx,
1569  .tcpip_proto = IP_TCP,
1570 };
1571 
1572 /**
1573  * Discard some cached TCP data
1574  *
1575  * @ret discarded Number of cached items discarded
1576  */
1577 static unsigned int tcp_discard ( void ) {
1578  struct tcp_connection *tcp;
1579  struct io_buffer *iobuf;
1580  unsigned int discarded = 0;
1581 
1582  /* Try to drop one queued RX packet from each connection */
1583  list_for_each_entry ( tcp, &tcp_conns, list ) {
1584  list_for_each_entry_reverse ( iobuf, &tcp->rx_queue, list ) {
1585 
1586  /* Remove packet from queue */
1587  list_del ( &iobuf->list );
1588  free_iob ( iobuf );
1589 
1590  /* Update statistics */
1592 
1593  /* Report discard */
1594  discarded++;
1595  break;
1596  }
1597  }
1598 
1599  return discarded;
1600 }
1601 
1602 /** TCP cache discarder */
1603 struct cache_discarder tcp_discarder __cache_discarder ( CACHE_NORMAL ) = {
1604  .discard = tcp_discard,
1605 };
1606 
1607 /**
1608  * Find first TCP connection that has not yet been closed
1609  *
1610  * @ret tcp First unclosed connection, or NULL
1611  */
1612 static struct tcp_connection * tcp_first_unclosed ( void ) {
1613  struct tcp_connection *tcp;
1614 
1615  /* Find first connection which has not yet been closed */
1616  list_for_each_entry ( tcp, &tcp_conns, list ) {
1617  if ( ! ( tcp->flags & TCP_XFER_CLOSED ) )
1618  return tcp;
1619  }
1620  return NULL;
1621 }
1622 
1623 /**
1624  * Find first TCP connection that has not yet finished all operations
1625  *
1626  * @ret tcp First unfinished connection, or NULL
1627  */
1628 static struct tcp_connection * tcp_first_unfinished ( void ) {
1629  struct tcp_connection *tcp;
1630 
1631  /* Find first connection which has not yet closed gracefully,
1632  * or which still has a pending transmission (e.g. to ACK the
1633  * received FIN).
1634  */
1635  list_for_each_entry ( tcp, &tcp_conns, list ) {
1636  if ( ( ! TCP_CLOSED_GRACEFULLY ( tcp->tcp_state ) ) ||
1637  process_running ( &tcp->process ) ) {
1638  return tcp;
1639  }
1640  }
1641  return NULL;
1642 }
1643 
1644 /**
1645  * Shut down all TCP connections
1646  *
1647  */
1648 static void tcp_shutdown ( int booting __unused ) {
1649  struct tcp_connection *tcp;
1650  unsigned long start;
1651  unsigned long elapsed;
1652 
1653  /* Initiate a graceful close of all connections, allowing for
1654  * the fact that the connection list may change as we do so.
1655  */
1656  while ( ( tcp = tcp_first_unclosed() ) ) {
1657  DBGC ( tcp, "TCP %p closing for shutdown\n", tcp );
1658  tcp_close ( tcp, -ECANCELED );
1659  }
1660 
1661  /* Wait for all connections to finish closing gracefully */
1662  start = currticks();
1663  while ( ( tcp = tcp_first_unfinished() ) &&
1664  ( ( elapsed = ( currticks() - start ) ) < TCP_FINISH_TIMEOUT )){
1665  step();
1666  }
1667 
1668  /* Forcibly close any remaining connections */
1669  while ( ( tcp = list_first_entry ( &tcp_conns, struct tcp_connection,
1670  list ) ) != NULL ) {
1671  tcp->tcp_state = TCP_CLOSED;
1672  tcp_dump_state ( tcp );
1673  tcp_close ( tcp, -ECANCELED );
1674  }
1675 }
1676 
1677 /** TCP shutdown function */
1678 struct startup_fn tcp_startup_fn __startup_fn ( STARTUP_LATE ) = {
1679  .name = "tcp",
1680  .shutdown = tcp_shutdown,
1681 };
1682 
1683 /***************************************************************************
1684  *
1685  * Data transfer interface
1686  *
1687  ***************************************************************************
1688  */
1689 
/**
 * Close interface
 *
 * @v tcp		TCP connection
 * @v rc		Reason for close
 */
static void tcp_xfer_close ( struct tcp_connection *tcp, int rc ) {

	/* Shut the connection down with the supplied reason */
	tcp_close ( tcp, rc );

	/* Attempt a transmission straight away so that the FIN goes
	 * out, if it is possible to send one.
	 */
	tcp_xmit ( tcp );
}
1704 
1705 /**
1706  * Deliver datagram as I/O buffer
1707  *
1708  * @v tcp TCP connection
1709  * @v iobuf Datagram I/O buffer
1710  * @v meta Data transfer metadata
1711  * @ret rc Return status code
1712  */
1713 static int tcp_xfer_deliver ( struct tcp_connection *tcp,
1714  struct io_buffer *iobuf,
1715  struct xfer_metadata *meta __unused ) {
1716 
1717  /* Enqueue packet */
1718  list_add_tail ( &iobuf->list, &tcp->tx_queue );
1719 
1720  /* Each enqueued packet is a pending operation */
1721  pending_get ( &tcp->pending_data );
1722 
1723  /* Transmit data, if possible */
1724  tcp_xmit ( tcp );
1725 
1726  return 0;
1727 }
1728 
1729 /**
1730  * Report job progress
1731  *
1732  * @v tcp TCP connection
1733  * @v progress Progress report to fill in
1734  * @ret ongoing_rc Ongoing job status code (if known)
1735  */
1736 static int tcp_progress ( struct tcp_connection *tcp,
1737  struct job_progress *progress ) {
1738 
1739  /* Report connection in progress if applicable */
1740  if ( ! TCP_HAS_BEEN_ESTABLISHED ( tcp->tcp_state ) ) {
1741  snprintf ( progress->message, sizeof ( progress->message ),
1742  "connecting" );
1743  }
1744 
1745  return 0;
1746 }
1747 
1748 /** TCP data transfer interface operations */
1754 };
1755 
1756 /** TCP data transfer interface descriptor */
1757 static struct interface_descriptor tcp_xfer_desc =
1758  INTF_DESC ( struct tcp_connection, xfer, tcp_xfer_operations );
1759 
1760 /***************************************************************************
1761  *
1762  * Openers
1763  *
1764  ***************************************************************************
1765  */
1766 
1767 /** TCP socket opener */
1768 struct socket_opener tcp_socket_opener __socket_opener = {
1770  .open = tcp_open,
1771 };
1772 
1773 /** Linkage hack */
1775 
1776 /**
1777  * Open TCP URI
1778  *
1779  * @v xfer Data transfer interface
1780  * @v uri URI
1781  * @ret rc Return status code
1782  */
1783 static int tcp_open_uri ( struct interface *xfer, struct uri *uri ) {
1784  struct sockaddr_tcpip peer;
1785 
1786  /* Sanity check */
1787  if ( ! uri->host )
1788  return -EINVAL;
1789 
1790  memset ( &peer, 0, sizeof ( peer ) );
1791  peer.st_port = htons ( uri_port ( uri, 0 ) );
1792  return xfer_open_named_socket ( xfer, SOCK_STREAM,
1793  ( struct sockaddr * ) &peer,
1794  uri->host, NULL );
1795 }
1796 
1797 /** TCP URI opener */
1798 struct uri_opener tcp_uri_opener __uri_opener = {
1799  .scheme = "tcp",
1800  .open = tcp_open_uri,
1801 };
1802 
unsigned long in_out_of_order
Total number of packets received out of order.
Definition: tcp.h:450
A TCP connection.
Definition: tcp.c:34
A process.
Definition: process.h:18
#define iob_pull(iobuf, len)
Definition: iobuf.h:107
#define TCP_OPTION_NOP
TCP option pad.
Definition: tcp.h:40
struct retry_timer wait
Shutdown (TIME_WAIT) timer.
Definition: tcp.c:121
#define __attribute__(x)
Definition: compiler.h:10
#define EINVAL
Invalid argument.
Definition: errno.h:429
An object interface operation.
Definition: interface.h:18
#define ECONNRESET
Connection reset.
Definition: errno.h:364
TCP/IP socket address.
Definition: tcpip.h:76
struct arbelprm_rc_send_wqe rc
Definition: arbel.h:14
void xfer_window_changed(struct interface *intf)
Report change of flow control window.
Definition: xfer.c:147
struct tcp_timestamp_option tsopt
Definition: tcp.h:136
unsigned short uint16_t
Definition: stdint.h:11
void intf_close(struct interface *intf, int rc)
Close an object interface.
Definition: interface.c:250
Padded TCP window scale option (used for sending)
Definition: tcp.h:66
#define iob_put(iobuf, len)
Definition: iobuf.h:125
struct list_head rx_queue
Receive queue.
Definition: tcp.c:113
Data transfer metadata.
Definition: xfer.h:23
void intf_shutdown(struct interface *intf, int rc)
Shut down an object interface.
Definition: interface.c:279
size_t tcpip_mtu(struct sockaddr_tcpip *st_dest)
Determine maximum transmission unit.
Definition: tcpip.c:132
unsigned int tcp_state
Current TCP state.
Definition: tcp.c:54
int xfer_deliver_iob(struct interface *intf, struct io_buffer *iobuf)
Deliver datagram as I/O buffer without metadata.
Definition: xfer.c:256
uint8_t nop[2]
Definition: tcp.h:118
Parsed TCP options.
Definition: tcp.h:143
static void start_timer_nodelay(struct retry_timer *timer)
Start timer with no delay.
Definition: retry.h:100
#define TCP_MASK_HLEN
Mask for TCP header length field.
Definition: tcp.h:325
static void tcp_keepalive_expired(struct retry_timer *timer, int over)
Keepalive timer expired.
Definition: tcp.c:823
uint8_t state
State.
Definition: eth_slow.h:48
uint8_t kind
Definition: tcp.h:50
struct retry_timer timer
Retransmission timer.
Definition: tcp.c:117
#define TCP_SYN_SENT
SYN_SENT.
Definition: tcp.h:216
#define TCP_LISTEN
LISTEN.
Definition: tcp.h:210
Padded TCP selective acknowledgement option (used for sending)
Definition: tcp.h:117
uint32_t snd_seq
Current sequence number.
Definition: tcp.c:64
#define list_add(new, head)
Add a new entry to the head of a list.
Definition: list.h:70
#define TCP_FIN
Definition: tcp.h:164
static int tcp_port_available(int port)
Check if local TCP port is available.
Definition: tcp.c:264
#define ref_init(refcnt, free)
Initialise a reference counter.
Definition: refcnt.h:65
Error codes.
struct retry_timer keepalive
Keepalive timer.
Definition: tcp.c:119
struct refcnt refcnt
Reference counter.
Definition: tcp.c:36
#define TCP_RX_WINDOW_SCALE
Advertised TCP window scale.
Definition: tcp.h:81
#define iob_push(iobuf, len)
Definition: iobuf.h:89
#define TCP_ACK
Definition: tcp.h:160
static size_t tcp_process_tx_queue(struct tcp_connection *tcp, size_t max_len, struct io_buffer *dest, int remove)
Process TCP transmit queue.
Definition: tcp.c:572
I/O buffers.
void free_iob(struct io_buffer *iobuf)
Free I/O buffer.
Definition: iobuf.c:153
uint8_t kind
Definition: tcp.h:100
static int tcp_xmit_reset(struct tcp_connection *tcp, struct sockaddr_tcpip *st_dest, struct tcp_header *in_tcphdr)
Send RST response to incoming packet.
Definition: tcp.c:870
uint8_t length
Definition: tcp.h:101
Retry timers.
void pending_put(struct pending_operation *pending)
Mark an operation as no longer pending.
Definition: pending.c:59
#define EADDRINUSE
Address already in use.
Definition: errno.h:304
static int tcp_rx_fin(struct tcp_connection *tcp, uint32_t seq)
Handle TCP received FIN.
Definition: tcp.c:1245
#define TCP_PSH
Definition: tcp.h:161
#define DBGC(...)
Definition: compiler.h:505
#define min(x, y)
Definition: ath.h:36
uint32_t tsecr
Definition: tcp.h:130
uint8_t flags
Definition: tcp.h:26
A process descriptor.
Definition: process.h:32
TCP selective acknowledgement is enabled.
Definition: tcp.c:138
uint32_t seq
Definition: tcp.h:23
A retry timer.
Definition: retry.h:22
void intf_plug_plug(struct interface *a, struct interface *b)
Plug two object interfaces together.
Definition: interface.c:108
#define TCP_OPTION_MSS
Code for the TCP MSS option.
Definition: tcp.h:56
TCP timestamps are enabled.
Definition: tcp.c:134
static int tcp_rx_rst(struct tcp_connection *tcp, uint32_t seq)
Handle TCP received RST.
Definition: tcp.c:1270
unsigned int prev_tcp_state
Previous TCP state.
Definition: tcp.c:59
#define ntohl(value)
Definition: byteswap.h:135
struct cache_discarder tcp_discarder __cache_discarder(CACHE_NORMAL)
TCP cache discarder.
int old
Definition: bitops.h:65
struct pending_operation pending_data
Pending operations for transmit queue.
Definition: tcp.c:126
unsigned long in_segs
Number of packets received.
Definition: tcp.h:446
tcp_flags
TCP flags.
Definition: tcp.c:130
iPXE timers
#define ntohs(value)
Definition: byteswap.h:137
uint8_t snd_win_scale
Send window scale.
Definition: tcp.c:100
TCP statistics.
Definition: tcp.h:444
#define PROC_DESC_ONCE(object_type, process, _step)
Define a process descriptor for a process that runs only once.
Definition: process.h:98
#define TCP_MSL
TCP maximum segment lifetime.
Definition: tcp.h:386
A data structure for storing profiling information.
Definition: profile.h:27
#define TCP_RST
Definition: tcp.h:162
static size_t tcp_xfer_window(struct tcp_connection *tcp)
Check data-transfer flow control window.
Definition: tcp.c:457
Uniform Resource Identifiers.
static void profile_stop(struct profiler *profiler)
Stop profiling.
Definition: profile.h:174
static int tcp_rx(struct io_buffer *iobuf, struct net_device *netdev __unused, struct sockaddr_tcpip *st_src, struct sockaddr_tcpip *st_dest __unused, uint16_t pshdr_csum)
Process received packet.
Definition: tcp.c:1409
void process_del(struct process *process)
Remove process from process list.
Definition: process.c:80
struct io_buffer * alloc_iob(size_t len)
Allocate I/O buffer.
Definition: iobuf.c:131
const char * name
Definition: init.h:44
int semantics
Communication semantics (e.g.
Definition: open.h:73
#define htonl(value)
Definition: byteswap.h:134
static void tcp_shutdown(int booting __unused)
Shut down all TCP connections.
Definition: tcp.c:1648
size_t xfer_window(struct interface *intf)
Check flow control window.
Definition: xfer.c:117
int tcpip_bind(struct sockaddr_tcpip *st_local, int(*available)(int port))
Bind to local TCP/IP port.
Definition: tcpip.c:215
#define ECANCELED
Operation canceled.
Definition: errno.h:344
#define TCP_OPTION_TS
Code for the TCP timestamp option.
Definition: tcp.h:140
u16 seq
802.11 Sequence Control field
Definition: ieee80211.h:19
#define TCP_MAX_WINDOW_SIZE
Maxmimum advertised TCP window size.
Definition: tcp.h:363
#define STARTUP_LATE
Late startup.
Definition: init.h:66
A doubly-linked list entry (or list head)
Definition: list.h:19
static struct interface_operation tcp_xfer_operations[]
TCP data transfer interface operations.
Definition: tcp.c:1749
Dynamic memory allocation.
unsigned int flags
Flags.
Definition: tcp.c:41
#define TCP_SYN
Definition: tcp.h:163
Data transfer interfaces.
#define TCP_FIN_WAIT_1
FIN_WAIT_1.
Definition: tcp.h:247
A reference counter.
Definition: refcnt.h:27
A timer.
Definition: timer.h:29
uint32_t start
Starting offset.
Definition: netvsc.h:12
#define list_empty(list)
Test whether a list is empty.
Definition: list.h:137
#define TCP_CLOSE_WAIT
CLOSE_WAIT.
Definition: tcp.h:288
unsigned long tmp
Definition: linux_pci.h:65
#define list_first_entry(list, type, member)
Get the container of the first entry in a list.
Definition: list.h:334
struct list_head tx_queue
Transmit queue.
Definition: tcp.c:111
static void tcp_close(struct tcp_connection *tcp, int rc)
Close TCP connection.
Definition: tcp.c:353
A startup/shutdown function.
Definition: init.h:43
#define list_del(list)
Delete an entry from a list.
Definition: list.h:120
uint32_t right
Definition: tcp.h:107
#define TCP_OPTION_WS
Code for the TCP window scale option.
Definition: tcp.h:72
FILE_SECBOOT(PERMITTED)
#define ENOMEM
Not enough space.
Definition: errno.h:535
static unsigned int tcp_discard(void)
Discard some cached TCP data.
Definition: tcp.c:1577
#define iob_disown(iobuf)
Disown an I/O buffer.
Definition: iobuf.h:217
uint32_t ts_recent
Most recent received timestamp that advanced the window.
Definition: tcp.c:95
#define TCP_OPTION_SACK_PERMITTED
Code for the TCP selective acknowledgement permitted option.
Definition: tcp.h:96
void * memcpy(void *dest, const void *src, size_t len) __nonnull
const char * name
Name.
Definition: profile.h:29
u8 port
Port number.
Definition: CIB_PRM.h:31
Padded TCP selective acknowledgement permitted option (used for sending)
Definition: tcp.h:90
uint32_t seq
SEQ value, in host-endian order.
Definition: tcp.c:152
#define TCP_OPTION_SACK
Code for the TCP selective acknowledgement option.
Definition: tcp.h:123
uint32_t ack
Definition: tcp.h:24
unsigned long in_discards
Total number of packets discarded due to lack of memory.
Definition: tcp.h:448
struct socket_opener tcp_socket_opener __socket_opener
TCP socket opener.
Definition: tcp.c:1768
Assertions.
Padded TCP timestamp option (used for sending)
Definition: tcp.h:134
assert((readw(&hdr->flags) &(GTF_reading|GTF_writing))==0)
#define container_of(ptr, type, field)
Get containing structure.
Definition: stddef.h:36
static const char * tcp_state(int state)
Name TCP state.
Definition: tcp.c:200
An object interface.
Definition: interface.h:125
static struct tcp_connection * tcp_first_unclosed(void)
Find first TCP connection that has not yet been closed.
Definition: tcp.c:1612
uint32_t ts_val
Received timestamp value.
Definition: tcp.c:90
A long option, as used for getopt_long()
Definition: getopt.h:25
static int tcp_rx_opts(struct tcp_connection *tcp, const struct tcp_header *tcphdr, size_t hlen, struct tcp_options *options)
Parse TCP received options.
Definition: tcp.c:952
unsigned int(* discard)(void)
Discard some cached data.
Definition: malloc.h:99
static int options
Definition: 3c515.c:286
#define list_for_each_entry(pos, head, member)
Iterate over entries in a list.
Definition: list.h:432
#define __unused
Declare a variable or data structure as unused.
Definition: compiler.h:573
#define list_add_tail(new, head)
Add a new entry to the tail of a list.
Definition: list.h:94
uint32_t nxt
Next SEQ value, in host-endian order.
Definition: tcp.c:154
ring len
Length.
Definition: dwmac.h:231
Generic TCP option.
Definition: tcp.h:43
#define list_for_each_entry_reverse(pos, head, member)
Iterate over entries in a list in reverse order.
Definition: list.h:445
uint8_t reserved[3]
Reserved.
Definition: tcp.c:163
#define TCP_ESTABLISHED
ESTABLISHED.
Definition: tcp.h:229
uint16_t win
Definition: tcp.h:27
uint8_t length
Definition: tcp.h:61
static struct net_device * netdev
Definition: gdbudp.c:52
const char * scheme
URI protocol name.
Definition: open.h:54
A TCP header.
Definition: tcp.h:20
#define TCP_CAN_SEND_DATA(state)
Can send data in current state.
Definition: tcp.h:297
Transport-network layer interface.
TCP data transfer interface has been closed.
Definition: tcp.c:132
uint16_t csum
Definition: tcp.h:28
static void tcp_rx_seq(struct tcp_connection *tcp, uint32_t seq_len)
Consume received sequence space.
Definition: tcp.c:1034
static int is_pending(struct pending_operation *pending)
Check if an operation is pending.
Definition: pending.h:25
#define TCP_TIME_WAIT
TIME_WAIT.
Definition: tcp.h:279
static void profile_start(struct profiler *profiler)
Start profiling.
Definition: profile.h:161
Profiling.
uint16_t dest
Definition: tcp.h:22
static int tcp_rx_data(struct tcp_connection *tcp, uint32_t seq, struct io_buffer *iobuf)
Handle TCP received data.
Definition: tcp.c:1204
uint8_t rcv_win_scale
Receive window scale.
Definition: tcp.c:105
#define list_for_each_entry_safe(pos, tmp, head, member)
Iterate over entries in a list, safe against deletion of the current entry.
Definition: list.h:459
#define TCP_SACK_MAX
Maximum number of selective acknowledgement blocks.
Definition: tcp.h:114
void process_add(struct process *process)
Add process to process list.
Definition: process.c:60
#define ENOTCONN
The socket is not connected.
Definition: errno.h:570
uint16_t st_port
TCP/IP port.
Definition: tcpip.h:82
const char * name
Protocol name.
Definition: tcpip.h:107
Generalized socket address structure.
Definition: socket.h:97
#define TCP_STATE_RCVD(flags)
TCP flags that have been received from the peer.
Definition: tcp.h:192
An object interface descriptor.
Definition: interface.h:56
unsigned int local_port
Local port.
Definition: tcp.c:49
uint8_t flags
Flags.
Definition: ena.h:18
char * strerror(int errno)
Retrieve string representation of error number.
Definition: strerror.c:79
static size_t tcp_xmit_win(struct tcp_connection *tcp)
Calculate transmission window.
Definition: tcp.c:436
void * zalloc(size_t size)
Allocate cleared memory.
Definition: malloc.c:662
static void tcp_wait_expired(struct retry_timer *timer, int over)
Shutdown timer expired.
Definition: tcp.c:849
uint8_t kind
Definition: tcp.h:44
struct startup_fn tcp_startup_fn __startup_fn(STARTUP_LATE)
TCP shutdown function.
static int tcp_open_uri(struct interface *xfer, struct uri *uri)
Open TCP URI.
Definition: tcp.c:1783
static size_t iob_len(struct io_buffer *iobuf)
Calculate length of data in an I/O buffer.
Definition: iobuf.h:160
struct tcp_sack_block sack[TCP_SACK_MAX]
Selective acknowledgement list (in host-endian order)
Definition: tcp.c:108
static struct tcp_connection * tcp_first_unfinished(void)
Find first TCP connection that has not yet finished all operations.
Definition: tcp.c:1628
Job progress.
Definition: job.h:16
#define TCP_STATE_SENT(flags)
TCP flags that have been sent in outgoing packets.
Definition: tcp.h:177
static void tcp_xfer_close(struct tcp_connection *tcp, int rc)
Close interface.
Definition: tcp.c:1696
#define INTF_OP(op_type, object_type, op_func)
Define an object interface operation.
Definition: interface.h:33
A network device.
Definition: netdevice.h:353
#define TCP_SOCK_STREAM
Definition: socket.h:24
long int random(void)
Generate a pseudo-random number between 0 and 2147483647L or 2147483562?
Definition: random.c:32
#define SOCK_STREAM
Definition: socket.h:25
int xfer_deliver(struct interface *intf, struct io_buffer *iobuf, struct xfer_metadata *meta)
Deliver datagram.
Definition: xfer.c:195
#define TCP_HAS_BEEN_ESTABLISHED(state)
Have ever been fully established.
Definition: tcp.h:307
Processes.
static int tcp_in_window(uint32_t seq, uint32_t start, uint32_t len)
Check if TCP sequence number lies within window.
Definition: tcp.h:431
unsigned long in_octets_good
Number of octets processed and passed to upper layer.
Definition: tcp.h:455
A cache discarder.
Definition: malloc.h:93
unsigned char uint8_t
Definition: stdint.h:10
static uint32_t tcp_sack_block(struct tcp_connection *tcp, uint32_t seq, struct tcp_sack_block *sack)
Find selective acknowledgement block.
Definition: tcp.c:478
Data transfer interface opening.
static struct profiler tcp_tx_profiler __profiler
Transmit profiler.
Definition: tcp.c:175
uint32_t snd_sent
Unacknowledged sequence count.
Definition: tcp.c:69
static void process_init_stopped(struct process *process, struct process_descriptor *desc, struct refcnt *refcnt)
Initialise process without adding to process list.
Definition: process.h:146
#define TCP_CLOSING_OR_LAST_ACK
CLOSING / LAST_ACK.
Definition: tcp.h:269
static int tcp_open(struct interface *xfer, struct sockaddr *peer, struct sockaddr *local)
Open a TCP connection.
Definition: tcp.c:277
int tcp_sock_stream
Linkage hack.
Definition: tcp.c:1774
unsigned int uint32_t
Definition: stdint.h:12
TCP MSS option.
Definition: tcp.h:49
const char * host
Host name.
Definition: uri.h:77
static void tcp_rx_enqueue(struct tcp_connection *tcp, uint32_t seq, uint8_t flags, struct io_buffer *iobuf)
Enqueue received TCP packet.
Definition: tcp.c:1302
#define CACHE_NORMAL
Items with a normal replacement cost.
Definition: malloc.h:114
static void tcp_expired(struct retry_timer *timer, int over)
Retransmission timer expired.
Definition: tcp.c:789
static struct xen_remove_from_physmap * remove
Definition: xenmem.h:39
uint8_t kind
Definition: tcp.h:127
static struct process_descriptor tcp_process_desc
TCP process descriptor.
Definition: tcp.c:184
Job control interfaces.
#define TCP_STATE_ACKED(flags)
TCP flags that have been acknowledged by the peer.
Definition: tcp.h:184
static int process_running(struct process *process)
Check if process is running.
Definition: process.h:176
size_t mss
Maximum segment size.
Definition: tcp.c:51
char message[32]
Message (optional)
Definition: job.h:33
void start_timer(struct retry_timer *timer)
Start timer.
Definition: retry.c:94
A transport-layer protocol of the TCP/IP stack (e.g. TCP or UDP).
Definition: tcpip.h:105
Network device management.
void start_timer_fixed(struct retry_timer *timer, unsigned long timeout)
Start timer with a specified timeout.
Definition: retry.c:65
#define TCP_OPTION_END
End of TCP options list.
Definition: tcp.h:37
Pending operations.
uint32_t snd_win
Send window.
Definition: tcp.c:74
static int tcp_rx_syn(struct tcp_connection *tcp, uint32_t seq, struct tcp_options *options)
Handle TCP received SYN.
Definition: tcp.c:1075
#define iob_reserve(iobuf, len)
Definition: iobuf.h:72
#define INIT_LIST_HEAD(list)
Initialise a list head.
Definition: list.h:46
#define INTF_DESC(object_type, intf, operations)
Define an object interface descriptor.
Definition: interface.h:81
struct tcp_sack_option sackopt
Definition: tcp.h:119
void stop_timer(struct retry_timer *timer)
Stop timer.
Definition: retry.c:118
struct process process
Transmission process.
Definition: tcp.c:115
const char * sock_ntoa(struct sockaddr *sa)
Transcribe socket address.
Definition: socket.c:43
uint8_t length
Definition: tcp.h:128
struct list_head list
List of which this buffer is a member.
Definition: iobuf.h:45
static int tcp_progress(struct tcp_connection *tcp, struct job_progress *progress)
Report job progress.
Definition: tcp.c:1736
#define DBGC2(...)
Definition: compiler.h:522
uint32_t left
Definition: tcp.h:106
uint32_t mtu
Maximum MTU.
Definition: ena.h:28
struct list_head list
List of TCP connections.
Definition: tcp.c:38
#define TCP_FLAGS_SENDING(state)
TCP flags that are currently being sent in outgoing packets.
Definition: tcp.h:196
#define TCP_SYN_RCVD
SYN_RCVD.
Definition: tcp.h:222
struct tcp_window_scale_option wsopt
Definition: tcp.h:15
void * data
Start of data.
Definition: iobuf.h:53
void step(void)
Single-step a single process.
Definition: process.c:99
struct uri_opener tcp_uri_opener __uri_opener
TCP URI opener.
Definition: tcp.c:1798
FILE_LICENCE(GPL2_OR_LATER_OR_UBDL)
static void tcp_dump_flags(struct tcp_connection *tcp, unsigned int flags)
Dump TCP flags.
Definition: tcp.c:238
struct tcpip_protocol tcp_protocol __tcpip_protocol
TCP protocol.
Definition: tcp.c:1566
static void tcp_xmit_sack(struct tcp_connection *tcp, uint32_t sack_seq)
Transmit any outstanding data (with selective acknowledgement)
Definition: tcp.c:613
unsigned int uri_port(const struct uri *uri, unsigned int default_port)
Get port from URI.
Definition: uri.c:456
TCP internal header.
Definition: tcp.c:146
Reference counting.
if(len >=6 *4) __asm__ __volatile__("movsl" if(len >=5 *4) __asm__ __volatile__("movsl" if(len >=4 *4) __asm__ __volatile__("movsl" if(len >=3 *4) __asm__ __volatile__("movsl" if(len >=2 *4) __asm__ __volatile__("movsl" if(len >=1 *4) __asm__ __volatile__("movsl" if((len % 4) >=2) __asm__ __volatile__("movsw" if((len % 2) >=1) __asm__ __volatile__("movsb" return dest
Definition: string.h:151
uint32_t end
Ending offset.
Definition: netvsc.h:18
uint8_t data[48]
Additional event data.
Definition: ena.h:22
uint32_t rcv_ack
Current acknowledgement number.
Definition: tcp.c:79
uint16_t mss
Definition: tcp.h:52
uint16_t src
Definition: tcp.h:21
static struct tcp_connection * tcp_demux(unsigned int local_port)
Identify TCP connection by local port number.
Definition: tcp.c:933
static unsigned int tcp_sack(struct tcp_connection *tcp, uint32_t seq)
Update TCP selective acknowledgement list.
Definition: tcp.c:514
A pending operation.
Definition: pending.h:14
int snprintf(char *buf, size_t size, const char *fmt,...)
Write a formatted string to a buffer.
Definition: vsprintf.c:383
A Uniform Resource Identifier.
Definition: uri.h:65
unsigned long in_octets
Number of octets received (including duplicate data)
Definition: tcp.h:453
static int32_t tcp_cmp(uint32_t seq1, uint32_t seq2)
Compare TCP sequence numbers.
Definition: tcp.h:419
struct mschapv2_challenge peer
Peer challenge.
Definition: mschapv2.h:12
struct interface xfer
Data transfer interface.
Definition: tcp.c:44
#define TCP_FINISH_TIMEOUT
TCP finish wait time.
Definition: tcp.h:441
TCP acknowledgement is pending.
Definition: tcp.c:136
static struct interface_descriptor tcp_xfer_desc
TCP data transfer interface descriptor.
Definition: tcp.c:185
#define TCP_KEEPALIVE_DELAY
TCP keepalive period.
Definition: tcp.h:394
uint8_t meta
Metadata flags.
Definition: ena.h:14
#define TCP_PATH_MTU
Path MTU.
Definition: tcp.h:379
uint32_t tsval
Definition: tcp.h:129
static void tcp_xmit(struct tcp_connection *tcp)
Transmit any outstanding data.
Definition: tcp.c:773
unsigned long currticks(void)
Get current system time in ticks.
Definition: timer.c:43
static int tcp_xfer_deliver(struct tcp_connection *tcp, struct io_buffer *iobuf, struct xfer_metadata *meta __unused)
Deliver datagram as I/O buffer.
Definition: tcp.c:1713
#define TCP_MAX_HEADER_LEN
TCP maximum header length.
Definition: tcp.h:400
#define DBG(...)
Print a debugging message.
Definition: compiler.h:498
static void intf_init(struct interface *intf, struct interface_descriptor *desc, struct refcnt *refcnt)
Initialise an object interface.
Definition: interface.h:204
struct tcp_statistics tcp_stats
TCP statistics.
Definition: tcp.c:172
TCP selective acknowledgement block.
Definition: tcp.h:105
A URI opener.
Definition: open.h:48
static int tcp_rx_ack(struct tcp_connection *tcp, uint32_t ack, uint32_t win)
Handle TCP received ACK.
Definition: tcp.c:1119
int tcpip_tx(struct io_buffer *iobuf, struct tcpip_protocol *tcpip_protocol, struct sockaddr_tcpip *st_src, struct sockaddr_tcpip *st_dest, struct net_device *netdev, uint16_t *trans_csum)
Transmit a TCP/IP packet.
Definition: tcpip.c:92
uint16_t tcpip_chksum(const void *data, size_t len)
Calculate TCP/IP checksum.
Definition: tcpip.c:204
TCP protocol.
#define ENETUNREACH
Network unreachable.
Definition: errno.h:489
struct pending_operation pending_flags
Pending operations for SYN and FIN.
Definition: tcp.c:124
#define IP_TCP
Definition: in.h:14
#define NULL
NULL pointer (VOID *)
Definition: Base.h:322
#define ETIMEDOUT
Connection timed out.
Definition: errno.h:670
String functions.
#define TCP_CLOSED_GRACEFULLY(state)
Have closed gracefully.
Definition: tcp.h:317
uint8_t flags
Flags.
Definition: tcp.c:161
#define htons(value)
Definition: byteswap.h:136
#define TCP_CLOSED
CLOSED.
Definition: tcp.h:203
void pending_get(struct pending_operation *pending)
Mark an operation as pending.
Definition: pending.c:46
static void tcp_process_rx_queue(struct tcp_connection *tcp)
Process receive queue.
Definition: tcp.c:1360
uint16_t tcpip_continue_chksum(uint16_t partial, const void *data, size_t len)
Calculate continued TCP/IP checksum.
Definition: x86_tcpip.c:46
uint8_t hlen
Definition: tcp.h:25
static LIST_HEAD(tcp_conns)
List of registered TCP connections.
int xfer_open_named_socket(struct interface *xfer, int semantics, struct sockaddr *peer, const char *name, struct sockaddr *local)
Open named socket.
Definition: resolv.c:403
static void tcp_dump_state(struct tcp_connection *tcp)
Dump TCP state transition.
Definition: tcp.c:222
#define ref_put(refcnt)
Drop reference to object.
Definition: refcnt.h:107
struct tcp_sack_permitted_option spopt
Definition: tcp.h:92
uint32_t rcv_win
Receive window.
Definition: tcp.c:84
struct sockaddr_tcpip peer
Remote socket address.
Definition: tcp.c:47
#define TCP_FIN_WAIT_2
FIN_WAIT_2.
Definition: tcp.h:256
void * memset(void *dest, int character, size_t len) __nonnull
A socket opener.
Definition: open.h:71
A persistent I/O buffer.
Definition: iobuf.h:38
uint8_t length
Definition: tcp.h:51