iPXE
tcp.h
Go to the documentation of this file.
1 #ifndef _IPXE_TCP_H
2 #define _IPXE_TCP_H
3 
4 /** @file
5  *
6  * TCP protocol
7  *
8  * This file defines the iPXE TCP API.
9  *
10  */
11 
12 FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
13 FILE_SECBOOT ( PERMITTED );
14 
15 #include <ipxe/tcpip.h>
16 
17 /**
18  * A TCP header (fixed 20-byte part; packed, so no padding between fields)
19  */
20 struct tcp_header {
21  uint16_t src; /* Source port */
22  uint16_t dest; /* Destination port */
23  uint32_t seq; /* Sequence number */
24  uint32_t ack; /* Acknowledgement number */
25  uint8_t hlen; /* Header length in upper 4 bits (see TCP_MASK_HLEN), reserved in lower 4 bits */
26  uint8_t flags; /* Flags; bit masks run from TCP_CWR (0x80) down to TCP_FIN (0x01) */
27  uint16_t win; /* Advertised window */
28  uint16_t csum; /* Checksum */
29  uint16_t urg; /* Urgent pointer */
30 } __attribute__ (( packed ));
31 
32 /** @defgroup tcpopts TCP options
33  * @{
34  */
35 
36 /** End of TCP options list */
37 #define TCP_OPTION_END 0
38 
39 /** TCP option pad */
40 #define TCP_OPTION_NOP 1
41 
42 /** Generic TCP option */
43 struct tcp_option {
46 } __attribute__ (( packed ));
47 
48 /** TCP MSS option */
53 } __attribute__ (( packed ));
54 
55 /** Code for the TCP MSS option */
56 #define TCP_OPTION_MSS 2
57 
58 /** TCP window scale option */
63 } __attribute__ (( packed ));
64 
65 /** Padded TCP window scale option (used for sending) */
69 } __attribute (( packed ));
70 
71 /** Code for the TCP window scale option */
72 #define TCP_OPTION_WS 3
73 
74 /** Advertised TCP window scale
75  *
76  * Using a scale factor of 2**9 provides for a maximum window of 32MB,
77  * which is sufficient to allow Gigabit-speed transfers with a 200ms
78  * RTT. The minimum advertised window is 512 bytes, which is still
79  * less than a single packet.
80  */
81 #define TCP_RX_WINDOW_SCALE 9
82 
83 /** TCP selective acknowledgement permitted option */
87 } __attribute__ (( packed ));
88 
89 /** Padded TCP selective acknowledgement permitted option (used for sending) */
93 } __attribute__ (( packed ));
94 
95 /** Code for the TCP selective acknowledgement permitted option */
96 #define TCP_OPTION_SACK_PERMITTED 4
97 
98 /** TCP selective acknowledgement option */
102 } __attribute__ (( packed ));
103 
104 /** TCP selective acknowledgement block */
108 } __attribute__ (( packed ));
109 
110 /** Maximum number of selective acknowledgement blocks
111  *
112  * This allows for the presence of the TCP timestamp option.
113  */
114 #define TCP_SACK_MAX 3
115 
116 /** Padded TCP selective acknowledgement option (used for sending) */
120 } __attribute__ (( packed ));
121 
122 /** Code for the TCP selective acknowledgement option */
123 #define TCP_OPTION_SACK 5
124 
125 /** TCP timestamp option */
131 } __attribute__ (( packed ));
132 
133 /** Padded TCP timestamp option (used for sending) */
137 } __attribute__ (( packed ));
138 
139 /** Code for the TCP timestamp option */
140 #define TCP_OPTION_TS 8
141 
142 /** Parsed TCP options */
143 struct tcp_options {
144  /** Window scale option, if present */
146  /** SACK permitted option, if present */
148  /** Timestamp option, if present */
150 };
151 
152 /** @} */
153 
154 /*
155  * TCP flags
156  */
157 #define TCP_CWR 0x80
158 #define TCP_ECE 0x40
159 #define TCP_URG 0x20
160 #define TCP_ACK 0x10
161 #define TCP_PSH 0x08
162 #define TCP_RST 0x04
163 #define TCP_SYN 0x02
164 #define TCP_FIN 0x01
165 
166 /**
167 * @defgroup tcpstates TCP states
168 *
169 * The TCP state is defined by a combination of the flags that have
170 * been sent to the peer, the flags that have been acknowledged by the
171 * peer, and the flags that have been received from the peer.
172 *
173 * @{
174 */
175 
176 /** TCP flags that have been sent in outgoing packets */
177 #define TCP_STATE_SENT(flags) ( (flags) << 0 )
178 #define TCP_FLAGS_SENT(state) ( ( (state) >> 0 ) & 0xff )
179 
180 /** TCP flags that have been acknowledged by the peer
181  *
182  * Note that this applies only to SYN and FIN.
183  */
184 #define TCP_STATE_ACKED(flags) ( (flags) << 8 )
185 #define TCP_FLAGS_ACKED(state) ( ( (state) >> 8 ) & 0xff )
186 
187 /** TCP flags that have been received from the peer
188  *
189  * Note that this applies only to SYN and FIN, and that once SYN has
190  * been received, we should always be sending ACK.
191  */
192 #define TCP_STATE_RCVD(flags) ( (flags) << 16 )
193 #define TCP_FLAGS_RCVD(state) ( ( (state) >> 16 ) & 0xff )
194 
195 /** TCP flags that are currently being sent in outgoing packets */
196 #define TCP_FLAGS_SENDING(state) \
197  ( TCP_FLAGS_SENT ( state ) & ~TCP_FLAGS_ACKED ( state ) )
198 
199 /** CLOSED
200  *
201  * The connection has not yet been used for anything.
202  */
203 #define TCP_CLOSED TCP_RST
204 
205 /** LISTEN
206  *
207  * Not currently used as a state; we have no support for listening
208  * connections. Given a unique value to avoid compiler warnings.
209  */
210 #define TCP_LISTEN 0
211 
212 /** SYN_SENT
213  *
214  * SYN has been sent, nothing has yet been received or acknowledged.
215  */
216 #define TCP_SYN_SENT ( TCP_STATE_SENT ( TCP_SYN ) )
217 
218 /** SYN_RCVD
219  *
220  * SYN has been sent but not acknowledged, SYN has been received.
221  */
222 #define TCP_SYN_RCVD ( TCP_STATE_SENT ( TCP_SYN | TCP_ACK ) | \
223  TCP_STATE_RCVD ( TCP_SYN ) )
224 
225 /** ESTABLISHED
226  *
227  * SYN has been sent and acknowledged, SYN has been received.
228  */
229 #define TCP_ESTABLISHED ( TCP_STATE_SENT ( TCP_SYN | TCP_ACK ) | \
230  TCP_STATE_ACKED ( TCP_SYN ) | \
231  TCP_STATE_RCVD ( TCP_SYN ) )
232 
233 /** FIN_WAIT_1
234  *
235  * SYN has been sent and acknowledged, SYN has been received, FIN has
236  * been sent but not acknowledged, FIN has not been received.
237  *
238  * RFC 793 shows that we can enter FIN_WAIT_1 without having had SYN
239  * acknowledged, i.e. if the application closes the connection after
240  * sending and receiving SYN, but before having had SYN acknowledged.
241  * However, we have to *pretend* that SYN has been acknowledged
242  * anyway, otherwise we end up sending SYN and FIN in the same
243  * sequence number slot. Therefore, when we transition from SYN_RCVD
244  * to FIN_WAIT_1, we have to remember to set TCP_STATE_ACKED(TCP_SYN)
245  * and increment our sequence number.
246  */
247 #define TCP_FIN_WAIT_1 ( TCP_STATE_SENT ( TCP_SYN | TCP_ACK | TCP_FIN ) | \
248  TCP_STATE_ACKED ( TCP_SYN ) | \
249  TCP_STATE_RCVD ( TCP_SYN ) )
250 
251 /** FIN_WAIT_2
252  *
253  * SYN has been sent and acknowledged, SYN has been received, FIN has
254  * been sent and acknowledged, FIN has not been received.
255  */
256 #define TCP_FIN_WAIT_2 ( TCP_STATE_SENT ( TCP_SYN | TCP_ACK | TCP_FIN ) | \
257  TCP_STATE_ACKED ( TCP_SYN | TCP_FIN ) | \
258  TCP_STATE_RCVD ( TCP_SYN ) )
259 
260 /** CLOSING / LAST_ACK
261  *
262  * SYN has been sent and acknowledged, SYN has been received, FIN has
263  * been sent but not acknowledged, FIN has been received.
264  *
265  * This state actually encompasses both CLOSING and LAST_ACK; they are
266  * identical with the definition of state that we use. I don't
267  * *believe* that they need to be distinguished.
268  */
269 #define TCP_CLOSING_OR_LAST_ACK \
270  ( TCP_STATE_SENT ( TCP_SYN | TCP_ACK | TCP_FIN ) | \
271  TCP_STATE_ACKED ( TCP_SYN ) | \
272  TCP_STATE_RCVD ( TCP_SYN | TCP_FIN ) )
273 
274 /** TIME_WAIT
275  *
276  * SYN has been sent and acknowledged, SYN has been received, FIN has
277  * been sent and acknowledged, FIN has been received.
278  */
279 #define TCP_TIME_WAIT ( TCP_STATE_SENT ( TCP_SYN | TCP_ACK | TCP_FIN ) | \
280  TCP_STATE_ACKED ( TCP_SYN | TCP_FIN ) | \
281  TCP_STATE_RCVD ( TCP_SYN | TCP_FIN ) )
282 
283 /** CLOSE_WAIT
284  *
285  * SYN has been sent and acknowledged, SYN has been received, FIN has
286  * been received.
287  */
288 #define TCP_CLOSE_WAIT ( TCP_STATE_SENT ( TCP_SYN | TCP_ACK ) | \
289  TCP_STATE_ACKED ( TCP_SYN ) | \
290  TCP_STATE_RCVD ( TCP_SYN | TCP_FIN ) )
291 
292 /** Can send data in current state
293  *
294  * We can send data if and only if we have had our SYN acked and we
295  * have not yet sent our FIN.
296  */
297 #define TCP_CAN_SEND_DATA(state) \
298  ( ( (state) & ( TCP_STATE_ACKED ( TCP_SYN ) | \
299  TCP_STATE_SENT ( TCP_FIN ) ) ) \
300  == TCP_STATE_ACKED ( TCP_SYN ) )
301 
302 /** Have ever been fully established
303  *
304  * We have been fully established if we have both received a SYN and
305  * had our own SYN acked.
306  */
307 #define TCP_HAS_BEEN_ESTABLISHED(state) \
308  ( ( (state) & ( TCP_STATE_ACKED ( TCP_SYN ) | \
309  TCP_STATE_RCVD ( TCP_SYN ) ) ) \
310  == ( TCP_STATE_ACKED ( TCP_SYN ) | TCP_STATE_RCVD ( TCP_SYN ) ) )
311 
312 /** Have closed gracefully
313  *
314  * We have closed gracefully if we have both received a FIN and had
315  * our own FIN acked.
316  */
317 #define TCP_CLOSED_GRACEFULLY(state) \
318  ( ( (state) & ( TCP_STATE_ACKED ( TCP_FIN ) | \
319  TCP_STATE_RCVD ( TCP_FIN ) ) ) \
320  == ( TCP_STATE_ACKED ( TCP_FIN ) | TCP_STATE_RCVD ( TCP_FIN ) ) )
321 
322 /** @} */
323 
324 /** Mask for TCP header length field */
325 #define TCP_MASK_HLEN 0xf0
326 
327 /** Smallest port number on which a TCP connection can listen */
328 #define TCP_MIN_PORT 1
329 
330 /**
331  * Maximum advertised TCP window size
332  *
333  * The maximum bandwidth on any link is limited by
334  *
335  * max_bandwidth * round_trip_time = tcp_window
336  *
337  * Some rough expectations for achievable bandwidths over various
338  * links are:
339  *
340  * a) Gigabit LAN: expected bandwidth 125MB/s, typical RTT 0.5ms,
341  * minimum required window 64kB
342  *
343  * b) 10-Gigabit LAN: expected bandwidth 1250MB/s, typical RTT
344  * 0.5ms, minimum required window 640kB
345  *
346  * c) Home Internet connection: expected bandwidth 50MB/s, typical
347  * RTT 25ms, minimum required window 1280kB
348  *
349  * d) International WAN: expected bandwidth 50MB/s, typical RTT
350  * 25ms, minimum required window 1280kB
351  *
352  * e) Intercontinental WAN: expected bandwidth 5MB/s, typical RTT
353  * 250ms, minimum required window 1280kB.
354  *
355  * The maximum possible value for the TCP window size is 1GB (using
356  * the maximum window scale of 2**14). However, it is advisable to
357  * keep the window size as small as possible (without limiting
358  * bandwidth), since in the event of a lost packet the window size
359  * represents the maximum amount that will need to be retransmitted.
360  *
361  * We therefore choose a (rounded up) maximum window size of 2048kB.
362  */
363 #define TCP_MAX_WINDOW_SIZE ( 2048 * 1024 )
364 
365 /**
366  * Path MTU
367  *
368  * IPv6 requires all data link layers to support a datagram size of
369  * 1280 bytes. We choose to use this as our maximum transmitted
370  * datagram size, on the assumption that any practical link layer we
371  * encounter will allow this size. This is a very conservative
372  * assumption in practice, but the impact of making such a
373  * conservative assumption is insignificant since the amount of data
374  * that we transmit (rather than receive) is negligible.
375  *
376  * We allow space within this 1280 bytes for an IPv6 header, a TCP
377  * header, and a (padded) TCP timestamp option.
378  */
379 #define TCP_PATH_MTU \
380  ( 1280 - 40 /* IPv6 */ - 20 /* TCP */ - 12 /* TCP timestamp */ )
381 
382 /** TCP maximum segment lifetime
383  *
384  * Currently set to 2 minutes, as per RFC 793.
385  */
386 #define TCP_MSL ( 2 * 60 * TICKS_PER_SEC )
387 
388 /**
389  * TCP keepalive period
390  *
391  * We send keepalive ACKs after this period of inactivity has elapsed
392  * on an established connection.
393  */
394 #define TCP_KEEPALIVE_DELAY ( 15 * TICKS_PER_SEC )
395 
396 /**
397  * TCP maximum header length
398  * Sum of MAX_LL_NET_HEADER_LEN (defined elsewhere; presumably the largest link-layer plus network-layer header), the fixed TCP header, the MSS option, the padded window scale option and the padded timestamp option.
399  */
400 #define TCP_MAX_HEADER_LEN \
401  ( MAX_LL_NET_HEADER_LEN + \
402  sizeof ( struct tcp_header ) + \
403  sizeof ( struct tcp_mss_option ) + \
404  sizeof ( struct tcp_window_scale_padded_option ) + \
405  sizeof ( struct tcp_timestamp_padded_option ) )
406 
407 /**
408  * Compare TCP sequence numbers
409  *
410  * @v seq1 Sequence number 1
411  * @v seq2 Sequence number 2
412  * @ret diff Sequence difference (seq1 - seq2 modulo 2**32, reinterpreted as signed)
413  *
414  * Analogous to memcmp(), returns an integer less than, equal to, or
415  * greater than zero if @c seq1 is found, respectively, to be before,
416  * equal to, or after @c seq2 (meaningful while the two are less than 2**31 apart).
417  */
418 static inline __attribute__ (( always_inline )) int32_t
419 tcp_cmp ( uint32_t seq1, uint32_t seq2 ) {
420  return ( ( int32_t ) ( seq1 - seq2 ) ); /* Unsigned subtraction wraps modulo 2**32; the cast makes the result signed */
421 }
422 
423 /**
424  * Check if TCP sequence number lies within window
425  *
426  * @v seq Sequence number
427  * @v start Start of window
428  * @v len Length of window (a zero-length window contains no sequence numbers)
429  * @ret in_window Non-zero if seq lies in [start, start + len) modulo 2**32, otherwise zero
430  */
431 static inline int tcp_in_window ( uint32_t seq, uint32_t start,
432  uint32_t len ) {
433  return ( ( seq - start ) < len ); /* Offset of seq from start, computed modulo 2**32, must be below len */
434 }
435 
436 /** TCP finish wait time
437  *
438  * Currently set to one second, since we should not allow a slowly
439  * responding server to substantially delay a call to shutdown().
440  */
441 #define TCP_FINISH_TIMEOUT ( 1 * TICKS_PER_SEC )
442 
443 /** TCP statistics */
445  /** Number of packets received */
446  unsigned long in_segs;
447  /** Total number of packets discarded due to lack of memory */
448  unsigned long in_discards;
449  /** Total number of packets received out of order */
450  unsigned long in_out_of_order;
451 
452  /** Number of octets received (including duplicate data) */
453  unsigned long in_octets;
454  /** Number of octets processed and passed to upper layer */
455  unsigned long in_octets_good;
456 };
457 
458 extern struct tcpip_protocol tcp_protocol __tcpip_protocol;
459 
460 extern struct tcp_statistics tcp_stats;
461 
462 #endif /* _IPXE_TCP_H */
unsigned long in_out_of_order
Total number of packets received out of order.
Definition: tcp.h:450
struct tcp_timestamp_option tsopt
Definition: tcp.h:136
unsigned short uint16_t
Definition: stdint.h:11
Padded TCP window scale option (used for sending)
Definition: tcp.h:66
FILE_LICENCE(GPL2_OR_LATER_OR_UBDL)
uint8_t nop[2]
Definition: tcp.h:118
Parsed TCP options.
Definition: tcp.h:143
TCP selective acknowledgement option.
Definition: tcp.h:99
struct tcp_header __attribute__
uint8_t kind
Definition: tcp.h:50
Padded TCP selective acknowledgement option (used for sending)
Definition: tcp.h:117
TCP window scale option.
Definition: tcp.h:59
struct tcp_sack_permitted_option __attribute
uint8_t length
Definition: tcp.h:45
uint8_t kind
Definition: tcp.h:100
uint8_t length
Definition: tcp.h:101
const struct tcp_window_scale_option * wsopt
Window scale option, if present.
Definition: tcp.h:145
uint32_t tsecr
Definition: tcp.h:130
uint8_t flags
Definition: tcp.h:26
uint32_t seq
Definition: tcp.h:23
unsigned long in_segs
Number of packets received.
Definition: tcp.h:446
TCP statistics.
Definition: tcp.h:444
u16 seq
802.11 Sequence Control field
Definition: ieee80211.h:19
uint32_t start
Starting offset.
Definition: netvsc.h:12
uint16_t urg
Definition: tcp.h:29
uint32_t right
Definition: tcp.h:107
const struct tcp_sack_permitted_option * spopt
SACK permitted option, if present.
Definition: tcp.h:147
Padded TCP selective acknowledgement permitted option (used for sending)
Definition: tcp.h:90
TCP timestamp option.
Definition: tcp.h:126
uint32_t ack
Definition: tcp.h:24
unsigned long in_discards
Total number of packets discarded due to lack of memory.
Definition: tcp.h:448
Padded TCP timestamp option (used for sending)
Definition: tcp.h:134
FILE_SECBOOT(PERMITTED)
ring len
Length.
Definition: dwmac.h:231
Generic TCP option.
Definition: tcp.h:43
uint16_t win
Definition: tcp.h:27
uint8_t length
Definition: tcp.h:61
A TCP header.
Definition: tcp.h:20
Transport-network layer interface.
uint16_t csum
Definition: tcp.h:28
uint16_t dest
Definition: tcp.h:22
struct tcp_statistics tcp_stats
TCP statistics.
Definition: tcp.c:172
uint8_t kind
Definition: tcp.h:44
static int tcp_in_window(uint32_t seq, uint32_t start, uint32_t len)
Check if TCP sequence number lies within window.
Definition: tcp.h:431
unsigned long in_octets_good
Number of octets processed and passed to upper layer.
Definition: tcp.h:455
unsigned char uint8_t
Definition: stdint.h:10
unsigned int uint32_t
Definition: stdint.h:12
TCP MSS option.
Definition: tcp.h:49
uint8_t kind
Definition: tcp.h:127
A transport-layer protocol of the TCP/IP stack (eg.
Definition: tcpip.h:105
struct tcpip_protocol tcp_protocol __tcpip_protocol
ICMPv4 TCP/IP protocol.
Definition: icmpv4.c:101
struct tcp_sack_option sackopt
Definition: tcp.h:119
uint8_t length
Definition: tcp.h:128
signed int int32_t
Definition: stdint.h:17
struct tcp_window_scale_option wsopt
Definition: tcp.h:68
uint32_t left
Definition: tcp.h:106
uint16_t mss
Definition: tcp.h:52
uint16_t src
Definition: tcp.h:21
unsigned long in_octets
Number of octets received (including duplicate data)
Definition: tcp.h:453
static int32_t tcp_cmp(uint32_t seq1, uint32_t seq2)
Compare TCP sequence numbers.
Definition: tcp.h:419
uint32_t tsval
Definition: tcp.h:129
const struct tcp_timestamp_option * tsopt
Timestamp option, if present.
Definition: tcp.h:149
TCP selective acknowledgement permitted option.
Definition: tcp.h:84
TCP selective acknowledgement block.
Definition: tcp.h:105
uint8_t hlen
Definition: tcp.h:25
struct tcp_sack_permitted_option spopt
Definition: tcp.h:92
uint8_t length
Definition: tcp.h:51