iPXE
tcp.h
Go to the documentation of this file.
1 #ifndef _IPXE_TCP_H
2 #define _IPXE_TCP_H
3 
4 /** @file
5  *
6  * TCP protocol
7  *
8  * This file defines the iPXE TCP API.
9  *
10  */
11 
12 FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
13 
14 #include <ipxe/tcpip.h>
15 
/**
 * A TCP header
 *
 * Fixed 20-byte portion of a TCP segment header.  The structure is
 * packed so that the compiler inserts no padding between fields.
 */
struct tcp_header {
	/** Source port */
	uint16_t src;
	/** Destination port */
	uint16_t dest;
	/** Sequence number */
	uint32_t seq;
	/** Acknowledgement number */
	uint32_t ack;
	/** Header length (upper 4 bits, see TCP_MASK_HLEN), reserved (lower 4 bits) */
	uint8_t hlen;
	/** Flags (8 bits, TCP_CWR through TCP_FIN) */
	uint8_t flags;
	/** Advertised window */
	uint16_t win;
	/** Checksum */
	uint16_t csum;
	/** Urgent pointer */
	uint16_t urg;
} __attribute__ (( packed ));
30 
31 /** @defgroup tcpopts TCP options
32  * @{
33  */
34 
/** End of TCP options list */
#define TCP_OPTION_END 0

/** TCP option pad */
#define TCP_OPTION_NOP 1
40 
/** Generic TCP option
 *
 * Common two-byte prefix of a TCP option.
 */
struct tcp_option {
	/** Option kind (one of the TCP_OPTION_XXX codes) */
	uint8_t kind;
	/** Option length */
	uint8_t length;
} __attribute__ (( packed ));
46 
47 /** TCP MSS option */
52 } __attribute__ (( packed ));
53 
/** Code for the TCP MSS option */
#define TCP_OPTION_MSS 2
56 
57 /** TCP window scale option */
62 } __attribute__ (( packed ));
63 
64 /** Padded TCP window scale option (used for sending) */
68 } __attribute (( packed ));
69 
/** Code for the TCP window scale option */
#define TCP_OPTION_WS 3
72 
/** Advertised TCP window scale
 *
 * Using a scale factor of 2**9 provides for a maximum window of 32MB,
 * which is sufficient to allow Gigabit-speed transfers with a 200ms
 * RTT.  The minimum advertised window is 512 bytes, which is still
 * less than a single packet.
 */
#define TCP_RX_WINDOW_SCALE 9
81 
82 /** TCP selective acknowledgement permitted option */
86 } __attribute__ (( packed ));
87 
88 /** Padded TCP selective acknowledgement permitted option (used for sending) */
92 } __attribute__ (( packed ));
93 
/** Code for the TCP selective acknowledgement permitted option */
#define TCP_OPTION_SACK_PERMITTED 4
96 
97 /** TCP selective acknowledgement option */
101 } __attribute__ (( packed ));
102 
103 /** TCP selective acknowledgement block */
107 } __attribute__ (( packed ));
108 
/** Maximum number of selective acknowledgement blocks
 *
 * This allows for the presence of the TCP timestamp option.
 */
#define TCP_SACK_MAX 3
114 
115 /** Padded TCP selective acknowledgement option (used for sending) */
119 } __attribute__ (( packed ));
120 
/** Code for the TCP selective acknowledgement option */
#define TCP_OPTION_SACK 5
123 
124 /** TCP timestamp option */
130 } __attribute__ (( packed ));
131 
132 /** Padded TCP timestamp option (used for sending) */
136 } __attribute__ (( packed ));
137 
/** Code for the TCP timestamp option */
#define TCP_OPTION_TS 8
140 
/** Parsed TCP options
 *
 * Each member points at the corresponding option if it was present,
 * and is otherwise left unset by the parser.
 * NOTE(review): the "absent" value is presumably NULL; confirm
 * against the option parser.
 */
struct tcp_options {
	/** Window scale option, if present */
	const struct tcp_window_scale_option *wsopt;
	/** SACK permitted option, if present */
	const struct tcp_sack_permitted_option *spopt;
	/** Timestamp option, if present */
	const struct tcp_timestamp_option *tsopt;
};
150 
151 /** @} */
152 
/*
 * TCP flags
 *
 * Bit masks for the flags byte of the TCP header.
 */
#define TCP_CWR		0x80
#define TCP_ECE		0x40
#define TCP_URG		0x20
#define TCP_ACK		0x10
#define TCP_PSH		0x08
#define TCP_RST		0x04
#define TCP_SYN		0x02
#define TCP_FIN		0x01
164 
/**
 * @defgroup tcpstates TCP states
 *
 * The TCP state is defined by a combination of the flags that have
 * been sent to the peer, the flags that have been acknowledged by the
 * peer, and the flags that have been received from the peer.
 *
 * Each of these three flag sets occupies its own byte of the state
 * value: sent flags in bits 0-7, acknowledged flags in bits 8-15 and
 * received flags in bits 16-23.
 *
 * @{
 */

/** TCP flags that have been sent in outgoing packets */
#define TCP_STATE_SENT(flags)	( (flags) << 0 )
#define TCP_FLAGS_SENT(state)	( ( (state) >> 0 ) & 0xff )

/** TCP flags that have been acknowledged by the peer
 *
 * Note that this applies only to SYN and FIN.
 */
#define TCP_STATE_ACKED(flags)	( (flags) << 8 )
#define TCP_FLAGS_ACKED(state)	( ( (state) >> 8 ) & 0xff )

/** TCP flags that have been received from the peer
 *
 * Note that this applies only to SYN and FIN, and that once SYN has
 * been received, we should always be sending ACK.
 */
#define TCP_STATE_RCVD(flags)	( (flags) << 16 )
#define TCP_FLAGS_RCVD(state)	( ( (state) >> 16 ) & 0xff )

/** TCP flags that are currently being sent in outgoing packets
 *
 * These are the flags that have been sent but not yet acknowledged.
 */
#define TCP_FLAGS_SENDING(state) \
	( TCP_FLAGS_SENT ( state ) & ~TCP_FLAGS_ACKED ( state ) )
197 
/** CLOSED
 *
 * The connection has not yet been used for anything.
 */
#define TCP_CLOSED TCP_RST

/** LISTEN
 *
 * Not currently used as a state; we have no support for listening
 * connections.  Given a unique value to avoid compiler warnings.
 */
#define TCP_LISTEN 0

/** SYN_SENT
 *
 * SYN has been sent, nothing has yet been received or acknowledged.
 */
#define TCP_SYN_SENT	( TCP_STATE_SENT ( TCP_SYN ) )

/** SYN_RCVD
 *
 * SYN has been sent but not acknowledged, SYN has been received.
 */
#define TCP_SYN_RCVD	( TCP_STATE_SENT ( TCP_SYN | TCP_ACK ) |	    \
			  TCP_STATE_RCVD ( TCP_SYN ) )

/** ESTABLISHED
 *
 * SYN has been sent and acknowledged, SYN has been received.
 */
#define TCP_ESTABLISHED	( TCP_STATE_SENT ( TCP_SYN | TCP_ACK ) |	    \
			  TCP_STATE_ACKED ( TCP_SYN ) |			    \
			  TCP_STATE_RCVD ( TCP_SYN ) )

/** FIN_WAIT_1
 *
 * SYN has been sent and acknowledged, SYN has been received, FIN has
 * been sent but not acknowledged, FIN has not been received.
 *
 * RFC 793 shows that we can enter FIN_WAIT_1 without having had SYN
 * acknowledged, i.e. if the application closes the connection after
 * sending and receiving SYN, but before having had SYN acknowledged.
 * However, we have to *pretend* that SYN has been acknowledged
 * anyway, otherwise we end up sending SYN and FIN in the same
 * sequence number slot.  Therefore, when we transition from SYN_RCVD
 * to FIN_WAIT_1, we have to remember to set TCP_STATE_ACKED(TCP_SYN)
 * and increment our sequence number.
 */
#define TCP_FIN_WAIT_1	( TCP_STATE_SENT ( TCP_SYN | TCP_ACK | TCP_FIN ) |  \
			  TCP_STATE_ACKED ( TCP_SYN ) |			    \
			  TCP_STATE_RCVD ( TCP_SYN ) )

/** FIN_WAIT_2
 *
 * SYN has been sent and acknowledged, SYN has been received, FIN has
 * been sent and acknowledged, FIN has not been received.
 */
#define TCP_FIN_WAIT_2	( TCP_STATE_SENT ( TCP_SYN | TCP_ACK | TCP_FIN ) |  \
			  TCP_STATE_ACKED ( TCP_SYN | TCP_FIN ) |	    \
			  TCP_STATE_RCVD ( TCP_SYN ) )

/** CLOSING / LAST_ACK
 *
 * SYN has been sent and acknowledged, SYN has been received, FIN has
 * been sent but not acknowledged, FIN has been received.
 *
 * This state actually encompasses both CLOSING and LAST_ACK; they are
 * identical with the definition of state that we use.  I don't
 * *believe* that they need to be distinguished.
 */
#define TCP_CLOSING_OR_LAST_ACK						    \
			( TCP_STATE_SENT ( TCP_SYN | TCP_ACK | TCP_FIN ) |  \
			  TCP_STATE_ACKED ( TCP_SYN ) |			    \
			  TCP_STATE_RCVD ( TCP_SYN | TCP_FIN ) )

/** TIME_WAIT
 *
 * SYN has been sent and acknowledged, SYN has been received, FIN has
 * been sent and acknowledged, FIN has been received.
 */
#define TCP_TIME_WAIT	( TCP_STATE_SENT ( TCP_SYN | TCP_ACK | TCP_FIN ) |  \
			  TCP_STATE_ACKED ( TCP_SYN | TCP_FIN ) |	    \
			  TCP_STATE_RCVD ( TCP_SYN | TCP_FIN ) )

/** CLOSE_WAIT
 *
 * SYN has been sent and acknowledged, SYN has been received, FIN has
 * been received.
 */
#define TCP_CLOSE_WAIT	( TCP_STATE_SENT ( TCP_SYN | TCP_ACK ) |	    \
			  TCP_STATE_ACKED ( TCP_SYN ) |			    \
			  TCP_STATE_RCVD ( TCP_SYN | TCP_FIN ) )
290 
/** Can send data in current state
 *
 * We can send data if and only if we have had our SYN acked and we
 * have not yet sent our FIN.
 */
#define TCP_CAN_SEND_DATA(state)					    \
	( ( (state) & ( TCP_STATE_ACKED ( TCP_SYN ) |			    \
			TCP_STATE_SENT ( TCP_FIN ) ) )			    \
	  == TCP_STATE_ACKED ( TCP_SYN ) )

/** Have ever been fully established
 *
 * We have been fully established if we have both received a SYN and
 * had our own SYN acked.
 */
#define TCP_HAS_BEEN_ESTABLISHED(state)					    \
	( ( (state) & ( TCP_STATE_ACKED ( TCP_SYN ) |			    \
			TCP_STATE_RCVD ( TCP_SYN ) ) )			    \
	  == ( TCP_STATE_ACKED ( TCP_SYN ) | TCP_STATE_RCVD ( TCP_SYN ) ) )

/** Have closed gracefully
 *
 * We have closed gracefully if we have both received a FIN and had
 * our own FIN acked.
 */
#define TCP_CLOSED_GRACEFULLY(state)					    \
	( ( (state) & ( TCP_STATE_ACKED ( TCP_FIN ) |			    \
			TCP_STATE_RCVD ( TCP_FIN ) ) )			    \
	  == ( TCP_STATE_ACKED ( TCP_FIN ) | TCP_STATE_RCVD ( TCP_FIN ) ) )

/** @} */
322 
/** Mask for TCP header length field */
#define TCP_MASK_HLEN 0xf0

/** Smallest port number on which a TCP connection can listen */
#define TCP_MIN_PORT 1
328 
/**
 * Maximum advertised TCP window size
 *
 * The maximum bandwidth on any link is limited by
 *
 *    max_bandwidth * round_trip_time = tcp_window
 *
 * Some rough expectations for achievable bandwidths over various
 * links are:
 *
 * a) Gigabit LAN: expected bandwidth 125MB/s, typical RTT 0.5ms,
 *    minimum required window 64kB
 *
 * b) 10-Gigabit LAN: expected bandwidth 1250MB/s, typical RTT
 *    0.5ms, minimum required window 640kB
 *
 * c) Home Internet connection: expected bandwidth 50MB/s, typical
 *    RTT 25ms, minimum required window 1280kB
 *
 * d) International WAN: expected bandwidth 50MB/s, typical RTT
 *    25ms, minimum required window 1280kB
 *
 * e) Intercontinental WAN: expected bandwidth 5MB/s, typical RTT
 *    250ms, minimum required window 1280kB.
 *
 * The maximum possible value for the TCP window size is 1GB (using
 * the maximum window scale of 2**14).  However, it is advisable to
 * keep the window size as small as possible (without limiting
 * bandwidth), since in the event of a lost packet the window size
 * represents the maximum amount that will need to be retransmitted.
 *
 * We therefore choose a (rounded up) maximum window size of 2048kB.
 */
#define TCP_MAX_WINDOW_SIZE ( 2048 * 1024 )
363 
/**
 * Path MTU
 *
 * IPv6 requires all data link layers to support a datagram size of
 * 1280 bytes.  We choose to use this as our maximum transmitted
 * datagram size, on the assumption that any practical link layer we
 * encounter will allow this size.  This is a very conservative
 * assumption in practice, but the impact of making such a
 * conservative assumption is insignificant since the amount of data
 * that we transmit (rather than receive) is negligible.
 *
 * We allow space within this 1280 bytes for an IPv6 header, a TCP
 * header, and a (padded) TCP timestamp option.  The resulting value
 * is therefore the number of payload bytes remaining (1208).
 */
#define TCP_PATH_MTU							\
	( 1280 - 40 /* IPv6 */ - 20 /* TCP */ - 12 /* TCP timestamp */ )
380 
/** TCP maximum segment lifetime
 *
 * Currently set to 2 minutes, as per RFC 793.
 */
#define TCP_MSL ( 2 * 60 * TICKS_PER_SEC )

/**
 * TCP keepalive period
 *
 * We send keepalive ACKs after this period of inactivity has elapsed
 * on an established connection.  Currently set to 15 seconds.
 */
#define TCP_KEEPALIVE_DELAY ( 15 * TICKS_PER_SEC )
394 
/**
 * TCP maximum header length
 *
 * Sum of the maximum link-layer plus network-layer header length, the
 * fixed TCP header, and the MSS, padded window scale and padded
 * timestamp options.
 */
#define TCP_MAX_HEADER_LEN					\
	( MAX_LL_NET_HEADER_LEN +				\
	  sizeof ( struct tcp_header ) +			\
	  sizeof ( struct tcp_mss_option ) +			\
	  sizeof ( struct tcp_window_scale_padded_option ) +	\
	  sizeof ( struct tcp_timestamp_padded_option ) )
405 
/**
 * Compare TCP sequence numbers
 *
 * @v seq1		Sequence number 1
 * @v seq2		Sequence number 2
 * @ret diff		Sequence difference
 *
 * Analogous to memcmp(), returns an integer less than, equal to, or
 * greater than zero if @c seq1 is found, respectively, to be before,
 * equal to, or after @c seq2.
 *
 * The unsigned difference is reinterpreted as a signed value, so the
 * ordering remains meaningful across sequence number wraparound.
 */
static inline __attribute__ (( always_inline )) int32_t
tcp_cmp ( uint32_t seq1, uint32_t seq2 ) {
	return ( ( int32_t ) ( seq1 - seq2 ) );
}
421 
/**
 * Check if TCP sequence number lies within window
 *
 * @v seq		Sequence number
 * @v start		Start of window
 * @v len		Length of window
 * @ret in_window	Sequence number is within window
 *
 * The window covers @c len sequence numbers beginning at @c start.
 * The subtraction is performed in unsigned arithmetic, so a sequence
 * number preceding @c start wraps to a large offset and is rejected,
 * and windows spanning sequence number wraparound are handled.
 */
static inline int tcp_in_window ( uint32_t seq, uint32_t start,
				  uint32_t len ) {
	return ( ( seq - start ) < len );
}
434 
/** TCP finish wait time
 *
 * Currently set to one second, since we should not allow a slowly
 * responding server to substantially delay a call to shutdown().
 */
#define TCP_FINISH_TIMEOUT ( 1 * TICKS_PER_SEC )
441 
442 extern struct tcpip_protocol tcp_protocol __tcpip_protocol;
443 
444 #endif /* _IPXE_TCP_H */
struct tcp_timestamp_option tsopt
Definition: tcp.h:135
unsigned short uint16_t
Definition: stdint.h:11
Padded TCP window scale option (used for sending)
Definition: tcp.h:65
FILE_LICENCE(GPL2_OR_LATER_OR_UBDL)
uint8_t nop[2]
Definition: tcp.h:117
Parsed TCP options.
Definition: tcp.h:142
TCP selective acknowledgement option.
Definition: tcp.h:98
struct tcp_header __attribute__
uint8_t kind
Definition: tcp.h:49
Padded TCP selective acknowledgement option (used for sending)
Definition: tcp.h:116
TCP window scale option.
Definition: tcp.h:58
struct tcp_sack_permitted_option __attribute
uint8_t length
Definition: tcp.h:44
uint8_t kind
Definition: tcp.h:99
uint8_t length
Definition: tcp.h:100
const struct tcp_window_scale_option * wsopt
Window scale option, if present.
Definition: tcp.h:144
uint32_t tsecr
Definition: tcp.h:129
uint8_t flags
Definition: tcp.h:25
uint32_t seq
Definition: tcp.h:22
u16 seq
802.11 Sequence Control field
Definition: ieee80211.h:19
uint32_t start
Starting offset.
Definition: netvsc.h:12
uint16_t urg
Definition: tcp.h:28
uint32_t right
Definition: tcp.h:106
const struct tcp_sack_permitted_option * spopt
SACK permitted option, if present.
Definition: tcp.h:146
Padded TCP selective acknowledgement permitted option (used for sending)
Definition: tcp.h:89
TCP timestamp option.
Definition: tcp.h:125
uint32_t ack
Definition: tcp.h:23
Padded TCP timestamp option (used for sending)
Definition: tcp.h:133
Generic TCP option.
Definition: tcp.h:42
uint16_t win
Definition: tcp.h:26
uint8_t length
Definition: tcp.h:60
A TCP header.
Definition: tcp.h:19
Transport-network layer interface.
uint16_t csum
Definition: tcp.h:27
uint16_t dest
Definition: tcp.h:21
uint8_t kind
Definition: tcp.h:43
static int tcp_in_window(uint32_t seq, uint32_t start, uint32_t len)
Check if TCP sequence number lies within window.
Definition: tcp.h:430
unsigned char uint8_t
Definition: stdint.h:10
unsigned int uint32_t
Definition: stdint.h:12
TCP MSS option.
Definition: tcp.h:48
uint8_t kind
Definition: tcp.h:126
A transport-layer protocol of the TCP/IP stack (eg.
Definition: tcpip.h:104
struct tcpip_protocol tcp_protocol __tcpip_protocol
ICMPv4 TCP/IP protocol.
Definition: icmpv4.c:100
struct tcp_sack_option sackopt
Definition: tcp.h:118
uint8_t length
Definition: tcp.h:127
signed int int32_t
Definition: stdint.h:17
uint32_t len
Length.
Definition: ena.h:14
struct tcp_window_scale_option wsopt
Definition: tcp.h:67
uint32_t left
Definition: tcp.h:105
uint16_t mss
Definition: tcp.h:51
uint16_t src
Definition: tcp.h:20
static int32_t tcp_cmp(uint32_t seq1, uint32_t seq2)
Compare TCP sequence numbers.
Definition: tcp.h:418
uint32_t tsval
Definition: tcp.h:128
const struct tcp_timestamp_option * tsopt
Timestamp option, if present.
Definition: tcp.h:148
TCP selective acknowledgement permitted option.
Definition: tcp.h:83
TCP selective acknowledgement block.
Definition: tcp.h:104
uint8_t hlen
Definition: tcp.h:24
struct tcp_sack_permitted_option spopt
Definition: tcp.h:91
uint8_t length
Definition: tcp.h:50