iPXE
tcp.h
Go to the documentation of this file.
1#ifndef _IPXE_TCP_H
2#define _IPXE_TCP_H
3
4/** @file
5 *
6 * TCP protocol
7 *
8 * This file defines the iPXE TCP API.
9 *
10 */
11
12FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
13FILE_SECBOOT ( PERMITTED );
14
15#include <ipxe/tcpip.h>
16
17/**
18 * A TCP header (fixed fields only; the declared fields total 20 bytes and the struct is packed)
19 */
20struct tcp_header {
21 uint16_t src; /* Source port */
22 uint16_t dest; /* Destination port */
23 uint32_t seq; /* Sequence number */
24 uint32_t ack; /* Acknowledgement number */
25 uint8_t hlen; /* Header length in upper 4 bits (mask TCP_MASK_HLEN), lower 4 bits reserved */
26 uint8_t flags; /* Flag bits; presumably the TCP_xxx flag masks (TCP_FIN 0x01 up to TCP_CWR 0x80) */
27 uint16_t win; /* Advertised window */
28 uint16_t csum; /* Checksum */
29 uint16_t urg; /* Urgent pointer */
30} __attribute__ (( packed ));
31
32/** @defgroup tcpopts TCP options
33 * @{
34 */
35
36/** End of TCP options list */
37#define TCP_OPTION_END 0
38
39/** TCP option pad */
40#define TCP_OPTION_NOP 1
41
42/** Generic TCP option */
47
48/** TCP MSS option */
54
55/** Code for the TCP MSS option */
56#define TCP_OPTION_MSS 2
57
58/** TCP window scale option */
64
65/** Padded TCP window scale option (used for sending) */
70
71/** Code for the TCP window scale option */
72#define TCP_OPTION_WS 3
73
74/** Advertised TCP window scale
75 *
76 * Using a scale factor of 2**9 provides for a maximum window of 32MB,
77 * which is sufficient to allow Gigabit-speed transfers with a 200ms
78 * RTT. The minimum advertised window is 512 bytes, which is still
79 * less than a single packet.
80 */
81#define TCP_RX_WINDOW_SCALE 9
82
83/** TCP selective acknowledgement permitted option */
88
89/** Padded TCP selective acknowledgement permitted option (used for sending) */
94
95/** Code for the TCP selective acknowledgement permitted option */
96#define TCP_OPTION_SACK_PERMITTED 4
97
98/** TCP selective acknowledgement option */
103
104/** TCP selective acknowledgement block */
109
110/** Maximum number of selective acknowledgement blocks
111 *
112 * This allows for the presence of the TCP timestamp option.
113 */
114#define TCP_SACK_MAX 3
115
116/** Padded TCP selective acknowledgement option (used for sending) */
121
122/** Code for the TCP selective acknowledgement option */
123#define TCP_OPTION_SACK 5
124
125/** TCP timestamp option */
132
133/** Padded TCP timestamp option (used for sending) */
138
139/** Code for the TCP timestamp option */
140#define TCP_OPTION_TS 8
141
142/** Parsed TCP options */
144 /** Window scale option, if present */
146 /** SACK permitted option, if present */
148 /** Timestamp option, if present */
150};
151
152/** @} */
153
154/*
155 * TCP flags (single-bit masks; also passed to TCP_STATE_SENT(), TCP_STATE_ACKED() and TCP_STATE_RCVD())
156 */
157#define TCP_CWR 0x80
158#define TCP_ECE 0x40
159#define TCP_URG 0x20
160#define TCP_ACK 0x10
161#define TCP_PSH 0x08
162#define TCP_RST 0x04
163#define TCP_SYN 0x02
164#define TCP_FIN 0x01
165
166/**
167* @defgroup tcpstates TCP states
168*
169* The TCP state is defined by a combination of the flags that have
170* been sent to the peer, the flags that have been acknowledged by the
171* peer, and the flags that have been received from the peer.
172*
173* @{
174*/
175
176/** TCP flags that have been sent in outgoing packets */
177#define TCP_STATE_SENT(flags) ( (flags) << 0 )
178#define TCP_FLAGS_SENT(state) ( ( (state) >> 0 ) & 0xff )
179
180/** TCP flags that have been acknowledged by the peer
181 *
182 * Note that this applies only to SYN and FIN.
183 */
184#define TCP_STATE_ACKED(flags) ( (flags) << 8 )
185#define TCP_FLAGS_ACKED(state) ( ( (state) >> 8 ) & 0xff )
186
187/** TCP flags that have been received from the peer
188 *
189 * Note that this applies only to SYN and FIN, and that once SYN has
190 * been received, we should always be sending ACK.
191 */
192#define TCP_STATE_RCVD(flags) ( (flags) << 16 )
193#define TCP_FLAGS_RCVD(state) ( ( (state) >> 16 ) & 0xff )
194
195/** TCP flags that are currently being sent in outgoing packets */
196#define TCP_FLAGS_SENDING(state) \
197 ( TCP_FLAGS_SENT ( state ) & ~TCP_FLAGS_ACKED ( state ) )
198
199/** CLOSED
200 *
201 * The connection has not yet been used for anything.
202 */
203#define TCP_CLOSED TCP_RST
204
205/** LISTEN
206 *
207 * Not currently used as a state; we have no support for listening
208 * connections. Given a unique value to avoid compiler warnings.
209 */
210#define TCP_LISTEN 0
211
212/** SYN_SENT
213 *
214 * SYN has been sent, nothing has yet been received or acknowledged.
215 */
216#define TCP_SYN_SENT ( TCP_STATE_SENT ( TCP_SYN ) )
217
218/** SYN_RCVD
219 *
220 * SYN has been sent but not acknowledged, SYN has been received.
221 */
222#define TCP_SYN_RCVD ( TCP_STATE_SENT ( TCP_SYN | TCP_ACK ) | \
223 TCP_STATE_RCVD ( TCP_SYN ) )
224
225/** ESTABLISHED
226 *
227 * SYN has been sent and acknowledged, SYN has been received.
228 */
229#define TCP_ESTABLISHED ( TCP_STATE_SENT ( TCP_SYN | TCP_ACK ) | \
230 TCP_STATE_ACKED ( TCP_SYN ) | \
231 TCP_STATE_RCVD ( TCP_SYN ) )
232
233/** FIN_WAIT_1
234 *
235 * SYN has been sent and acknowledged, SYN has been received, FIN has
236 * been sent but not acknowledged, FIN has not been received.
237 *
238 * RFC 793 shows that we can enter FIN_WAIT_1 without having had SYN
239 * acknowledged, i.e. if the application closes the connection after
240 * sending and receiving SYN, but before having had SYN acknowledged.
241 * However, we have to *pretend* that SYN has been acknowledged
242 * anyway, otherwise we end up sending SYN and FIN in the same
243 * sequence number slot. Therefore, when we transition from SYN_RCVD
244 * to FIN_WAIT_1, we have to remember to set TCP_STATE_ACKED(TCP_SYN)
245 * and increment our sequence number.
246 */
247#define TCP_FIN_WAIT_1 ( TCP_STATE_SENT ( TCP_SYN | TCP_ACK | TCP_FIN ) | \
248 TCP_STATE_ACKED ( TCP_SYN ) | \
249 TCP_STATE_RCVD ( TCP_SYN ) )
250
251/** FIN_WAIT_2
252 *
253 * SYN has been sent and acknowledged, SYN has been received, FIN has
254 * been sent and acknowledged, FIN has not been received.
255 */
256#define TCP_FIN_WAIT_2 ( TCP_STATE_SENT ( TCP_SYN | TCP_ACK | TCP_FIN ) | \
257 TCP_STATE_ACKED ( TCP_SYN | TCP_FIN ) | \
258 TCP_STATE_RCVD ( TCP_SYN ) )
259
260/** CLOSING / LAST_ACK
261 *
262 * SYN has been sent and acknowledged, SYN has been received, FIN has
263 * been sent but not acknowledged, FIN has been received.
264 *
265 * This state actually encompasses both CLOSING and LAST_ACK; they are
266 * identical with the definition of state that we use. I don't
267 * *believe* that they need to be distinguished.
268 */
269#define TCP_CLOSING_OR_LAST_ACK \
270 ( TCP_STATE_SENT ( TCP_SYN | TCP_ACK | TCP_FIN ) | \
271 TCP_STATE_ACKED ( TCP_SYN ) | \
272 TCP_STATE_RCVD ( TCP_SYN | TCP_FIN ) )
273
274/** TIME_WAIT
275 *
276 * SYN has been sent and acknowledged, SYN has been received, FIN has
277 * been sent and acknowledged, FIN has been received.
278 */
279#define TCP_TIME_WAIT ( TCP_STATE_SENT ( TCP_SYN | TCP_ACK | TCP_FIN ) | \
280 TCP_STATE_ACKED ( TCP_SYN | TCP_FIN ) | \
281 TCP_STATE_RCVD ( TCP_SYN | TCP_FIN ) )
282
283/** CLOSE_WAIT
284 *
285 * SYN has been sent and acknowledged, SYN has been received, FIN has
286 * been received.
287 */
288#define TCP_CLOSE_WAIT ( TCP_STATE_SENT ( TCP_SYN | TCP_ACK ) | \
289 TCP_STATE_ACKED ( TCP_SYN ) | \
290 TCP_STATE_RCVD ( TCP_SYN | TCP_FIN ) )
291
292/** Can send data in current state
293 *
294 * We can send data if and only if we have had our SYN acked and we
295 * have not yet sent our FIN.
296 */
297#define TCP_CAN_SEND_DATA(state) \
298 ( ( (state) & ( TCP_STATE_ACKED ( TCP_SYN ) | \
299 TCP_STATE_SENT ( TCP_FIN ) ) ) \
300 == TCP_STATE_ACKED ( TCP_SYN ) )
301
302/** Have ever been fully established
303 *
304 * We have been fully established if we have both received a SYN and
305 * had our own SYN acked.
306 */
307#define TCP_HAS_BEEN_ESTABLISHED(state) \
308 ( ( (state) & ( TCP_STATE_ACKED ( TCP_SYN ) | \
309 TCP_STATE_RCVD ( TCP_SYN ) ) ) \
310 == ( TCP_STATE_ACKED ( TCP_SYN ) | TCP_STATE_RCVD ( TCP_SYN ) ) )
311
312/** Have closed gracefully
313 *
314 * We have closed gracefully if we have both received a FIN and had
315 * our own FIN acked.
316 */
317#define TCP_CLOSED_GRACEFULLY(state) \
318 ( ( (state) & ( TCP_STATE_ACKED ( TCP_FIN ) | \
319 TCP_STATE_RCVD ( TCP_FIN ) ) ) \
320 == ( TCP_STATE_ACKED ( TCP_FIN ) | TCP_STATE_RCVD ( TCP_FIN ) ) )
321
322/** @} */
323
324/** Mask for TCP header length field */
325#define TCP_MASK_HLEN 0xf0
326
327/** Smallest port number on which a TCP connection can listen */
328#define TCP_MIN_PORT 1
329
330/**
331 * Maximum advertised TCP window size
332 *
333 * The maximum bandwidth on any link is limited by
334 *
335 * max_bandwidth * round_trip_time = tcp_window
336 *
337 * Some rough expectations for achievable bandwidths over various
338 * links are:
339 *
340 * a) Gigabit LAN: expected bandwidth 125MB/s, typical RTT 0.5ms,
341 * minimum required window 64kB
342 *
343 * b) 10-Gigabit LAN: expected bandwidth 1250MB/s, typical RTT
344 * 0.5ms, minimum required window 640kB
345 *
346 * c) Home Internet connection: expected bandwidth 50MB/s, typical
347 * RTT 25ms, minimum required window 1280kB
348 *
349 * d) International WAN: expected bandwidth 50MB/s, typical RTT
350 * 25ms, minimum required window 1280kB
351 *
352 * e) Intercontinental WAN: expected bandwidth 5MB/s, typical RTT
353 * 250ms, minimum required window 1280kB.
354 *
355 * The maximum possible value for the TCP window size is 1GB (using
356 * the maximum window scale of 2**14). However, it is advisable to
357 * keep the window size as small as possible (without limiting
358 * bandwidth), since in the event of a lost packet the window size
359 * represents the maximum amount that will need to be retransmitted.
360 *
361 * We therefore choose a (rounded up) maximum window size of 2048kB.
362 */
363#define TCP_MAX_WINDOW_SIZE ( 2048 * 1024 )
364
365/**
366 * Path MTU
367 *
368 * IPv6 requires all data link layers to support a datagram size of
369 * 1280 bytes. We choose to use this as our maximum transmitted
370 * datagram size, on the assumption that any practical link layer we
371 * encounter will allow this size. This is a very conservative
372 * assumption in practice, but the impact of making such a
373 * conservative assumption is insignificant since the amount of data
374 * that we transmit (rather than receive) is negligible.
375 *
376 * We allow space within this 1280 bytes for an IPv6 header, a TCP
377 * header, and a (padded) TCP timestamp option.
378 */
379#define TCP_PATH_MTU \
380 ( 1280 - 40 /* IPv6 */ - 20 /* TCP */ - 12 /* TCP timestamp */ )
381
382/** TCP maximum segment lifetime
383 *
384 * Currently set to 2 minutes, as per RFC 793.
385 */
386#define TCP_MSL ( 2 * 60 * TICKS_PER_SEC )
387
388/**
389 * TCP keepalive period
390 *
391 * We send keepalive ACKs after this period of inactivity has elapsed
392 * on an established connection.
393 */
394#define TCP_KEEPALIVE_DELAY ( 15 * TICKS_PER_SEC )
395
396/**
397 * TCP maximum header length
398 * Sum of MAX_LL_NET_HEADER_LEN, the TCP header, the MSS option, the padded window scale option and the padded timestamp option.
399 */
400#define TCP_MAX_HEADER_LEN \
401 ( MAX_LL_NET_HEADER_LEN + \
402 sizeof ( struct tcp_header ) + \
403 sizeof ( struct tcp_mss_option ) + \
404 sizeof ( struct tcp_window_scale_padded_option ) + \
405 sizeof ( struct tcp_timestamp_padded_option ) )
406
407/**
408 * Compare TCP sequence numbers (wraparound-aware)
409 *
410 * @v seq1 Sequence number 1
411 * @v seq2 Sequence number 2
412 * @ret diff Signed sequence difference ( seq1 - seq2 ), taken modulo 2**32
413 *
414 * Analogous to memcmp(), returns an integer less than, equal to, or
415 * greater than zero if @c seq1 is found, respectively, to be before,
416 * equal to, or after @c seq2. The subtraction wraps as unsigned before the cast to int32_t.
417 */
418static inline __attribute__ (( always_inline )) int32_t
419tcp_cmp ( uint32_t seq1, uint32_t seq2 ) {
420 return ( ( int32_t ) ( seq1 - seq2 ) );
421}
422
423/**
424 * Check if TCP sequence number lies within window
425 *
426 * @v seq Sequence number
427 * @v start Start of window
428 * @v len Length of window
429 * @ret in_window Sequence number is within window
430 */
432 uint32_t len ) {
433 return ( ( seq - start ) < len );
434}
435
436/** TCP finish wait time
437 *
438 * Currently set to one second, since we should not allow a slowly
439 * responding server to substantially delay a call to shutdown().
440 */
441#define TCP_FINISH_TIMEOUT ( 1 * TICKS_PER_SEC )
442
443/** TCP statistics */
445 /** Number of packets received */
446 unsigned long in_segs;
447 /** Total number of packets discarded due to lack of memory */
448 unsigned long in_discards;
449 /** Total number of packets received out of order */
450 unsigned long in_out_of_order;
451
452 /** Number of octets received (including duplicate data) */
453 unsigned long in_octets;
454 /** Number of octets processed and passed to upper layer */
455 unsigned long in_octets_good;
456};
457
458extern struct tcpip_protocol tcp_protocol __tcpip_protocol;
459
460extern struct tcp_statistics tcp_stats;
461
462#endif /* _IPXE_TCP_H */
unsigned short uint16_t
Definition stdint.h:11
unsigned int uint32_t
Definition stdint.h:12
signed int int32_t
Definition stdint.h:17
unsigned char uint8_t
Definition stdint.h:10
ring len
Length.
Definition dwmac.h:226
uint32_t start
Starting offset.
Definition netvsc.h:1
#define FILE_LICENCE(_licence)
Declare a particular licence as applying to a file.
Definition compiler.h:896
#define FILE_SECBOOT(_status)
Declare a file's UEFI Secure Boot permission status.
Definition compiler.h:926
struct tcp_sack_permitted_option __attribute
u16 seq
802.11 Sequence Control field
Definition ieee80211.h:5
#define __attribute__(x)
Definition compiler.h:10
Transport-network layer interface.
#define __tcpip_protocol
Declare a TCP/IP transport-layer protocol.
Definition tcpip.h:182
A TCP header.
Definition tcp.h:20
uint8_t hlen
Definition tcp.h:25
uint8_t flags
Definition tcp.h:26
uint32_t seq
Definition tcp.h:23
uint16_t csum
Definition tcp.h:28
uint16_t win
Definition tcp.h:27
uint16_t urg
Definition tcp.h:29
uint16_t src
Definition tcp.h:21
uint16_t dest
Definition tcp.h:22
uint32_t ack
Definition tcp.h:24
TCP MSS option.
Definition tcp.h:49
uint8_t length
Definition tcp.h:51
uint16_t mss
Definition tcp.h:52
uint8_t kind
Definition tcp.h:50
Generic TCP option.
Definition tcp.h:43
uint8_t kind
Definition tcp.h:44
uint8_t length
Definition tcp.h:45
Parsed TCP options.
Definition tcp.h:143
const struct tcp_window_scale_option * wsopt
Window scale option, if present.
Definition tcp.h:145
const struct tcp_timestamp_option * tsopt
Timestamp option, if present.
Definition tcp.h:149
const struct tcp_sack_permitted_option * spopt
SACK permitted option, if present.
Definition tcp.h:147
TCP selective acknowledgement block.
Definition tcp.h:105
uint32_t right
Definition tcp.h:107
uint32_t left
Definition tcp.h:106
TCP selective acknowledgement option.
Definition tcp.h:99
uint8_t length
Definition tcp.h:101
uint8_t kind
Definition tcp.h:100
Padded TCP selective acknowledgement option (used for sending)
Definition tcp.h:117
uint8_t nop[2]
Definition tcp.h:118
struct tcp_sack_option sackopt
Definition tcp.h:119
TCP selective acknowledgement permitted option.
Definition tcp.h:84
Padded TCP selective acknowledgement permitted option (used for sending)
Definition tcp.h:90
struct tcp_sack_permitted_option spopt
Definition tcp.h:92
TCP statistics.
Definition tcp.h:444
unsigned long in_segs
Number of packets received.
Definition tcp.h:446
unsigned long in_octets
Number of octets received (including duplicate data)
Definition tcp.h:453
unsigned long in_out_of_order
Total number of packets received out of order.
Definition tcp.h:450
unsigned long in_octets_good
Number of octets processed and passed to upper layer.
Definition tcp.h:455
unsigned long in_discards
Total number of packets discarded due to lack of memory.
Definition tcp.h:448
TCP timestamp option.
Definition tcp.h:126
uint8_t length
Definition tcp.h:128
uint32_t tsval
Definition tcp.h:129
uint32_t tsecr
Definition tcp.h:130
Padded TCP timestamp option (used for sending)
Definition tcp.h:134
struct tcp_timestamp_option tsopt
Definition tcp.h:136
TCP window scale option.
Definition tcp.h:59
Padded TCP window scale option (used for sending)
Definition tcp.h:66
struct tcp_window_scale_option wsopt
Definition tcp.h:68
A transport-layer protocol of the TCP/IP stack (eg.
Definition tcpip.h:105
struct tcp_statistics tcp_stats
TCP statistics.
Definition tcp.c:172
static int tcp_in_window(uint32_t seq, uint32_t start, uint32_t len)
Check if TCP sequence number lies within window.
Definition tcp.h:431
static int32_t tcp_cmp(uint32_t seq1, uint32_t seq2)
Compare TCP sequence numbers.
Definition tcp.h:419