TCP三次握手源码分析

Posted hujisha

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了TCP三次握手源码分析相关的知识,希望对你有一定的参考价值。

TCP握手分为三个阶段。在握手开始之前,客户端套接字的状态为“TCP_CLOSE”,服务端的监听套接字则已经通过listen处于“TCP_LISTEN”状态。以下是这三个阶段:

(1)客户端发送一个标志位中SYN位为1的报文给服务端,并设套接字状态为“TCP_SYN_SENT”

(2)服务端接到SYN报文,设套接字状态为“TCP_SYN_RECV”,并回送一个SYN位和ACK位均为1的报文

(3)客户端接到SYN+ACK报文,回送一个ACK位为1的报文,设套接字状态为“TCP_ESTABLISHED”,服务端接到ACK报文后,同样设置为“TCP_ESTABLISHED”

技术图片

 

第一阶段

第一阶段客户端通过调用connect函数完成,connect实际上调用了内核中的__sys_connect函数。

以下代码是__sys_connect函数在文件net/socket.c中对应的系统调用定义,由此可以看出,__sys_connect函数就是connect系统调用在内核中的实现。

SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
		int, addrlen)
{
	return __sys_connect(fd, uservaddr, addrlen);
}

从__sys_connect函数开始进入三次握手的第一阶段,以下是部分代码:

int __sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen)
{
	...
	sock = sockfd_lookup_light(fd, &err, &fput_needed);
	...
	err = move_addr_to_kernel(uservaddr, addrlen, &address);
	...
	err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen,
				 sock->file->f_flags);
	...
}
代码中的sock->ops->connect对于TCP套接字是inet_stream_connect函数,它经由__inet_stream_connect调用sk->sk_prot->connect,也就是tcp_v4_connect函数,现在转到tcp_v4_connect函数:
  1 int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
  2 {
  3     struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
  4     struct inet_sock *inet = inet_sk(sk);
  5     struct tcp_sock *tp = tcp_sk(sk);
  6     __be16 orig_sport, orig_dport;
  7     __be32 daddr, nexthop;
  8     struct flowi4 *fl4;
  9     struct rtable *rt;
 10     int err;
 11     struct ip_options_rcu *inet_opt;
 12     struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
 13 
 14     if (addr_len < sizeof(struct sockaddr_in))
 15         return -EINVAL;
 16 
 17     if (usin->sin_family != AF_INET)
 18         return -EAFNOSUPPORT;
 19 
 20     nexthop = daddr = usin->sin_addr.s_addr;
 21     inet_opt = rcu_dereference_protected(inet->inet_opt,
 22                          lockdep_sock_is_held(sk));
 23     if (inet_opt && inet_opt->opt.srr) {
 24         if (!daddr)
 25             return -EINVAL;
 26         nexthop = inet_opt->opt.faddr;
 27     }
 28 
 29     orig_sport = inet->inet_sport;
 30     orig_dport = usin->sin_port;
 31     fl4 = &inet->cork.fl.u.ip4;
 32     rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
 33                   RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
 34                   IPPROTO_TCP,
 35                   orig_sport, orig_dport, sk);
 36     if (IS_ERR(rt)) {
 37         err = PTR_ERR(rt);
 38         if (err == -ENETUNREACH)
 39             IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
 40         return err;
 41     }
 42 
 43     if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
 44         ip_rt_put(rt);
 45         return -ENETUNREACH;
 46     }
 47 
 48     if (!inet_opt || !inet_opt->opt.srr)
 49         daddr = fl4->daddr;
 50 
 51     if (!inet->inet_saddr)
 52         inet->inet_saddr = fl4->saddr;
 53     sk_rcv_saddr_set(sk, inet->inet_saddr);
 54 
 55     if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
 56         /* Reset inherited state */
 57         tp->rx_opt.ts_recent       = 0;
 58         tp->rx_opt.ts_recent_stamp = 0;
 59         if (likely(!tp->repair))
 60             tp->write_seq       = 0;
 61     }
 62 
 63     inet->inet_dport = usin->sin_port;
 64     sk_daddr_set(sk, daddr);
 65 
 66     inet_csk(sk)->icsk_ext_hdr_len = 0;
 67     if (inet_opt)
 68         inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
 69 
 70     tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;
 71 
 72     /* Socket identity is still unknown (sport may be zero).
 73      * However we set state to SYN-SENT and not releasing socket
 74      * lock select source port, enter ourselves into the hash tables and
 75      * complete initialization after this.
 76      */
 77     tcp_set_state(sk, TCP_SYN_SENT);
 78     err = inet_hash_connect(tcp_death_row, sk);
 79     if (err)
 80         goto failure;
 81 
 82     sk_set_txhash(sk);
 83 
 84     rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
 85                    inet->inet_sport, inet->inet_dport, sk);
 86     if (IS_ERR(rt)) {
 87         err = PTR_ERR(rt);
 88         rt = NULL;
 89         goto failure;
 90     }
 91     /* OK, now commit destination to socket.  */
 92     sk->sk_gso_type = SKB_GSO_TCPV4;
 93     sk_setup_caps(sk, &rt->dst);
 94     rt = NULL;
 95 
 96     if (likely(!tp->repair)) {
 97         if (!tp->write_seq)
 98             tp->write_seq = secure_tcp_seq(inet->inet_saddr,
 99                                inet->inet_daddr,
100                                inet->inet_sport,
101                                usin->sin_port);
102         tp->tsoffset = secure_tcp_ts_off(sock_net(sk),
103                          inet->inet_saddr,
104                          inet->inet_daddr);
105     }
106 
107     inet->inet_id = tp->write_seq ^ jiffies;
108 
109     if (tcp_fastopen_defer_connect(sk, &err))
110         return err;
111     if (err)
112         goto failure;
113 
114     err = tcp_connect(sk);
115 
116     if (err)
117         goto failure;
118 
119     return 0;
120 
121 failure:
122     /*
123      * This unhashes the socket and releases the local port,
124      * if necessary.
125      */
126     tcp_set_state(sk, TCP_CLOSE);
127     ip_rt_put(rt);
128     sk->sk_route_caps = 0;
129     inet->inet_dport = 0;
130     return err;
131 }

在tcp_v4_connect函数中为套接字填充一些变量,将套接字的状态修改为“TCP_SYN_SENT”,然后进入tcp_connect函数。

 

 1 int tcp_connect(struct sock *sk)
 2 {
 3     struct tcp_sock *tp = tcp_sk(sk);
 4     struct sk_buff *buff;
 5     int err;
 6 
 7     tcp_call_bpf(sk, BPF_SOCK_OPS_TCP_CONNECT_CB, 0, NULL);
 8 
 9     if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk))
10         return -EHOSTUNREACH; /* Routing failure or similar. */
11 
12     tcp_connect_init(sk);
13 
14     if (unlikely(tp->repair)) {
15         tcp_finish_connect(sk, NULL);
16         return 0;
17     }
18 
19     buff = sk_stream_alloc_skb(sk, 0, sk->sk_allocation, true);
20     if (unlikely(!buff))
21         return -ENOBUFS;
22 
23     tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN);
24     tcp_mstamp_refresh(tp);
25     tp->retrans_stamp = tcp_time_stamp(tp);
26     tcp_connect_queue_skb(sk, buff);
27     tcp_ecn_send_syn(sk, buff);
28     tcp_rbtree_insert(&sk->tcp_rtx_queue, buff);
29 
30     /* Send off SYN; include data in Fast Open. */
31     err = tp->fastopen_req ? tcp_send_syn_data(sk, buff) :
32           tcp_transmit_skb(sk, buff, 1, sk->sk_allocation);
33     
39     ...47 
48     /* Timer for repeating the SYN until an answer. */
49     inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
50                   inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
51     return 0;
52 }

在未使用Fast Open的情况下,通过调用tcp_transmit_skb函数构造SYN报文并发送出去(使用Fast Open时则调用tcp_send_syn_data),并设立一个重传定时器(ICSK_TIME_RETRANS),用于在收到应答之前超时重发SYN。

 

这一阶段函数的调用栈:

__sys_connect -> inet_stream_connect -> __inet_stream_connect -> tcp_v4_connect -> tcp_connect -> tcp_transmit_skb

第二阶段

这一阶段从服务端通过tcp_v4_rcv函数从IP层接收报文开始,以下是tcp_v4_rcv的部分代码:

 1 int tcp_v4_rcv(struct sk_buff *skb)
 2 {
 3     ...
 4 
 5     if (sk->sk_state == TCP_LISTEN) {
 6         ret = tcp_v4_do_rcv(sk, skb);
 7         goto put_and_return;
 8     }
 9 
10     ...
11 
12 put_and_return:
13     if (refcounted)
14         sock_put(sk);
15 
16     return ret;
17         ...
18 }

由于当前套接字状态为“TCP_LISTEN”,进入tcp_v4_do_rcv函数执行

 1 int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
 2 {
 3     ...
 4 
 5     if (sk->sk_state == TCP_LISTEN) {
 6 
 7     if (tcp_rcv_state_process(sk, skb)) {
 8         rsk = sk;
 9         goto reset;
10     }
11     return 0;
12        ...
13 }

tcp_rcv_state_process函数专门用来处理套接字状态的转换,先贴出一张状态转换图:

技术图片

 

 1 int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
 2 {
 3         ...
 4 
 5     switch (sk->sk_state) {
 6     case TCP_LISTEN:
 7         if (th->ack)
 8             return 1;
 9 
10         if (th->rst)
11             goto discard;
12 
13         if (th->syn) {
14             if (th->fin)
15                 goto discard;
16             /* It is possible that we process SYN packets from backlog,
17              * so we need to make sure to disable BH and RCU right there.
18              */
19             rcu_read_lock();
20             local_bh_disable();
21             acceptable = icsk->icsk_af_ops->conn_request(sk, skb) >= 0;
22             local_bh_enable();
23             rcu_read_unlock();
24 
25             if (!acceptable)
26                 return 1;
27             consume_skb(skb);
28             return 0;
29         }
30         goto discard;
31                 ...
32 }

这是tcp_rcv_state_process在“TCP_LISTEN”状态下执行的代码,核心在于21行的icsk->icsk_af_ops->conn_request,在此处依次执行tcp_v4_conn_request和tcp_conn_request。

以下是tcp_conn_request的部分代码:

 1 if (fastopen_sk) {
 2         af_ops->send_synack(fastopen_sk, dst, &fl, req,
 3                     &foc, TCP_SYNACK_FASTOPEN);
 4         /* Add the child socket directly into the accept queue */
 5         inet_csk_reqsk_queue_add(sk, req, fastopen_sk);
 6         sk->sk_data_ready(sk);
 7         bh_unlock_sock(fastopen_sk);
 8         sock_put(fastopen_sk);
 9     } else {
10         tcp_rsk(req)->tfo_listener = false;
11         if (!want_cookie)
12             inet_csk_reqsk_queue_hash_add(sk, req,
13                 tcp_timeout_init((struct sock *)req));
14         af_ops->send_synack(sk, dst, &fl, req, &foc,
15                     !want_cookie ? TCP_SYNACK_NORMAL :
16                            TCP_SYNACK_COOKIE);
17         if (want_cookie) {
18             reqsk_free(req);
19             return 0;
20         }
21     }

主要执行了send_synack函数,send_synack函数用于将SYN+ACK报文发送出去。

这一阶段函数的调用栈:

tcp_v4_rcv -> tcp_v4_do_rcv -> tcp_rcv_state_process -> tcp_v4_conn_request -> tcp_conn_request -> tcp_v4_send_synack

 

第三阶段

同上一阶段一样,从ip接收到报文后一路执行tcp_v4_rcv, tcp_v4_do_rcv,进入tcp_rcv_state_process函数:

 1 int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 2               const struct tcphdr *th, unsigned int len)
 3 {
 4     ...
 5     switch (sk->sk_state) {
 6     case TCP_SYN_SENT:
 7         //进入到synack报文的处理流程
 8         queued = tcp_rcv_synsent_state_process(sk, skb, th, len);
 9         if (queued >= 0)
10             return queued;
11 
12         /* Do step6 onward by hand. */
13         tcp_urg(sk, skb, th);
14         __kfree_skb(skb);
15         tcp_data_snd_check(sk);
16         return 0;
17     }
18     ...
19 }

在tcp_rcv_synsent_state_process函数中又调用了tcp_finish_connect函数,并在其返回后调用tcp_send_ack函数,这里一共做了三件事:
(1)tcp_finish_connect将套接字状态设置为"TCP_ESTABLISHED"
(2)tcp_rcv_synsent_state_process调用tcp_send_ack函数发送一个ACK包(如果选择延迟确认,则由延迟确认定时器稍后发送)
(3)tcp_finish_connect初始化一些参数
tcp_send_ack函数又调用tcp_transmit_skb将ACK报文从网络上发出去。
最后是服务端接收到ACK报文,在tcp_v4_rcv中查找到处于"TCP_NEW_SYN_RECV"状态的请求套接字,经tcp_check_req创建子套接字后,通过tcp_child_process进入tcp_rcv_state_process函数,将子套接字的状态由"TCP_SYN_RECV"设置为"TCP_ESTABLISHED",至此,三次握手过程结束。

这一阶段函数的调用栈:

tcp_v4_rcv -> tcp_v4_do_rcv -> tcp_rcv_state_process -> tcp_rcv_synsent_state_process -> tcp_send_ack -> tcp_transmit_skb

tcp_v4_rcv -> tcp_check_req -> tcp_child_process -> tcp_rcv_state_process

以上是关于TCP三次握手源码分析的主要内容,如果未能解决你的问题,请参考以下文章

TCP三次握手源码分析

TCP三次握手源码分析

深入理解TCP协议及其源代码——三次握手

深入理解TCP实现,TCP三次握手四次挥手全讲明白了

用tcpdump分析tcp三次握手,四次挥手

TCP协议三次握手过程分析