深入理解TCP协议及其源代码——三次握手
Posted maotx
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了深入理解TCP协议及其源代码——三次握手相关的知识,希望对你有一定的参考价值。
Wireshark分析报文
对TCP三次握手过程进行抓包分析,并通过Wireshark的Analyze分析出tcp握手过程,通过截图体现传输内容。
1、捕获大量的由本地主机到远程服务器的TCP分组;
2、浏览追踪信息
在显示筛选规则编辑框中输入“tcp”,可以看到在本地主机和服务器之间传输的一系列tcp和HTTP消息,你应该能看到包含SYN Segment的三次握手。通过Analyze的Follow TCP Stream分析出传输内容。写出其中某TCP数据包的源IP地址,目的IP地址,源端口,目的端口,窗口大小。
筛选:
分析某一条数据包:
那么:
源IP地址:172.16.72.229
目的IP地址:36.110.171.40
源端口:51859
目的端口:80
窗口大小:8192
TCP的三次握手
TCP的三次握手大家都不陌生,下面用wireshark这个工具抓包,进一步的说明三次握手的细节。
1.由客户端发起TCP连接请求:客户端发送一条报文,其中SYN标志位置为1(SYN = 1),并携带初始序号seq = x。该报文段称为SYN报文段。
2.服务器收到这条报文后,返回给客户端一条报文,包含Ack位,SYN,以及seq位。 其中ack = x+1; SYN = 1; seq = y。该报文段称为SYNACK报文段
3.当客户端收到SYNACK报文段之后,客户端需要再给服务器发送另外一个报文段,进行确认。该报文段的SYN = 0, seq = x +1, ack = y+1;
TCP协议源代码跟踪分析
1.TCP的三次握手从用户程序的角度看,就是客户端connect和服务端accept建立连接时背后完成的工作。由上次的实验我们可以知道,在socket接口层这两个socket API函数分别对应着sys_connect和sys_accept4函数。课上老师说明,sys_connect和sys_accept4是通过函数指针sock->ops->connect和sock->ops->accept调用具体的函数来实现的,对TCP而言最终调用的是tcp_v4_connect函数和inet_csk_accept函数,这两个函数进一步触及TCP数据收发过程tcp_transmit_skb和tcp_v4_rcv函数。
在net/ipv4/tcp_ipv4.c文件中,结构体变量struct proto tcp_prot指定了TCP协议栈的访问接口函数:
/* tcp_prot: the struct proto instance named "TCP". Each designated initializer binds one protocol operation to its TCP handler; the ones this article follows are .connect = tcp_v4_connect, .accept = inet_csk_accept and .backlog_rcv = tcp_v4_do_rcv. The two compat_* entries are only present when CONFIG_COMPAT is defined. NOTE: the bare integers (1, 2, 3, ...) in the listing below are line numbers carried over from the original listing, not C tokens. */
1 struct proto tcp_prot = { 2 .name = "TCP", 3 .owner = THIS_MODULE, 4 .close = tcp_close, 5 .pre_connect = tcp_v4_pre_connect, 6 .connect = tcp_v4_connect, 7 .disconnect = tcp_disconnect, 8 .accept = inet_csk_accept, 9 .ioctl = tcp_ioctl, 10 .init = tcp_v4_init_sock, 11 .destroy = tcp_v4_destroy_sock, 12 .shutdown = tcp_shutdown, 13 .setsockopt = tcp_setsockopt, 14 .getsockopt = tcp_getsockopt, 15 .keepalive = tcp_set_keepalive, 16 .recvmsg = tcp_recvmsg, 17 .sendmsg = tcp_sendmsg, 18 .sendpage = tcp_sendpage, 19 .backlog_rcv = tcp_v4_do_rcv, 20 .release_cb = tcp_release_cb, 21 .hash = inet_hash, 22 .unhash = inet_unhash, 23 .get_port = inet_csk_get_port, 24 .enter_memory_pressure = tcp_enter_memory_pressure, 25 .leave_memory_pressure = tcp_leave_memory_pressure, 26 .stream_memory_free = tcp_stream_memory_free, 27 .sockets_allocated = &tcp_sockets_allocated, 28 .orphan_count = &tcp_orphan_count, 29 .memory_allocated = &tcp_memory_allocated, 30 .memory_pressure = &tcp_memory_pressure, 31 .sysctl_mem = sysctl_tcp_mem, 32 .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_tcp_wmem), 33 .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem), 34 .max_header = MAX_TCP_HEADER, 35 .obj_size = sizeof(struct tcp_sock), 36 .slab_flags = SLAB_TYPESAFE_BY_RCU, 37 .twsk_prot = &tcp_timewait_sock_ops, 38 .rsk_prot = &tcp_request_sock_ops, 39 .h.hashinfo = &tcp_hashinfo, 40 .no_autobind = true, 41 #ifdef CONFIG_COMPAT 42 .compat_setsockopt = compat_tcp_setsockopt, 43 .compat_getsockopt = compat_tcp_getsockopt, 44 #endif 45 .diag_destroy = tcp_abort, 46 };
在这里,我们可以看到socket接口层里sock->ops->connect和sock->ops->accept实际调用的函数tcp_v4_connect和inet_csk_accept。
2.接下来通过MenuOS的内核调试环境设置断点跟踪tcp_v4_connect函数和inet_csk_accept函数来进一步验证三次握手的过程。
在tcp_v4_connect处打个断点:
可以发现tcp_v4_connect函数在net/ipv4/tcp_ipv4.c处定义,看下代码:
/* tcp_v4_connect(sk, uaddr, addr_len): start an outgoing IPv4 TCP connection on sk towards the sockaddr_in in uaddr. Steps, in order: (1) reject addr_len shorter than struct sockaddr_in with -EINVAL and a family other than AF_INET with -EAFNOSUPPORT; (2) if the socket has a source-route option (opt.srr), route via opt.faddr and reject a zero destination with -EINVAL; (3) look up a route with ip_route_connect(), returning its error (and counting IPSTATS_MIB_OUTNOROUTES on -ENETUNREACH) or -ENETUNREACH for multicast/broadcast routes; (4) record source/destination address and destination port on the socket, clearing inherited timestamp state when the destination changed; (5) set state TCP_SYN_SENT and call inet_hash_connect() -- per the in-code comment this is where the source port is selected and the socket enters the hash tables; (6) refresh the route with ip_route_newports(), pick the initial write_seq/tsoffset unless tp->repair is set; (7) return early if tcp_fastopen_defer_connect() says so, otherwise call tcp_connect(). Returns 0 on success. On the "failure" path the state is reset to TCP_CLOSE, the route is released, sk_route_caps and inet_dport are cleared, and err is returned. NOTE: the bare integers in the listing are line numbers from the original listing, not C tokens. */
1 /* This will initiate an outgoing connection. */ 2 int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) 3 { 4 struct sockaddr_in *usin = (struct sockaddr_in *)uaddr; 5 struct inet_sock *inet = inet_sk(sk); 6 struct tcp_sock *tp = tcp_sk(sk); 7 __be16 orig_sport, orig_dport; 8 __be32 daddr, nexthop; 9 struct flowi4 *fl4; 10 struct rtable *rt; 11 int err; 12 struct ip_options_rcu *inet_opt; 13 struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row; 14 15 16 if (addr_len < sizeof(struct sockaddr_in)) 17 return -EINVAL; 18 19 20 if (usin->sin_family != AF_INET) 21 return -EAFNOSUPPORT; 22 23 24 nexthop = daddr = usin->sin_addr.s_addr; 25 inet_opt = rcu_dereference_protected(inet->inet_opt, 26 lockdep_sock_is_held(sk)); 27 if (inet_opt && inet_opt->opt.srr) { 28 if (!daddr) 29 return -EINVAL; 30 nexthop = inet_opt->opt.faddr; 31 } 32 33 34 orig_sport = inet->inet_sport; 35 orig_dport = usin->sin_port; 36 fl4 = &inet->cork.fl.u.ip4; 37 rt = ip_route_connect(fl4, nexthop, inet->inet_saddr, 38 RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, 39 IPPROTO_TCP, 40 orig_sport, orig_dport, sk); 41 if (IS_ERR(rt)) { 42 err = PTR_ERR(rt); 43 if (err == -ENETUNREACH) 44 IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); 45 return err; 46 } 47 48 49 if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) { 50 ip_rt_put(rt); 51 return -ENETUNREACH; 52 } 53 54 55 if (!inet_opt || !inet_opt->opt.srr) 56 daddr = fl4->daddr; 57 58 59 if (!inet->inet_saddr) 60 inet->inet_saddr = fl4->saddr; 61 sk_rcv_saddr_set(sk, inet->inet_saddr); 62 63 64 if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) { 65 /* Reset inherited state */ 66 tp->rx_opt.ts_recent = 0; 67 tp->rx_opt.ts_recent_stamp = 0; 68 if (likely(!tp->repair)) 69 tp->write_seq = 0; 70 } 71 72 73 inet->inet_dport = usin->sin_port; 74 sk_daddr_set(sk, daddr); 75 76 77 inet_csk(sk)->icsk_ext_hdr_len = 0; 78 if (inet_opt) 79 inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen; 80 81 82 
tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT; 83 84 85 /* Socket identity is still unknown (sport may be zero). 86 * However we set state to SYN-SENT and not releasing socket 87 * lock select source port, enter ourselves into the hash tables and 88 * complete initialization after this. 89 */ 90 tcp_set_state(sk, TCP_SYN_SENT); 91 err = inet_hash_connect(tcp_death_row, sk); 92 if (err) 93 goto failure; 94 95 96 sk_set_txhash(sk); 97 98 99 rt = ip_route_newports(fl4, rt, orig_sport, orig_dport, 100 inet->inet_sport, inet->inet_dport, sk); 101 if (IS_ERR(rt)) { 102 err = PTR_ERR(rt); 103 rt = NULL; 104 goto failure; 105 } 106 /* OK, now commit destination to socket. */ 107 sk->sk_gso_type = SKB_GSO_TCPV4; 108 sk_setup_caps(sk, &rt->dst); 109 rt = NULL; 110 111 112 if (likely(!tp->repair)) { 113 if (!tp->write_seq) 114 tp->write_seq = secure_tcp_seq(inet->inet_saddr, 115 inet->inet_daddr, 116 inet->inet_sport, 117 usin->sin_port); 118 tp->tsoffset = secure_tcp_ts_off(sock_net(sk), 119 inet->inet_saddr, 120 inet->inet_daddr); 121 } 122 123 124 inet->inet_id = tp->write_seq ^ jiffies; 125 126 127 if (tcp_fastopen_defer_connect(sk, &err)) 128 return err; 129 if (err) 130 goto failure; 131 132 133 err = tcp_connect(sk); 134 135 136 if (err) 137 goto failure; 138 139 140 return 0; 141 142 143 failure: 144 /* 145 * This unhashes the socket and releases the local port, 146 * if necessary. 147 */ 148 tcp_set_state(sk, TCP_CLOSE); 149 ip_rt_put(rt); 150 sk->sk_route_caps = 0; 151 inet->inet_dport = 0; 152 return err; 153 }
分析代码,可以看出tcp_v4_connect函数的主要作用就是发起一个TCP连接,从这个函数中可以看到它调用了IP层提供的一些服务,比如ip_route_connect和ip_route_newports,同时在tcp_v4_connect函数中,调用了tcp_set_state函数,它设置了TCP_SYN_SENT,并进一步调用了tcp_connect(sk)来实际构造SYN并发送出去。
tcp_connect函数具体负责构造一个携带SYN标志位的TCP头并发送出去,同时还设置了计时器超时重发。这个函数定义在net/ipv4/tcp_output.c文件中,看看代码:
/* tcp_connect(sk): build the SYN segment for sk and send it. Returns -EHOSTUNREACH if icsk_af_ops->rebuild_header() fails and -ENOBUFS if the skb cannot be allocated. When tp->repair is set it calls tcp_finish_connect() and returns 0 without sending anything. Otherwise it initialises the skb with TCPHDR_SYN at sequence tp->write_seq (post-incremented, so the SYN consumes one sequence number), queues it, inserts it into sk->tcp_rtx_queue, and transmits it with tcp_send_syn_data() when tp->fastopen_req is set or tcp_transmit_skb() otherwise. Only -ECONNREFUSED from the transmit is propagated; any other transmit result is ignored and the function goes on to update snd_nxt/pushed_seq, count TCP_MIB_ACTIVEOPENS, arm the ICSK_TIME_RETRANS timer with icsk_rto (bounded by TCP_RTO_MAX) and return 0. NOTE: the bare integers in the listing are line numbers from the original listing, not C tokens. */
1 /* Build a SYN and send it off. */ 2 int tcp_connect(struct sock *sk) 3 { 4 struct tcp_sock *tp = tcp_sk(sk); 5 struct sk_buff *buff; 6 int err; 7 8 9 tcp_call_bpf(sk, BPF_SOCK_OPS_TCP_CONNECT_CB, 0, NULL); 10 11 12 if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk)) 13 return -EHOSTUNREACH; /* Routing failure or similar. */ 14 15 16 tcp_connect_init(sk); 17 18 19 if (unlikely(tp->repair)) { 20 tcp_finish_connect(sk, NULL); 21 return 0; 22 } 23 24 25 buff = sk_stream_alloc_skb(sk, 0, sk->sk_allocation, true); 26 if (unlikely(!buff)) 27 return -ENOBUFS; 28 29 30 tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN); 31 tcp_mstamp_refresh(tp); 32 tp->retrans_stamp = tcp_time_stamp(tp); 33 tcp_connect_queue_skb(sk, buff); 34 tcp_ecn_send_syn(sk, buff); 35 tcp_rbtree_insert(&sk->tcp_rtx_queue, buff); 36 37 38 /* Send off SYN; include data in Fast Open. */ 39 err = tp->fastopen_req ? tcp_send_syn_data(sk, buff) : 40 tcp_transmit_skb(sk, buff, 1, sk->sk_allocation); 41 if (err == -ECONNREFUSED) 42 return err; 43 44 45 /* We change tp->snd_nxt after the tcp_transmit_skb() call 46 * in order to make this packet get counted in tcpOutSegs. 47 */ 48 tp->snd_nxt = tp->write_seq; 49 tp->pushed_seq = tp->write_seq; 50 buff = tcp_send_head(sk); 51 if (unlikely(buff)) { 52 tp->snd_nxt = TCP_SKB_CB(buff)->seq; 53 tp->pushed_seq = TCP_SKB_CB(buff)->seq; 54 } 55 TCP_INC_STATS(sock_net(sk), TCP_MIB_ACTIVEOPENS); 56 57 58 /* Timer for repeating the SYN until an answer. */ 59 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, 60 inet_csk(sk)->icsk_rto, TCP_RTO_MAX); 61 return 0; 62 } 63 EXPORT_SYMBOL(tcp_connect);
其中tcp_transmit_skb函数将tcp数据发送出去。
这边,客户端的一个tcp数据包发送出去了,服务端将做出什么反应呢,下面来看看服务端的inet_csk_accept函数,首先在inet_csk_accept处打上断点:
inet_csk_accept函数在net/ipv4/inet_connection_sock.c文件中:
/* inet_csk_accept(sk, flags, err, kern): take the next pending connection off the accept queue of listening socket sk and return its child socket (req->sk). Runs under lock_sock(sk)/release_sock(sk). Error path (returns NULL and stores the code in *err): -EINVAL if sk is not in TCP_LISTEN; -EAGAIN if the queue is empty and the receive timeout derived from flags & O_NONBLOCK is zero; otherwise whatever inet_csk_wait_for_connect() returns. *err is written only on the error path. For a TCP request whose tfo_listener is still set, req->sk is cleared under fastopenq.lock and the local req is set to NULL so that reqsk_put() is not called here. The kern parameter is not referenced in this body. NOTE: the bare integers in the listing are line numbers from the original listing, not C tokens. */
1 /* 2 * This will accept the next outstanding connection. 3 */ 4 struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern) 5 { 6 struct inet_connection_sock *icsk = inet_csk(sk); 7 struct request_sock_queue *queue = &icsk->icsk_accept_queue; 8 struct request_sock *req; 9 struct sock *newsk; 10 int error; 11 12 13 lock_sock(sk); 14 15 16 /* We need to make sure that this socket is listening, 17 * and that it has something pending. 18 */ 19 error = -EINVAL; 20 if (sk->sk_state != TCP_LISTEN) 21 goto out_err; 22 23 24 /* Find already established connection */ 25 if (reqsk_queue_empty(queue)) { 26 long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); 27 28 29 /* If this is a non blocking socket don't sleep */ 30 error = -EAGAIN; 31 if (!timeo) 32 goto out_err; 33 34 35 error = inet_csk_wait_for_connect(sk, timeo); 36 if (error) 37 goto out_err; 38 } 39 req = reqsk_queue_remove(queue, sk); 40 newsk = req->sk; 41 42 43 if (sk->sk_protocol == IPPROTO_TCP && 44 tcp_rsk(req)->tfo_listener) { 45 spin_lock_bh(&queue->fastopenq.lock); 46 if (tcp_rsk(req)->tfo_listener) { 47 /* We are still waiting for the final ACK from 3WHS 48 * so can't free req now. Instead, we set req->sk to 49 * NULL to signify that the child socket is taken 50 * so reqsk_fastopen_remove() will free the req 51 * when 3WHS finishes (or is aborted). 52 */ 53 req->sk = NULL; 54 req = NULL; 55 } 56 spin_unlock_bh(&queue->fastopenq.lock); 57 } 58 out: 59 release_sock(sk); 60 if (req) 61 reqsk_put(req); 62 return newsk; 63 out_err: 64 newsk = NULL; 65 req = NULL; 66 *err = error; 67 goto out; 68 } 69 EXPORT_SYMBOL(inet_csk_accept);
服务端的inet_csk_accept函数会从请求队列中取出一个连接请求,如果队列为空则通过inet_csk_wait_for_connect阻塞住等待客户端的连接。
inet_csk_wait_for_connect函数定义在net/ipv4/inet_connection_sock.c文件中:
/* inet_csk_wait_for_connect(sk, timeo): sleep until the accept queue of sk is non-empty. Each loop iteration registers an exclusive, interruptible waiter on sk_sleep(sk), drops the socket lock with release_sock(), calls schedule_timeout(timeo) if the queue is still empty, then re-takes the lock with lock_sock() before re-checking. Exit conditions, checked in this order: queue non-empty -> 0; sk no longer in TCP_LISTEN -> -EINVAL; signal pending -> sock_intr_errno(timeo); timeo has reached zero -> -EAGAIN. finish_wait() is always called before returning. NOTE: the bare integers in the listing are line numbers from the original listing, not C tokens. */
1 static int inet_csk_wait_for_connect(struct sock *sk, long timeo) 2 { 3 struct inet_connection_sock *icsk = inet_csk(sk); 4 DEFINE_WAIT(wait); 5 int err; 6 for (;;) { 7 prepare_to_wait_exclusive(sk_sleep(sk), &wait, 8 TASK_INTERRUPTIBLE); 9 release_sock(sk); 10 if (reqsk_queue_empty(&icsk->icsk_accept_queue)) 11 timeo = schedule_timeout(timeo); 12 sched_annotate_sleep(); 13 lock_sock(sk); 14 err = 0; 15 if (!reqsk_queue_empty(&icsk->icsk_accept_queue)) 16 break; 17 err = -EINVAL; 18 if (sk->sk_state != TCP_LISTEN) 19 break; 20 err = sock_intr_errno(timeo); 21 if (signal_pending(current)) 22 break; 23 err = -EAGAIN; 24 if (!timeo) 25 break; 26 } 27 finish_wait(sk_sleep(sk), &wait); 28 return err; 29 }
根据代码可以分析出整个三次握手的过程为:客户端通过tcp_v4_connect函数调用到tcp_connect函数,将SYN请求数据包发送出去;服务器端则通过inet_csk_accept函数调用inet_csk_wait_for_connect函数,在其中的for循环里进入阻塞,直到连接请求队列非空(或者套接字不再处于监听状态、被信号中断、超时)才跳出循环。connect启动到返回和accept返回之间就是所谓三次握手的时间。
3.三次握手中携带SYN/ACK的TCP头数据的发送和接收
以上分析了用户程序调用socket接口、通过系统调用陷入内核进入内核态的socket接口层代码,然后根据创建socket时指定协议选择适当的函数指针进入协议处理代码中。那么网卡接收到数据后是如何通知上层协议来接收并处理数据的呢。其实在TCP/IP协议栈的初始化过程中,协议栈将handler赋值为tcp_v4_rcv的函数指针,也就是TCP协议中负责接收处理数据的入口,接收TCP连接请求及进行三次握手处理过程也都是从这里开始。
内核在处理接收到的TCP报文时使用了4个队列容器,分别为receive、out_of_order、prequeue、backlog队列。当网卡接收到报文并判断为TCP协议后,将会调用到内核的tcp_v4_rcv方法。tcp_v4_rcv方法会把这个报文直接插入到receive队列中。
该函数定义在net/ipv4/tcp_ipv4.c文件中。
tcp_v4_rcv函数主要做以下几个工作:
(1) 设置TCP_CB
(2) 查找控制块
(3)根据控制块状态做不同处理,包括TCP_TIME_WAIT状态处理,TCP_NEW_SYN_RECV状态处理,TCP_LISTEN状态处理
(4) 接收TCP段
以上完成了将接收数据放入accept队列中,之后服务端接收客户端发来的tcp报文,并发送回SYN+ACK。
当前客户端处于TCP_SYN_SENT状态,并调用tcp_rcv_synsent_state_process处理SYN_SENT状态下接收到的TCP段,发送ACK报文
到这里,三次握手期间tcp接收处理数据包的过程基本完成。
以上是关于深入理解TCP协议及其源代码——三次握手的主要内容,如果未能解决你的问题,请参考以下文章