ping的源碼解析
1、下載Ubuntu的ping源碼
-
查看ping的源碼在哪個包下
-
下載源碼包:apt-get source iputils-ping
- 如果出現了"You must put some 'source' URIs in your sources.list"錯誤,需要先在系統設置->Software&Updates里把Source code選項打開
- 下載的源代碼在當前的文件夾下
2、編譯/執行ping
-
在源碼文件夾iputils-20121221下執行make ping
如果出現了"fatal error: sys/capability.h: No such file or directory
compilation terminated"錯誤,則執行"apt-get install libcap-dev" -
得到編譯好的ping.o和ping的可執行文件。測試是否能用。
3、源碼分析
- 查看makefile可以看到,ping主要是由ping_common.h、ping_common.c、ping.c編譯得來的。
- 查看ping.c里包含的頭文件,包括netinet/ip.h、netinet/ip_icmp.h、ifaddrs.h。在/usr/include下把這些頭文件也給找到。
- find指令:find /usr/include -name ip.h
- 整體結構圖
4、main函數
先從最頂層開始看,且由於ping的選項很多,關注頂層時不要死抓住各種選項的設置不放,先通過研究ping的最基本用法【ping 地址】來理清主要框架。
// Global variables and structure declarations
struct sockaddr_in whereto; /* who to ping */
// optlen enters the socket-buffer sizing and the printed total length in main
int optlen = 0;
int settos = 0; /* Set TOS, Precedence or other QOS options */
int icmp_sock; /* socket file descriptor */
// Outgoing packet buffer; send_probe builds the ICMP header at its start
u_char outpack[0x10000];
// Size of outpack in bytes
int maxpacket = sizeof(outpack);
// Source address; main sets its family to AF_INET
struct sockaddr_in source;
// sockaddr_in is defined in <netinet/in.h>; it is reproduced here for reference
// NOTE(review): this quoted copy has no terminating ';' after the closing brace — confirm against the header
struct sockaddr_in {
sa_family_t sin_family; // address family
uint16_t sin_port; // 16-bit TCP/UDP port number
struct in_addr sin_addr; // 32-bit IP address
char sin_zero[8]; // unused
}
/*
 * main: entry point of ping as quoted (abridged) in this walkthrough.
 * Sections collapsed with "..." are not shown here.
 * Visible flow: create the raw ICMP socket, parse the command-line options,
 * resolve the target(s) into whereto, report a failed socket creation,
 * size the socket buffers, print the "PING ..." banner, then call setup()
 * and main_loop().
 */
int main(int argc, char **argv)
{
struct hostent *hp;
int ch, hold, packlen;
int socket_errno;
u_char *packet;
char *target;
// Preprocessor-conditional sections are collapsed and not examined
#ifdef USE_IDN...
#else...
#endif
char rspace[3 + 4 * NROUTES + 1]; /* record route space */
limit_capabilities();
#ifdef USE_IDN...
#endif
enable_capability_raw();
// Create the ICMP socket
icmp_sock = socket(AF_INET, SOCK_RAW, IPPROTO_ICMP);
// Save errno right away; it is restored before perror() if icmp_sock turns out to be invalid
socket_errno = errno;
disable_capability_raw();
source.sin_family = AF_INET;
preload = 1;
// Configure according to ping's command-line options
while ((ch = getopt(argc, argv, COMMON_OPTSTR "bRT:")) != EOF) {
switch(ch) {
case 'b':...
case 'Q':...
case 'R':...
case 'T':...
case 'I':...
case 'M':...
case 'V':
printf("ping utility, iputils-%s\n", SNAPSHOT);
exit(0);
// Common options are handed to the common_options function
COMMON_OPTIONS
common_options(ch);
break;
default:
usage();
}
}
argc -= optind; // options consumed: fewer arguments remain
argv += optind; // advance the argument pointer to the next argument
// No arguments left: print the usage text
if (argc == 0)
usage();
// More than one argument: configured according to options; skipped
if (argc > 1) {...
}
// Determine the target
while (argc > 0) {
// Take the target address given on the command line
target = *argv;
memset((char *)&whereto, 0, sizeof(whereto));
whereto.sin_family = AF_INET;
// A numeric IPv4 string is parsed directly into whereto.sin_addr
if (inet_aton(target, &whereto.sin_addr) == 1) {
hostname = target;
if (argc == 1)
options |= F_NUMERIC;
} else {
// NOTE(review): idn is never assigned in this excerpt before being passed below; presumably it is set in code elided here — confirm
char *idn;
// Resolve the host name to an IP address
hp = gethostbyname2(idn, AF_INET);
if (!hp) {
fprintf(stderr, "ping: unknown host %s\n", target);
exit(2);
}
hostname = hnamebuf;
}
// Every argument except the last remaining one is recorded as a route entry
if (argc > 1)
route[nroute++] = whereto.sin_addr.s_addr;
argc--;
argv++;
}
// Original note: check for a broadcast address and try connecting to the target IP.
// Only creating and closing a UDP probe socket (when no source address is set) is visible in this excerpt.
if (source.sin_addr.s_addr == 0) {
socklen_t alen;
struct sockaddr_in dst = whereto;
int probe_fd = socket(AF_INET, SOCK_DGRAM, 0);
if (probe_fd < 0) {
perror("socket");
exit(2);
}
close(probe_fd);
// The trailing while (0); is an empty loop whose condition is false, so it never iterates
} while (0);
// If the target address is 0, use the source address as the target
if (whereto.sin_addr.s_addr == 0)
whereto.sin_addr.s_addr = source.sin_addr.s_addr;
// Creating the ICMP socket failed
if (icmp_sock < 0) {
errno = socket_errno;
perror("ping: icmp open socket");
exit(2);
}
// Handling of the -I option; skipped
if (device) {....
}
// Handling of the -b option; skipped
if (broadcast_pings || IN_MULTICAST(ntohl(whereto.sin_addr.s_addr))) {...
}
// Handling of the -M option; skipped
if (pmtudisc >= 0) {...
}
// Handling of the -I option; skipped
if ((options&F_STRICTSOURCE) && bind(icmp_sock, (struct sockaddr*)&source, sizeof(source)) == -1) {
}
// Handling of some other options; skipped
...
// Set the sizes of the socket's receive and send buffers
hold = datalen + 8;
hold += ((hold+511)/512)*(optlen + 20 + 16 + 64 + 160);
sock_setbufs(icmp_sock, hold);
// From here on come the messages we see while ping runs
// e.g. PING baidu.com (39.156.69.79)
printf("PING %s (%s) ", hostname, inet_ntoa(whereto.sin_addr));
if (device || (options&F_STRICTSOURCE))
printf("from %s %s: ", inet_ntoa(source.sin_addr), device ?: "");
// e.g. 56(84) bytes of data.
// Per the original note, datalen defaults to (64-8) = 56; with optlen 0 the total printed is 56+8+0+20 = 84
printf("%d(%d) bytes of data.\n", datalen, datalen+8+optlen+20);
// Key point 1
setup(icmp_sock);
// Key point 2
// NOTE(review): packet and packlen are not assigned anywhere in this excerpt; presumably set in the elided code — confirm
main_loop(icmp_sock, packet, packlen);
}
5、setup函數
/*
 * setup: prepares run-time state before the main loop (the option-handling
 * part at the top is elided in this excerpt).
 * Visible steps: derive the packet identifier from the process ID, install
 * signal handlers, install an empty signal mask, record the start time, arm a
 * one-shot timer when a deadline is set, and read the terminal width when
 * stdout is a terminal.
 *
 * icmp_sock: the ICMP socket; not referenced in the visible part.
 */
void setup(int icmp_sock)
{
....
// Everything above is option setup; skipped
// Low 16 bits of the process ID, in network byte order; used to recognise our own packets
ident = htons(getpid() & 0xFFFF);
set_signal(SIGINT, sigexit);
set_signal(SIGALRM, sigexit);
set_signal(SIGQUIT, sigstatus);
// Install an empty signal mask
sigemptyset(&sset);
sigprocmask(SIG_SETMASK, &sset, NULL);
gettimeofday(&start_time, NULL);
if (deadline) {
// One-shot timer (zero interval) whose first expiry is `deadline` seconds from now
struct itimerval it;
it.it_interval.tv_sec = 0;
it.it_interval.tv_usec = 0;
it.it_value.tv_sec = deadline;
it.it_value.tv_usec = 0;
setitimer(ITIMER_REAL, &it, NULL);
}
// When stdout is a terminal, take its column count as screen_width (only if positive)
if (isatty(STDOUT_FILENO)) {
struct winsize w;
if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &w) != -1) {
if (w.ws_col > 0)
screen_width = w.ws_col;
}
}
}
6、main_loop函數
main_loop里面是個死循環,根據時間間隔發包->收包->解析包->發包...
/*
 * main_loop: the send/receive cycle of ping.
 * Each pass of the outer loop checks the exit conditions, calls pinger()
 * until the scheduled wait is positive, optionally waits with poll(), then
 * reads replies with recvmsg() and hands them to parse_reply().
 * finish() is called once the outer loop is left.
 *
 * icmp_sock: socket used for poll() and recvmsg().
 * packet:    receive buffer handed to recvmsg() through iov.
 * packlen:   length of that buffer (assigned to iov.iov_len).
 */
void main_loop(int icmp_sock, __u8 *packet, int packlen)
{
char addrbuf[128];
char ans_data[4096];
struct iovec iov;
struct msghdr msg;
struct cmsghdr *c;
int cc;
int next;
int polling;
iov.iov_base = (char *)packet;
for (;;) {
/* Check the exit conditions */
if (exiting)
break;
if (npackets && nreceived + nerrors >= npackets)
break;
if (deadline && nerrors)
break;
/* Check the status and respond. Original note: status_snapshot was changed
 * during setup; covers packet loss, rejection, timeout, etc.
 * NOTE(review): not verifiable from this excerpt — confirm */
if (status_snapshot)
status();
/* Send probes */
do {
next = pinger();
next = schedule_exit(next);
} while (next <= 0);
/* "next", if positive, is the time until the next probe is to be sent.
 * If next<=0, send now or as soon as possible. */
/* Technical part. Looks wicked. Could be dropped,
 * if everyone used the newest kernel. :-)
 * Its purpose is:
 * 1. Provide intervals less than resolution of scheduler.
 * Solution: spinning.
 * 2. Avoid use of poll(), when recvmsg() can provide
 * timed waiting (SO_RCVTIMEO). */
polling = 0;
// Control the sending interval
if ((options & (F_ADAPTIVE|F_FLOOD_POLL)) || next<SCHINT(interval)) {
int recv_expected = in_flight();
/* If we are here, recvmsg() is unable to wait for
 * required timeout. */
if (1000 % HZ == 0 ? next <= 1000 / HZ : (next < INT_MAX / HZ && next * HZ <= 1000)) {
/* Very short timeout... So, if we wait for
 * something, we sleep for MININTERVAL.
 * Otherwise, spin! */
if (recv_expected) {
next = MININTERVAL;
} else {
next = 0;
/* When spinning, no reasons to poll.
 * Use nonblocking recvmsg() instead. */
polling = MSG_DONTWAIT;
/* But yield yet. */
sched_yield();
}
}
if (!polling &&
((options & (F_ADAPTIVE|F_FLOOD_POLL)) || interval)) {
struct pollfd pset;
pset.fd = icmp_sock;
pset.events = POLLIN|POLLERR;
pset.revents = 0;
// Nothing readable within "next": go back to the top of the outer loop
if (poll(&pset, 1, next) < 1 ||
!(pset.revents&(POLLIN|POLLERR)))
continue;
polling = MSG_DONTWAIT;
}
}
// Receive ICMP reply packets
for (;;) {
struct timeval *recv_timep = NULL;
struct timeval recv_time;
int not_ours = 0; /* Raw socket can receive messages
 * destined to other running pings. */
iov.iov_len = packlen;
memset(&msg, 0, sizeof(msg));
// msg is a local struct msghdr (declared at the top of this function)
msg.msg_name = addrbuf;
msg.msg_namelen = sizeof(addrbuf);
msg.msg_iov = &iov;
msg.msg_iovlen = 1;
msg.msg_control = ans_data;
msg.msg_controllen = sizeof(ans_data);
cc = recvmsg(icmp_sock, &msg, polling); // receive a packet
// Every further recvmsg() in this inner loop is non-blocking
polling = MSG_DONTWAIT;
// Receiving failed
if (cc < 0) {
if (errno == EAGAIN || errno == EINTR)
break;
if (!receive_error_msg()) {
if (errno) {
perror("ping: recvmsg");
break;
}
not_ours = 1;
}
} else {
#ifdef SO_TIMESTAMP
// Look for an SO_TIMESTAMP control message carrying the receive time
for (c = CMSG_FIRSTHDR(&msg); c; c = CMSG_NXTHDR(&msg, c)) {
if (c->cmsg_level != SOL_SOCKET ||
c->cmsg_type != SO_TIMESTAMP)
continue;
if (c->cmsg_len < CMSG_LEN(sizeof(struct timeval)))
continue;
recv_timep = (struct timeval*)CMSG_DATA(c);
}
#endif
// No timestamp found (or F_LATENCY set): try SIOCGSTAMP, else fall back to gettimeofday()
if ((options&F_LATENCY) || recv_timep == NULL) {
if ((options&F_LATENCY) ||
ioctl(icmp_sock, SIOCGSTAMP, &recv_time))
gettimeofday(&recv_time, NULL);
recv_timep = &recv_time;
}
// Parse the received packet
not_ours = parse_reply(&msg, cc, addrbuf, recv_timep);
}
/* See? ... someone runs another ping on this host. */
if (not_ours)
install_filter();
/* If nothing is in flight, "break" returns us to pinger. */
if (in_flight() == 0)
break;
/* Otherwise, try to recvmsg() again. recvmsg()
 * is nonblocking after the first iteration, so that
 * if nothing is queued, it will receive EAGAIN
 * and return to pinger. */
}
}
finish();
}
6、pinger函數
組成並傳送一個ICMP ECHO請求包。
ID取自UNIX進程ID的低16位(getpid() & 0xFFFF),sequence number是一個遞增的整數。
data段的開頭裝一個UNIX時間戳(struct timeval,長度為sizeof(struct timeval),在32位系統上是8個字節),用來計算往返時間。
具體裝包發包是由send_probe函數實現的。
/*
 * pinger: decide whether a probe may be sent now and, if so, send one through
 * send_probe().
 * Pacing uses a token count: elapsed time since cur_time (computed in
 * milliseconds) is added to `tokens`, capped at interval*preload, and a probe
 * is sent only when at least `interval` tokens are available.
 *
 * Returns the wait handed back to main_loop as "next"; main_loop calls pinger
 * again immediately while that value (after schedule_exit) is <= 0.
 */
int pinger(void)
{
static int oom_count;
static int tokens;
int i;
/* If enough packets have been sent already, return an arbitrary positive value */
if (exiting || (npackets && ntransmitted >= npackets && !deadline))
return 1000;
/* Check that packets < rate*time + preload */
if (cur_time.tv_sec == 0) {
// First call: start the clock and grant tokens for the preloaded packets
gettimeofday(&cur_time, NULL);
tokens = interval*(preload-1);
} else {
long ntokens;
struct timeval tv;
gettimeofday(&tv, NULL);
// Milliseconds elapsed since cur_time
ntokens = (tv.tv_sec - cur_time.tv_sec)*1000 +
(tv.tv_usec-cur_time.tv_usec)/1000;
if (!interval) {
/* Case of unlimited flood is special;
 * if we see no reply, they are limited to 100pps */
if (ntokens < MININTERVAL && in_flight() >= preload)
return MININTERVAL-ntokens;
}
ntokens += tokens;
if (ntokens > interval*preload)
ntokens = interval*preload;
// Not enough tokens yet: report how long is still missing
if (ntokens < interval)
return interval - ntokens;
cur_time = tv;
tokens = ntokens - interval;
}
if (options & F_OUTSTANDING) {
if (ntransmitted > 0 && !rcvd_test(ntransmitted)) {
print_timestamp();
printf("no answer yet for icmp_seq=%lu\n", (ntransmitted % MAX_DUP_CHK));
fflush(stdout);
}
}
resend:
i = send_probe(); // preparation done, send the packet
// Sent successfully
if (i == 0) {
oom_count = 0;
advance_ntransmitted();
if (!(options & F_QUIET) && (options & F_FLOOD)) {
/* Very silly, but without this output with
 * high preload or pipe size is very confusing. */
if ((preload < screen_width && pipesize < screen_width) ||
in_flight() < screen_width)
write_stdout(".", 1);
}
return interval - tokens;
}
/* Sending failed; handle the various errors */
if (i > 0) {
/* Apparently, it is some fatal bug. */
abort();
} else if (errno == ENOBUFS || errno == ENOMEM) {
int nores_interval;
/* Device queue overflow or OOM. Packet is not sent. */
tokens = 0;
/* Slowdown. This works only in adaptive mode (option -A) */
rtt_addend += (rtt < 8*50000 ? rtt/8 : 50000);
if (options&F_ADAPTIVE)
update_interval();
nores_interval = SCHINT(interval/2);
if (nores_interval > 500)
nores_interval = 500;
oom_count++;
// Keep retrying until the accumulated back-off reaches lingertime
if (oom_count*nores_interval < lingertime)
return nores_interval;
i = 0;
/* Fall to hard error. It is to avoid complete deadlock
 * on stuck output device even when dealine was not requested.
 * Expected timings are screwed up in any case, but we will
 * exit some day. :-) */
} else if (errno == EAGAIN) {
/* Socket buffer is full. */
tokens += interval;
return MININTERVAL;
} else {
if ((i=receive_error_msg()) > 0) {
/* An ICMP error arrived. */
tokens += interval;
return MININTERVAL;
}
/* Compatibility with old linuces. */
if (i == 0 && confirm_flag && errno == EINVAL) {
confirm_flag = 0;
errno = 0;
}
// errno cleared: try sending again
if (!errno)
goto resend;
}
/* Hard local error. Pretend we sent packet. */
advance_ntransmitted();
if (i == 0 && !(options & F_QUIET)) {
if (options & F_FLOOD)
write_stdout("E", 1);
else
perror("ping: sendmsg");
}
tokens = 0;
return SCHINT(interval);
}
7、send_probe函數
/*
 * send_probe: build one ICMP echo request in outpack and send it to whereto
 * with sendmsg().
 *
 * Returns 0 when sendmsg() reports exactly cc (= datalen + 8) bytes sent;
 * otherwise returns sendmsg()'s own result.
 */
int send_probe()
{
// ICMP message header
struct icmphdr *icp;
int cc;
int i;
icp = (struct icmphdr *)outpack;
icp->type = ICMP_ECHO; // message type: echo request
icp->code = 0;
icp->checksum = 0; // checksum; zeroed before it is computed below
icp->un.echo.sequence = htons(ntransmitted+1); // sequence number, network byte order
icp->un.echo.id = ident; // identifier (derived from the process ID in setup)
rcvd_clear(ntransmitted+1);
// The timestamp is stored right after the ICMP header (icp+1)
if (timing) {
if (options&F_LATENCY) {
struct timeval tmp_tv;
gettimeofday(&tmp_tv, NULL);
memcpy(icp+1, &tmp_tv, sizeof(tmp_tv));
} else {
// Zero placeholder; the real timestamp is written after the checksum pass below
memset(icp+1, 0, sizeof(struct timeval));
}
}
cc = datalen + 8; /* skips ICMP portion */
/* compute ICMP checksum here */
icp->checksum = in_cksum((u_short *)icp, cc, 0);
if (timing && !(options&F_LATENCY)) {
struct timeval tmp_tv;
gettimeofday(&tmp_tv, NULL);
memcpy(icp+1, &tmp_tv, sizeof(tmp_tv));
// Recompute over just the timestamp, passing the inverted previous checksum as third argument
// NOTE(review): presumably that argument seeds the running sum — confirm against in_cksum
icp->checksum = in_cksum((u_short *)&tmp_tv, sizeof(tmp_tv), ~icp->checksum);
}
// Before sending, the packet has to be wrapped in a struct msghdr
do {
static struct iovec iov = {outpack, 0};
static struct msghdr m = { &whereto, sizeof(whereto),
&iov, 1, &cmsg, 0, 0 };
m.msg_controllen = cmsg_len;
iov.iov_len = cc;
i = sendmsg(icmp_sock, &m, confirm); // the packet is sent here
confirm = 0;
} while (0);
return (cc == i ? 0 : i);
}
8、parse_reply函數
打印收到的ICMP包,就是一個拆包的過程
/*
 * parse_reply: unpack one received packet (IP header followed by ICMP).
 *
 * msg:  message filled in by recvmsg(); the packet is read from msg->msg_iov->iov_base.
 * cc:   number of bytes received.
 * addr: sender address, used as struct sockaddr_in.
 * tv:   receive time, passed on to gather_statistics().
 *
 * Returns 1 for packets that are too short or not ours, 0 otherwise (in the
 * visible part).
 * NOTE(review): the excerpt stops before the closing braces of the else branch
 * and of the function; the remainder is elided.
 */
int
parse_reply(struct msghdr *msg, int cc, void *addr, struct timeval *tv)
{
struct sockaddr_in *from = addr;
__u8 *buf = msg->msg_iov->iov_base;
struct icmphdr *icp;
struct iphdr *ip;
int hlen;
int csfailed;
/* Check the IP header */
ip = (struct iphdr *)buf;
hlen = ip->ihl*4; // IP header length in bytes
if (cc < hlen + 8 || ip->ihl < 5) {
if (options & F_VERBOSE)
fprintf(stderr, "ping: packet too short (%d bytes) from %s\n", cc,
pr_addr(from->sin_addr.s_addr));
return 1;
}
/* The ICMP part */
cc -= hlen;
// Advancing the pointer by hlen bytes gives the start of the ICMP packet
icp = (struct icmphdr *)(buf + hlen);
// Checksum over the ICMP part; the result is kept in csfailed
csfailed = in_cksum((u_short *)icp, cc, 0);
// The received ICMP packet is an echo reply
if (icp->type == ICMP_ECHOREPLY) {
// First compare the identifier to make sure the packet is ours
if (icp->un.echo.id != ident)
return 1; /* 'Twas not our ECHO */
// Original note: compute the round-trip time
if (gather_statistics((__u8*)icp, sizeof(*icp), cc,
ntohs(icp->un.echo.sequence),
ip->ttl, 0, tv, pr_addr(from->sin_addr.s_addr),
pr_echo_reply)) {
fflush(stdout);
return 0; // back to main_loop
}
} else {
/* Entered when a redirect or source-quench packet is received. */
/* We fall here when a redirect or source quench arrived.
 * Also this branch processes icmp errors, when IP_RECVERR
 * is broken. */
switch (icp->type) {
case ICMP_ECHO: // an ICMP echo request was received
/* MUST NOT */
return 1;
case ICMP_SOURCE_QUENCH: // source quench
case ICMP_REDIRECT: // redirect
case ICMP_DEST_UNREACH: // destination unreachable
case ICMP_TIME_EXCEEDED: // time exceeded
case ICMP_PARAMETERPROB: // parameter problem
{
// The ICMP error carries the IP header and ICMP header of the packet it refers to
struct iphdr * iph = (struct iphdr *)(&icp[1]);
struct icmphdr *icp1 = (struct icmphdr*)((unsigned char *)iph + iph->ihl*4);
int error_pkt;
if (cc < 8+sizeof(struct iphdr)+8 ||
cc < 8+iph->ihl*4+8)
return 1;
// Ignore it unless the embedded packet is our own echo request to whereto
if (icp1->type != ICMP_ECHO ||
iph->daddr != whereto.sin_addr.s_addr ||
icp1->un.echo.id != ident)
return 1;
error_pkt = (icp->type != ICMP_REDIRECT &&
icp->type != ICMP_SOURCE_QUENCH);
if (error_pkt) {
acknowledge(ntohs(icp1->un.echo.sequence));
return 0;
}
// As excerpted, error_pkt is always 0 when this line is reached
nerrors+=error_pkt;
// Act according to the options
...
}
return 0;
}
9、常用網絡編程函數:
- socket函數:用來創建套接字
- 函數原型 int socket(int domain, int type, int protocol);
- domain表示套接字要使用的協議簇
- AF_UNIX(本機通信)
- AF_INET(TCP/IP – IPv4)
- AF_INET6(TCP/IP – IPv6)
- type表示套接字類型
- SOCK_STREAM(TCP流)
- SOCK_DGRAM(UDP數據報)
- SOCK_RAW(原始套接字)
- protocol用來確定協議種類,一般為0(由系統按domain和type選用默認協議);ping創建原始套接字時傳入的是IPPROTO_ICMP
- htons函數:將端口號由主機字節序轉換為網絡字節序的整數值
- 如 mysock.sin_port = htons(80)
- inet_addr函數:將一個IP字符串轉化為一個網絡字節序的整數值
- 如 mysock.sin_addr.s_addr = inet_addr("192.168.1.0")
- recvmsg / sendmsg函數
- ssize_t recvmsg(int sockfd, struct msghdr *msg, int flags);
- ssize_t sendmsg(int sockfd, const struct msghdr *msg, int flags);
- sockfd - 套接字描述符
- msg - 消息頭部
- flags - 套接口設置標識
- msghdr結構體
struct msghdr {
void *msg_name; /* 消息的協議地址 */
socklen_t msg_namelen; /* 地址的長度 */
struct iovec *msg_iov; /* 多io緩沖區的地址 */
int msg_iovlen; /* 緩沖區的個數 */
void *msg_control; /* 輔助數據的地址 */
socklen_t msg_controllen; /* 輔助數據的長度 */
int msg_flags; /* 接收消息的標識 */
};
10、感想心得
- 每個命令都有很多可選參數,第一遍讀源碼時去深究每個參數的功能實現是很難的,很容易陷入層層遞進的函數,最后放棄。應該先把實現框架搞懂。
- 主要功能的實現依靠icmp包的封裝,icmp包的解析,sendmsg和recvmsg兩個函數。復雜之處在於
- 各個參數的不同設置
- 發包時間間隔的設置,涉及進程信號處理
- 整理一下parse_reply即收到包的解析
- 首先解析IP包的頭部,解析完指針往前走
- 讀取ICMP包的type段,判斷是否是reply包(ICMP_ECHOREPLY);上面節選的代碼只檢查了type段,沒有檢查code段
- 檢查校驗和
- 比較標識符(進程ID)
- 根據時間戳計算來回時間
- 今后還需要加深對UNIX網絡編程的學習。由於很多定義和函數用法不熟悉,浪費了很多時間。