今天我們來看看一個小例子,利用前面所學到的WinPcap編程知識來實現一個簡單的還原HTTP協議的程序。相信大家對於HTTP協議一定不會陌生,我這里只簡單地說一下它的報文格式,即HTTP報文有兩種:請求報文和響應報文。為了讓大家對於這兩種報文有更直觀的認識,給大家看兩個簡單的例子:
下面是一個典型的HTTP請求報文:
GET /somedir/page.html HTTP/1.1
Host: www.someschool.edu
Connection: close
User-agent: Mozilla/4.0
Accept-language: fr
再看一個HTTP響應報文:
HTTP/1.1 200 OK
Connection: close
Date: Thu, 03 Jul 2003 12:00:15 GMT
Server: Apache/1.3.0 (Unix)
Last-Modified: Sun, 6 May 2007 09:23:24 GMT
Content-Length: 6821
Content-Type: text/html
(data data data data data ...)
我們注意到HTTP請求報文中的第一行是以GET打頭的,沒錯,它實際上是HTTP請求的一種方法,類似的還有POST、HEAD等等。一般熟知的大概就是GET和POST了,像Servlet編程中就有doGet和doPost兩種處理HTTP請求的方法。而對於HTTP響應報文而言,第一行開頭是協議的版本號,如HTTP/1.1,現在普及的也是HTTP/1.1。利用這些特徵,我們可以判斷TCP數據報文里保存的是否是HTTP數據。
本程序的實現思路有很多種,我采用的是一種最笨拙的方式,即按照 判斷是否是IP數據包->判斷是否是TCP分組->判斷是否是HTTP報文 的邏輯,最后將HTTP報文的內容打印出來。程序開始前我們需要先定義一些重要協議的包格式,因為WinPcap並沒有為我們定義這些東西。
View Code
/*
* define struct of ethernet header , ip address , ip header and tcp header
*/
/*
 * Ethernet header in wire order: destination address, source address,
 * then the 2-byte type field. ether_type is kept in network byte order.
 */
typedef struct ether_header {
    u_char ether_dhost[ETHER_ADDR_LEN]; /* destination ethernet address, ETHER_ADDR_LEN bytes */
    u_char ether_shost[ETHER_ADDR_LEN]; /* source ethernet address, ETHER_ADDR_LEN bytes */
    u_short ether_type;                 /* ethernet type, 2 bytes (16 bits) */
}ether_header;
/* IPv4 address kept as four individual bytes, byte1 first in memory. */
struct ip_address {
    u_char byte1; /* first byte */
    u_char byte2; /* second byte */
    u_char byte3; /* third byte */
    u_char byte4; /* fourth byte */
};
typedef struct ip_address ip_address;
/*
 * IPv4 header overlay. The fields are declared in the order they are laid
 * out in the packet; the trailing op_pad word stands for options/padding.
 */
struct ip_header {
    u_char     ver_ihl;        /* version and header length share this byte */
    u_char     tos;            /* type of service */
    u_short    tlen;           /* total length */
    u_short    identification; /* identification */
    u_short    flags_fo;       /* flags and fragment offset */
    u_char     ttl;            /* time to live */
    u_char     proto;          /* protocol carried in the payload */
    u_short    crc;            /* header checksum */
    ip_address saddr;          /* source address */
    ip_address daddr;          /* destination address */
    u_int      op_pad;         /* option and padding */
};
typedef struct ip_header ip_header;
/* TCP header overlay; fields are declared in packet order. */
struct tcp_header {
    u_short th_sport;         /* source port */
    u_short th_dport;         /* destination port */
    u_int   th_seq;           /* sequence number */
    u_int   th_ack;           /* acknowledgement number */
    u_short th_len_resv_code; /* length, reserved and code bits in one 16-bit word */
    u_short th_window;        /* window */
    u_short th_sum;           /* checksum */
    u_short th_urp;           /* urgent pointer */
};
typedef struct tcp_header tcp_header;
還有一些重要的識別協議的類型,我們需要自己在代碼中進行定義。
/* Ethernet type value that marks the frame payload as an IP datagram. */
#define ETHERTYPE_IP 0x0800 /* ip protocol */
/*
 * TCP marker, stored byte-swapped: it is meant to be passed through htons()
 * before being compared with the one-byte protocol field of the IP header.
 * NOTE(review): htons(0x0600) yields 0x0006 only on a little-endian host; on
 * a big-endian host the result stays 0x0600 and can never equal a one-byte
 * field. TCP's IP protocol number is 6.
 */
#define TCP_PROTOCAL 0x0600 /* tcp protocol */
接下來再看看剛才所說的程序的邏輯是如何實現的。
1. 判斷是否是IP數據包。我們先回顧一下,在RFC 894中定義了以太網的封裝格式,由目的地址(6字節)、源地址(6字節)、類型(2字節)、數據以及CRC(4字節)構成。我們只需要關注頭部中類型這個字段,當它為0x0800時,表示數據保存的是IP數據報;當它為0x0806時,表示數據保存的是ARP請求/應答;當為0x8035時,數據保存的是RARP請求/應答。所以通過比較它的類型是否為0x0800,就可以達到目的。
2. 判斷是否是TCP分組。跟上面類似,判斷IP首部中的協議字段即可:該字段只有1個字節,TCP對應的值是6(0x06)。前面的宏TCP_PROTOCAL定義成0x0600,只有在小端機器上經過htons()轉換后才等於6。
3. 判斷是否是HTTP報文。根據上面所講解的HTTP報文格式,我們只需要判斷開頭是否為"GET"、"POST"、"HTTP/1.1"就可以做到了。具體程序是如何來判斷的我們來看看代碼吧!
View Code
/*
 * Capture loop: for every captured frame, keep only IPv4/TCP packets, look
 * for the first "GET", "POST" or "HTTP/1.1" marker inside the IP datagram
 * and print everything from that marker to the end of the datagram.
 * Relies on adhandle, pheader, pkt_data and res declared by the caller.
 */
while((res = pcap_next_ex(adhandle, &pheader, &pkt_data)) >= 0) {
    if(res == 0)
        continue; /* read time out */
    if(pheader->caplen < 14 + 20)
        continue; /* too short to hold an ethernet header plus a minimal ip header */
    ether_header * eheader = (ether_header*)pkt_data; /* transform packet data to ethernet header */
    if(eheader->ether_type == htons(ETHERTYPE_IP)) { /* ip packet only */
        ip_header * ih = (ip_header*)(pkt_data+14); /* get ip header */
        if(ih->proto == 6) { /* tcp packet only: proto is a single byte, tcp is 6 */
            int ip_len = ntohs(ih->tlen); /* get ip length, it contains header and body */
            int captured = (int)pheader->caplen - 14; /* bytes really available after the ethernet header */
            int find_http = false;
            char* ip_pkt_data = (char*)ih;
            int n = 0;
            char buffer[BUFFER_MAX_LENGTH];
            int bufsize = 0;
            if(ip_len > captured)
                ip_len = captured; /* never read past the captured data */
            for(; n<ip_len; n++)
            {
                /* http get or post request; "<=" lets a marker end exactly at the last byte */
                if(!find_http && ((n+3<=ip_len && strncmp(ip_pkt_data+n,"GET",strlen("GET")) == 0)
                    || (n+4<=ip_len && strncmp(ip_pkt_data+n,"POST",strlen("POST")) == 0)) )
                    find_http = true;
                /* http response */
                if(!find_http && n+8<=ip_len && strncmp(ip_pkt_data+n,"HTTP/1.1",strlen("HTTP/1.1")) == 0)
                    find_http = true;
                /* if http is found */
                if(find_http)
                {
                    buffer[bufsize] = ip_pkt_data[n]; /* copy http data to buffer */
                    bufsize ++;
                }
            }
            /* print http content */
            if(find_http) {
                buffer[bufsize] = '\0'; /* bufsize <= 65535, so this index is inside the buffer */
                printf("%s\n", buffer);
                printf("\n**********************************************\n\n");
            }
        }
    }
}
看一下運行后的截圖:

這里需要注意的是,程序本質上還沒有完全還原HTTP協議的功能,對於HTTP請求數據和響應數據進行解析,真正的應該可以通過Content-Type分析數據格式,並按照相應的解析方式進行解碼,還有對於中文字符的處理等等~~最后將整個程序的源碼貼出來,有任何意見或建議的可以隨意吐槽,虛心接受。ps: 注釋寫得不全請見諒~~
pheader.h頭文件
View Code
#ifndef PHEADER_H_INCLUDED
#define PHEADER_H_INCLUDED
/*
*
*/
/* Number of bytes in one ethernet address. */
#define ETHER_ADDR_LEN 6 /* ethernet address */
/* Ethernet type value that marks the frame payload as an IP datagram. */
#define ETHERTYPE_IP 0x0800 /* ip protocol */
/*
 * TCP marker, stored byte-swapped: it is meant to be passed through htons()
 * before being compared with the one-byte protocol field of the IP header.
 * NOTE(review): htons(0x0600) yields 0x0006 only on a little-endian host; on
 * a big-endian host the result stays 0x0600 and can never equal a one-byte
 * field. TCP's IP protocol number is 6.
 */
#define TCP_PROTOCAL 0x0600 /* tcp protocol */
/* Size in bytes of the buffer that receives the extracted HTTP text. */
#define BUFFER_MAX_LENGTH 65536 /* buffer max length */
/* Boolean constants. NOTE(review): these would clash with <stdbool.h> if that header were also included. */
#define true 1 /* define true */
#define false 0 /* define false */
/*
* define struct of ethernet header , ip address , ip header and tcp header
*/
/*
 * Ethernet header in wire order: destination address, source address,
 * then the 2-byte type field. ether_type is kept in network byte order.
 */
typedef struct ether_header {
    u_char ether_dhost[ETHER_ADDR_LEN]; /* destination ethernet address, ETHER_ADDR_LEN bytes */
    u_char ether_shost[ETHER_ADDR_LEN]; /* source ethernet address, ETHER_ADDR_LEN bytes */
    u_short ether_type;                 /* ethernet type, 2 bytes (16 bits) */
}ether_header;
/* IPv4 address kept as four individual bytes, byte1 first in memory. */
struct ip_address {
    u_char byte1; /* first byte */
    u_char byte2; /* second byte */
    u_char byte3; /* third byte */
    u_char byte4; /* fourth byte */
};
typedef struct ip_address ip_address;
/*
 * IPv4 header overlay. The fields are declared in the order they are laid
 * out in the packet; the trailing op_pad word stands for options/padding.
 */
struct ip_header {
    u_char     ver_ihl;        /* version and header length share this byte */
    u_char     tos;            /* type of service */
    u_short    tlen;           /* total length */
    u_short    identification; /* identification */
    u_short    flags_fo;       /* flags and fragment offset */
    u_char     ttl;            /* time to live */
    u_char     proto;          /* protocol carried in the payload */
    u_short    crc;            /* header checksum */
    ip_address saddr;          /* source address */
    ip_address daddr;          /* destination address */
    u_int      op_pad;         /* option and padding */
};
typedef struct ip_header ip_header;
/* TCP header overlay; fields are declared in packet order. */
struct tcp_header {
    u_short th_sport;         /* source port */
    u_short th_dport;         /* destination port */
    u_int   th_seq;           /* sequence number */
    u_int   th_ack;           /* acknowledgement number */
    u_short th_len_resv_code; /* length, reserved and code bits in one 16-bit word */
    u_short th_window;        /* window */
    u_short th_sum;           /* checksum */
    u_short th_urp;           /* urgent pointer */
};
typedef struct tcp_header tcp_header;
#endif // PHEADER_H_INCLUDED
main.c文件
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define HAVE_REMOTE
#include <pcap.h>
#include "pheader.h"
/*
* function: a simple program to analyze http
* author: blacksword
* date: Wed March 21 2012
*/
int main()
{
pcap_if_t* alldevs; // list of all devices
pcap_if_t* d; // device you chose
pcap_t* adhandle;
char errbuf[PCAP_ERRBUF_SIZE]; //error buffer
int i=0;
int inum;
struct pcap_pkthdr *pheader; /* packet header */
const u_char * pkt_data; /* packet data */
int res;
/* pcap_findalldevs_ex got something wrong */
if (pcap_findalldevs_ex(PCAP_SRC_IF_STRING, NULL /* auth is not needed*/, &alldevs, errbuf) == -1)
{
fprintf(stderr, "Error in pcap_findalldevs_ex: %s\n", errbuf);
exit(1);
}
/* print the list of all devices */
for(d = alldevs; d != NULL; d = d->next)
{
printf("%d. %s", ++i, d->name); // print device name , which starts with "rpcap://"
if(d->description)
printf(" (%s)\n", d->description); // print device description
else
printf(" (No description available)\n");
}
/* no interface found */
if (i == 0)
{
printf("\nNo interface found! Make sure Winpcap is installed.\n");
return -1;
}
printf("Enter the interface number (1-%d):", i);
scanf("%d", &inum);
if(inum < 1 || inum > i)
{
printf("\nInterface number out of range.\n");
pcap_freealldevs(alldevs);
return -1;
}
for(d=alldevs, i=0; i < inum-1; d=d->next, i++); /* jump to the selected interface */
/* open the selected interface*/
if((adhandle = pcap_open(d->name, /* the interface name */
65536, /* length of packet that has to be retained */
PCAP_OPENFLAG_PROMISCUOUS, /* promiscuous mode */
1000, /* read time out */
NULL, /* auth */
errbuf /* error buffer */
)) == NULL)
{
fprintf(stderr, "\nUnable to open the adapter. %s is not supported by Winpcap\n",
d->description);
return -1;
}
printf("\nListening on %s...\n", d->description);
pcap_freealldevs(alldevs); // release device list
/* capture packet */
while((res = pcap_next_ex(adhandle, &pheader, &pkt_data)) >= 0) {
if(res == 0)
continue; /* read time out*/
ether_header * eheader = (ether_header*)pkt_data; /* transform packet data to ethernet header */
if(eheader->ether_type == htons(ETHERTYPE_IP)) { /* ip packet only */
ip_header * ih = (ip_header*)(pkt_data+14); /* get ip header */
if(ih->proto == htons(TCP_PROTOCAL)) { /* tcp packet only */
int ip_len = ntohs(ih->tlen); /* get ip length, it contains header and body */
int find_http = false;
char* ip_pkt_data = (char*)ih;
int n = 0;
char buffer[BUFFER_MAX_LENGTH];
int bufsize = 0;
for(; n<ip_len; n++)
{
/* http get or post request */
if(!find_http && ((n+3<ip_len && strncmp(ip_pkt_data+n,"GET",strlen("GET")) ==0 )
|| (n+4<ip_len && strncmp(ip_pkt_data+n,"POST",strlen("POST")) == 0)) )
find_http = true;
/* http response */
if(!find_http && n+8<ip_len && strncmp(ip_pkt_data+n,"HTTP/1.1",strlen("HTTP/1.1"))==0)
find_http = true;
/* if http is found */
if(find_http)
{
buffer[bufsize] = ip_pkt_data[n]; /* copy http data to buffer */
bufsize ++;
}
}
/* print http content */
if(find_http) {
buffer[bufsize] = '\0';
printf("%s\n", buffer);
printf("\n**********************************************\n\n");
}
}
}
}
return 0;
}
