今天,又是按時下班,吃飽飯,繼續跟大家分享Cowboy。昨天有件高興的事:我嘗試用閃存給@博客園團隊發了一條消息,問是否能在博客園首頁的編程語言分類中添加 Erlang 這一種編程語言。很快,@博客園團隊就給我回信,很爽快地答應了,並且幫我把以前的文章也導入這個分類,省去了我手動導入的麻煩。這裡謝謝@博客園團隊的熱心,希望博客園越來越好,越來越多的朋友來學 Erlang。
好了,繼續我們上一篇講到的cowboy_http_protocol:request/2 方法:
parse_header(#http_req{socket=Socket, transport=Transport, connection=ConnAtom, pid=self(), method=Method, version=Version, path='*', raw_path= <<"*">>, raw_qs= <<>>, onresponse=OnResponse, urldecode=URLDec}, State);
這邊有個記錄 #http_req{} 我們看下它的定義,在 cowboy/include 目錄下的 http.hrl 中,這個記錄保存每個http請求的詳細信息:
%% Per-request record: one #http_req{} carries the details of a single
%% HTTP request. Field order defines the tuple layout, so fields must not
%% be reordered.
-record(http_req, {
	%% Transport.
	socket = undefined :: undefined | inet:socket(),
	transport = undefined :: undefined | module(),
	connection = keepalive :: keepalive | close,

	%% Request.
	pid = undefined :: pid(),
	method = 'GET' :: cowboy_http:method(),
	version = {1, 1} :: cowboy_http:version(),
	peer = undefined :: undefined | {inet:ip_address(), inet:port_number()},
	host = undefined :: undefined | cowboy_dispatcher:tokens(),
	host_info = undefined :: undefined | cowboy_dispatcher:tokens(),
	raw_host = undefined :: undefined | binary(),
	port = undefined :: undefined | inet:port_number(),
	path = undefined :: undefined | '*' | cowboy_dispatcher:tokens(),
	path_info = undefined :: undefined | cowboy_dispatcher:tokens(),
	raw_path = undefined :: undefined | binary(),
	qs_vals = undefined :: undefined | list({binary(), binary() | true}),
	raw_qs = undefined :: undefined | binary(),
	bindings = undefined :: undefined | cowboy_dispatcher:bindings(),
	headers = [] :: cowboy_http:headers(),
	p_headers = [] :: [any()], %% @todo Improve those specs.
	cookies = undefined :: undefined | [{binary(), binary()}],
	meta = [] :: [{atom(), any()}],

	%% Request body.
	body_state = waiting :: waiting | done | {stream, fun(), any(), fun()}
		| {multipart, non_neg_integer(), fun()},
	buffer = <<>> :: binary(),

	%% Response.
	resp_state = waiting :: locked | waiting | chunks | done,
	resp_headers = [] :: cowboy_http:headers(),
	resp_body = <<>> :: iodata() | {non_neg_integer(),
		fun(() -> {sent, non_neg_integer()})},

	%% Functions.
	onresponse = undefined :: undefined | fun((cowboy_http:status(),
		cowboy_http:headers(), #http_req{}) -> #http_req{}),
	urldecode :: {fun((binary(), T) -> binary()), T}
}).
好了,弄明白了,這個記錄,我們看下 cowboy_http_protocol:parse_header/2 具體實現:
%% @doc Decode the next header line from the data buffered in State.
%%
%% The buffer is handed to erlang:decode_packet/3 in httph_bin mode and
%% the outcome is dispatched by handle_decoded_header/3.
-spec parse_header(#http_req{}, #state{}) -> ok.
parse_header(Req, State=#state{buffer=Buffer}) ->
	Decoded = erlang:decode_packet(httph_bin, Buffer, []),
	handle_decoded_header(Decoded, Req, State).

%% One clause per decode_packet/3 outcome.
%% A packet was decoded: pass it to header/3 and keep only the
%% undecoded remainder as the new buffer.
handle_decoded_header({ok, Header, Rest}, Req, State) ->
	header(Header, Req, State#state{buffer=Rest});
%% More data is needed, but the buffer is already larger than
%% max_line_length: give up with status 413.
handle_decoded_header({more, _Length}, _Req,
		State=#state{buffer=Buffer, max_line_length=MaxLength})
		when byte_size(Buffer) > MaxLength ->
	error_terminate(413, State);
%% More data is needed and the buffer is still within the limit.
handle_decoded_header({more, _Length}, Req, State) ->
	wait_header(Req, State);
%% The buffer could not be decoded: give up with status 400.
handle_decoded_header({error, _Reason}, _Req, State) ->
	error_terminate(400, State).
我們還是詳細看下這個函數。還記得 erlang:decode_packet/3 這個函數嗎?在前兩篇文章中,我們有過介紹。這次第一個參數由 http_bin 換成了 httph_bin,而這個參數又是什麼意思呢?我們依然給出 erlang doc 地址:http://www.erlang.org/doc/man/erlang.html#decode_packet-3,官方文檔對這個參數的描述如下:
- http | httph | http_bin | httph_bin
-
The Hypertext Transfer Protocol. The packets are returned with the format according to HttpPacket described above. A packet is either a request, a response, a header or an end of header mark. Invalid lines are returned as HttpError.
Recognized request methods and header fields are returned as atoms. Others are returned as strings.
The protocol type http should only be used for the first line when a HttpRequest or a HttpResponse is expected. The following calls should use httph to get HttpHeader's until http_eoh is returned that marks the end of the headers and the beginning of any following message body.
The variants http_bin and httph_bin will return strings (HttpString) as binaries instead of lists.
不知道大家看了能不能理解,我們還是用斷點看下,究竟是什么意思。下面是當我訪問 http://localhost:8080/ 時斷點,監控到的變量的值:
< Req = {http_req,#Port<0.2990>,cowboy_tcp_transport,keepalive,<0.500.0>,
'GET',
{1,1},
undefined,undefined,undefined,undefined,undefined,[],
undefined,<<"/">>,undefined,<<>>,undefined,[],[],undefined,
[],waiting,<<>>,waiting,[],<<>>,undefined,
{#Fun<cowboy_http.urldecode.2>,crash}}
< State = {state,<0.270.0>,#Port<0.2990>,cowboy_tcp_transport,
[{'_',[{[<<"websocket">>],websocket_handler,[]},
{[<<"eventsource">>],eventsource_handler,[]},
{[<<"eventsource">>,<<"live">>],
eventsource_emitter,[]},
{'_',default_handler,[]}]}],
undefined,undefined,undefined,
{#Fun<cowboy_http.urldecode.2>,crash},
0,5,1,infinity,4096,5000,
<<"Host: localhost:8080\r\nUser-Agent: Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:12.0) Gecko/20100101 Firefox/12.0\r\nAccept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8\r\nAccept-Language: zh-cn,zh;q=0.8,en-us;q=0.5,en;q=0.3\r\nAccept-Encoding: gzip, deflate\r\nConnection: keep-alive\r\n\r\n">>,
false,infinity,undefined}
< Buffer = <<"Host: localhost:8080\r\nUser-Agent: Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:12.0) Gecko/20100101 Firefox/12.0\r\nAccept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8\r\nAccept-Language: zh-cn,zh;q=0.8,en-us;q=0.5,en;q=0.3\r\nAccept-Encoding: gzip, deflate\r\nConnection: keep-alive\r\n\r\n">>
< MaxLength = 4096
接下來看下,執行 erlang:decode_packet/3 函數返回的結果:{ok, Header, Rest},相關變量的值如下:
< Header = {http_header,14,'Host',undefined,<<"localhost:8080">>}
< Rest = <<"User-Agent: Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:12.0) Gecko/20100101 Firefox/12.0\r\nAccept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8\r\nAccept-Language: zh-cn,zh;q=0.8,en-us;q=0.5,en;q=0.3\r\nAccept-Encoding: gzip, deflate\r\nConnection: keep-alive\r\n\r\n">>
這下大家應該都明白了吧,我們繼續往下看下這行,Header, Req的值如上,而State記錄修改了buffer作為參數帶入到 header函數中:
header(Header, Req, State#state{buffer=Rest});
這里調用了 cowboy_http_protocol:header/3 函數:
-spec header({http_header, integer(), cowboy_http:header(), any(), binary()} | http_eoh, #http_req{}, #state{}) -> ok. header({http_header, _I, 'Host', _R, RawHost}, Req=#http_req{ transport=Transport, host=undefined}, State) -> RawHost2 = cowboy_bstr:to_lower(RawHost), case catch cowboy_dispatcher:split_host(RawHost2) of {Host, RawHost3, undefined} -> Port = default_port(Transport:name()), parse_header(Req#http_req{ host=Host, raw_host=RawHost3, port=Port, headers=[{'Host', RawHost3}|Req#http_req.headers]}, State); {Host, RawHost3, Port} -> parse_header(Req#http_req{ host=Host, raw_host=RawHost3, port=Port, headers=[{'Host', RawHost3}|Req#http_req.headers]}, State); {'EXIT', _Reason} -> error_terminate(400, State) end;
這個函數有幾個子句(clause,類似其他語言中的重載),我們根據之前的參數,能夠確定調用的是哪個子句,也就是上面貼出的代碼。我們按老規矩一行一行來看這個函數:
RawHost2 = cowboy_bstr:to_lower(RawHost), 這個模塊第一次見,我們看下這個函數:
%% @doc Convert a binary string to lowercase.
%%
%% Every byte of the input is mapped through char_to_lower/1, in order.
-spec to_lower(binary()) -> binary().
to_lower(Bin) ->
	lower_bytes(Bin, <<>>).

%% Consume the input one byte at a time, appending each converted byte
%% to the accumulator.
lower_bytes(<<Byte, Tail/binary>>, Acc) ->
	lower_bytes(Tail, <<Acc/binary, (char_to_lower(Byte))>>);
%% Fewer than eight bits remain (normally nothing at all): done.
lower_bytes(Tail, Acc) when is_bitstring(Tail) ->
	Acc.
看函數注釋,還是比較清楚的,轉換二進制字符串為小寫。這里的 char_to_lower/1 很簡單,我就不貼代碼了。大家應該能一眼看懂,但是比較疑惑,erlang系統沒有這樣的函數支持嗎?還是作者不知道?先不管了,知道的朋友,可以留言告訴我,謝謝。
接着往下看吧,case catch cowboy_dispatcher:split_host(RawHost2) of
%% @doc Split a hostname into a list of tokens.
%%
%% Returns {Tokens, Name, Port}: Name is the input up to the first ":",
%% Tokens is Name split on ".", and Port is the integer after the ":" or
%% 'undefined' when the input has no ":". An empty input yields
%% {[], <<>>, undefined}.
-spec split_host(binary())
	-> {tokens(), binary(), undefined | inet:port_number()}.
split_host(<<>>) ->
	{[], <<>>, undefined};
split_host(HostAndPort) ->
	%% Separate the optional port first; only the first ":" is a delimiter.
	{Name, PortNumber} = case binary:split(HostAndPort, <<":">>) of
		[NameOnly] ->
			{NameOnly, undefined};
		[NamePart, PortBin] ->
			{NamePart, PortBin}
	end,
	Tokens = binary:split(Name, <<".">>, [global, trim]),
	{Tokens, Name, port_to_integer(PortNumber)}.

%% Turn the textual port into an integer; 'undefined' passes through.
port_to_integer(undefined) ->
	undefined;
port_to_integer(PortBin) ->
	list_to_integer(binary_to_list(PortBin)).
這個函數也比較簡單,就是分割主機名,返回格式為 {Host, RawHost3, Port} = {[<<"localhost">>], <<"localhost">>, 8080}。
往下一行又調用了 cowboy_http_protocol:parse_header/2 函數:
parse_header(Req#http_req{ host=Host, raw_host=RawHost3, port=Port, headers=[{'Host', RawHost3}|Req#http_req.headers]}, State);
只不過,這次的參數變了,從參數我們可以理解,其實這是在解析Http的頭部,也就是咱們之前HttpFox所看到的Http Headers,如下圖:
上面,我們已經解析出了 Host,接下來調用:
parse_header(Req#http_req{ host=Host, raw_host=RawHost3, port=Port, headers=[{'Host', RawHost3}|Req#http_req.headers]}, State);
將解析下一行,直到解析完整個頭部為止。我把 cowboy_http_protocol:header/3 的所有子句(重載)都貼出來:
%% Handle one decoded header packet (or the end-of-headers mark) for the
%% request being built. Clause order matters: the specific 'Host' and
%% 'Connection' clauses must come before the generic header clause, and
%% the host=undefined end-of-headers clauses before the general one.
-spec header({http_header, integer(), cowboy_http:header(), any(), binary()}
	| http_eoh, #http_req{}, #state{}) -> ok.
%% First Host header seen (host is still undefined): lowercase it, split
%% it into tokens / raw host / port, record it, and continue parsing.
header({http_header, _I, 'Host', _R, RawHost}, Req=#http_req{
		transport=Transport, host=undefined}, State) ->
	RawHost2 = cowboy_bstr:to_lower(RawHost),
	%% split_host/1 may crash on a malformed value; 'catch' turns that
	%% into {'EXIT', _}, which is answered with status 400 below.
	case catch cowboy_dispatcher:split_host(RawHost2) of
		%% No port in the header: use the transport's default port.
		{Host, RawHost3, undefined} ->
			Port = default_port(Transport:name()),
			parse_header(Req#http_req{
				host=Host, raw_host=RawHost3, port=Port,
				headers=[{'Host', RawHost3}|Req#http_req.headers]}, State);
		{Host, RawHost3, Port} ->
			parse_header(Req#http_req{
				host=Host, raw_host=RawHost3, port=Port,
				headers=[{'Host', RawHost3}|Req#http_req.headers]}, State);
		{'EXIT', _Reason} ->
			error_terminate(400, State)
	end;
%% Ignore Host headers if we already have it.
header({http_header, _I, 'Host', _R, _V}, Req, State) ->
	parse_header(Req, State);
%% Connection header, only while req_keepalive is below max_keepalive:
%% store the header, parse it, and record the resulting connection atom.
%% When the guard fails the generic clause below handles the header and
%% the connection field is left unchanged.
header({http_header, _I, 'Connection', _R, Connection},
		Req=#http_req{headers=Headers}, State=#state{
		req_keepalive=Keepalive, max_keepalive=MaxKeepalive})
		when Keepalive < MaxKeepalive ->
	Req2 = Req#http_req{headers=[{'Connection', Connection}|Headers]},
	{ConnTokens, Req3} = cowboy_http_req:parse_header('Connection', Req2),
	ConnAtom = cowboy_http:connection_to_atom(ConnTokens),
	parse_header(Req3#http_req{connection=ConnAtom}, State);
%% Any other header: pass the name through format_header/1, prepend the
%% pair to the header list, and continue parsing.
header({http_header, _I, Field, _R, Value}, Req, State) ->
	Field2 = format_header(Field),
	parse_header(Req#http_req{headers=[{Field2, Value}|Req#http_req.headers]},
		State);
%% The Host header is required in HTTP/1.1.
header(http_eoh, #http_req{version={1, 1}, host=undefined}, State) ->
	error_terminate(400, State);
%% It is however optional in HTTP/1.0.
header(http_eoh, Req=#http_req{version={1, 0}, transport=Transport,
		host=undefined}, State=#state{buffer=Buffer}) ->
	Port = default_port(Transport:name()),
	%% Hand the remaining buffer over to the request and clear it in State.
	onrequest(Req#http_req{host=[], raw_host= <<>>,
		port=Port, buffer=Buffer}, State#state{buffer= <<>>});
%% End of headers with a host already set: move the remaining buffer into
%% the request and proceed to onrequest/2.
header(http_eoh, Req, State=#state{buffer=Buffer}) ->
	onrequest(Req#http_req{buffer=Buffer}, State#state{buffer= <<>>});
%% Anything else is answered with status 400.
header(_Any, _Req, State) ->
	error_terminate(400, State).
我們從 HttpFox 中可以看到,Headers 的最後一行為 Connection,那麼我們重點看下當處理到最後一行時,程序又如何往下走呢?
好了,今天就到這里,下一篇,我們將解決上面留下的疑問,繼續為大家分享Cowboy的代碼,謝謝大家支持。