[Erlang 0128] Term sharing in Erlang/OTP 下篇


 

  繼續昨天的話題,昨天提到io:format對數據共享的間接影響,如果是下面兩種情況恐怕更容易成為"坑", 呃,恰好我都遇到過;

  如果測試代碼是下面這樣,得到的結果會是怎樣?猜!

%% s2/0: prints {{size(L), flat_size(L)}, {size(L2), flat_size(L2)}} for a
%% list L written as a literal and a list L2 that mentions L four times.
%% As the Core Erlang listing in this article shows, the compiler inlines the
%% literal at every use, so L2 is built with no sharing and size/1 equals
%% flat_size/1 (the article reports {{12,12},{56,56}}).
s2()->
  % L is a compile-time constant (a literal list of six integers).
  L=[1,2,3,4,5,6],
  % L2 refers to L four times; with a literal L each reference is expanded.
  L2=[L,L,L,L],
% erts_debug:size/1 and erts_debug:flat_size/1 are compared for both terms.
erlang:display( {{erts_debug:size(L),erts_debug:flat_size(L)},{erts_debug:size(L2),erts_debug:flat_size(L2)}}
).

 

結果是

 

5> d:s2().

{{12,12},{56,56}}

  

 

  這個結果出來之後,我足足花了5分鐘來懷疑人生,為什麼和期望的結果不一樣呢?是因為我現在用的是最新版本(17.2)嗎?是實現已經修改掉但是沒有更新文檔嗎?出於好奇,我還是按照之前探索問題的套路,用to_core選項生成了一下Core Erlang文件,真相大白:

 

's2'/0 =
    %% Line 11
    fun () ->
        let <_cor5> =
            %% Line 14
            call 'erts_debug':'size'
                ([1|[2|[3|[4|[5|[6]]]]]])
        in  let <_cor4> =
                %% Line 14
                call 'erts_debug':'flat_size'
                    ([1|[2|[3|[4|[5|[6]]]]]])
            in  let <_cor3> =
                    %% Line 14
                    call 'erts_debug':'size'
                        ([[1|[2|[3|[4|[5|[6]]]]]]|[[1|[2|[3|[4|[5|[6]]]]]]|[[1|[2|[3|[4|[5|[6]]]]]]|[[1|[2|[3|[4|[5|[6]]]]]]]]]])
                in  let <_cor2> =
                        %% Line 14
                        call 'erts_debug':'flat_size'
                            ([[1|[2|[3|[4|[5|[6]]]]]]|[[1|[2|[3|[4|[5|[6]]]]]]|[[1|[2|[3|[4|[5|[6]]]]]]|[[1|[2|[3|[4|[5|[6]]]]]]]]]])
                    in  %% Line 14
                        call 'erlang':'display'
                            ({{_cor5,_cor4},{_cor3,_cor2}})

 

修改一下代碼: 

%% s3/1: same measurement as s2/0, but L is a function argument instead of a
%% literal, so the compiler cannot expand it at compile time.
%% Returns {{size(L), flat_size(L)}, {size(L2), flat_size(L2)}}.
s3(L)->
    % L2 holds four references to the same argument term L.
    L2=[L,L,L,L],
    {{erts_debug:size(L),erts_debug:flat_size(L)},{erts_debug:size(L2),erts_debug:flat_size(L2)}}
.

  

對應的s3的代碼是

 

's3'/1 =
    %% Line 18
    fun (_cor0) ->
        let <L2> =
            %% Line 19
            [_cor0|[_cor0|[_cor0|[_cor0|[]]]]]
        in  let <_cor5> =
                %% Line 20
                call 'erts_debug':'size'
                    (_cor0)
            in  let <_cor4> =
                    %% Line 20
                    call 'erts_debug':'flat_size'
                        (_cor0)
                in  let <_cor3> =
                        %% Line 20
                        call 'erts_debug':'size'
                            (L2)
                    in  let <_cor2> =
                            %% Line 20
                            call 'erts_debug':'flat_size'
                                (L2)
                        in  %% Line 20
                            {{_cor5,_cor4},{_cor3,_cor2}}

  

  換句話說,在編譯階段s2方法裡面的常量數據就已經展開了,所以L2無論是size還是flat_size都是一樣的.之所以要先把這個測試做了,就是避免後面的測試誤入陷阱.

 

 這個怎麼破呢?除了上面傳入參數的方法之外,還有一個路子:把常量換成函數調用的返回值即可,如下:

 

%% s4/0: variant of s2/0 in which L is produced by a function call at run
%% time rather than written as a literal; the Core Erlang listing in this
%% article shows L2 built from the variable L instead of an expanded constant.
%% Prints {{size(L), flat_size(L)}, {size(L2), flat_size(L2)}}.
s4()->
  % lists:seq(1,6) is not folded into a literal in the Core Erlang shown in the article.
  L=lists:seq(1,6),
  % L2 holds four references to the same run-time term L.
  L2=[L,L,L,L],
erlang:display( {{erts_debug:size(L),erts_debug:flat_size(L)},{erts_debug:size(L2),erts_debug:flat_size(L2)}}
).

  

 

對應的代碼為:

 

's4'/0 =
    %% Line 24
    fun () ->
        let <L> =
            %% Line 25
            call 'lists':'seq'
                (1, 6)
        in  let <L2> =
                %% Line 26
                [L|[L|[L|[L|[]]]]]
            in  let <_cor5> =
                    %% Line 27
                    call 'erts_debug':'size'
                        (L)
                in  let <_cor4> =
                        %% Line 27
                        call 'erts_debug':'flat_size'
                            (L)
                    in  let <_cor3> =
                            %% Line 27
                            call 'erts_debug':'size'
                                (L2)
                        in  let <_cor2> =
                                %% Line 27
                                call 'erts_debug':'flat_size'
                                    (L2)
                            in  %% Line 27
                                call 'erlang':'display'
                                    ({{_cor5,_cor4},{_cor3,_cor2}})

  

 

 

不要小看這個問題,這樣一個常量優化在極端情況下會有"大驚喜",論文裡面給了這樣一個例子:

%% show_compiler_crashes/0: builds ten nesting levels (L1..L9 and L) on top
%% of the literal L0, each level listing the previous level ten times, and
%% returns the outermost list L.
%% Written out without sharing, the result contains 10^10 copies of L0; the
%% article quotes this as the case where compile-time constant expansion made
%% erlc run out of memory.
show_compiler_crashes() ->

% Base literal; every other binding is defined only in terms of literals.
L0 = [0],

L1 = [L0, L0, L0, L0, L0, L0, L0, L0, L0, L0],

L2 = [L1, L1, L1, L1, L1, L1, L1, L1, L1, L1],

L3 = [L2, L2, L2, L2, L2, L2, L2, L2, L2, L2],

L4 = [L3, L3, L3, L3, L3, L3, L3, L3, L3, L3],

L5 = [L4, L4, L4, L4, L4, L4, L4, L4, L4, L4],

L6 = [L5, L5, L5, L5, L5, L5, L5, L5, L5, L5],

L7 = [L6, L6, L6, L6, L6, L6, L6, L6, L6, L6],

L8 = [L7, L7, L7, L7, L7, L7, L7, L7, L7, L7],

L9 = [L8, L8, L8, L8, L8, L8, L8, L8, L8, L8],

L = [L9, L9, L9, L9, L9, L9, L9, L9, L9, L9],

L.

 

  

影響有多大呢?看結果:
 
After a bit more of 45 minutes of struggling, the compiler tries to allocate 3.7 GB of memory and gives up:

$ erlc demo.erl
Crash dump was written to: erl_crash.dump
eheap_alloc: Cannot allocate 3716993744 bytes of
memory (of type "heap_frag").
Abort

 

  好吧,勇於自黑,由於上面遇到這樣讓人惱火的問題,我決定在Shell中完成後續的測試,然後,我一腳踏進"新坑":

 

陷阱2 Shell ! Shell !

 

Eshell V6.0  (abort with ^G)
1> L=[1,2,3,4,5,6,7,8,9,10].
[1,2,3,4,5,6,7,8,9,10]
2>  L2=[L,L,L,L,L,L].
[[1,2,3,4,5,6,7,8,9,10],
[1,2,3,4,5,6,7,8,9,10],
[1,2,3,4,5,6,7,8,9,10],
[1,2,3,4,5,6,7,8,9,10],
[1,2,3,4,5,6,7,8,9,10],
[1,2,3,4,5,6,7,8,9,10]]
3> erts_debug:size(L2).
32
4> erts_debug:flat_size(L2).
132
5> io:format("~p",[L2]).
[[1,2,3,4,5,6,7,8,9,10],
[1,2,3,4,5,6,7,8,9,10],
[1,2,3,4,5,6,7,8,9,10],
[1,2,3,4,5,6,7,8,9,10],
[1,2,3,4,5,6,7,8,9,10],
[1,2,3,4,5,6,7,8,9,10]]ok
6> erts_debug:size(L2).
32
7> erts_debug:flat_size(L2).
132

  

 

    一開始啟動shell的時候,Shell的Pid是<0.33.0>.然後我們在中間故意執行一個不存在的方法 fake:fake().這時查看一下,Shell已經重啟,Pid變成<0.40.0>.注意再執行erts_debug:size(L2),結果已經變成132了,換句話說,這裡L2數據已經展開了.

 

Eshell V6.0  (abort with ^G)
1> self().
<0.33.0>
2>  L=[1,2,3,4,5,6,7,8,9,10].
[1,2,3,4,5,6,7,8,9,10]
3>  L2=[L,L,L,L,L,L].
[[1,2,3,4,5,6,7,8,9,10],
[1,2,3,4,5,6,7,8,9,10],
[1,2,3,4,5,6,7,8,9,10],
[1,2,3,4,5,6,7,8,9,10],
[1,2,3,4,5,6,7,8,9,10],
[1,2,3,4,5,6,7,8,9,10]]
4>  erts_debug:size(L2).
32
5> erts_debug:flat_size(L2).
132
6> fake:fake().
** exception error: undefined function fake:fake/0
7> self().
<0.40.0>
8>  erts_debug:size(L2).
132
9> erts_debug:flat_size(L2).
132
10>

  

 

   那為什麼會觸發數據展開(expand, flattening)呢? 看下面的代碼,在Shell重新啟動求值進程的時候,會把之前已經綁定的變量(Bs)放進閉包,作為spawn_link的參數傳給新的進程.

 

 

erl6.2\lib\stdlib-2.2\src\shell.erl

%% start_eval/3: spawns a linked process that runs evaluator(Self, Bs, RT, Ds),
%% records its pid in the caller's process dictionary under the key
%% `evaluator`, and returns that pid.
%% Bs, RT and Ds are captured by the fun passed to spawn_link/1; per the
%% surrounding article, Bs holds the variables already bound in the shell.
start_eval(Bs, RT, Ds) ->
    % Pid of the calling process, handed to the evaluator as its first argument.
    Self = self(),
    Eval = spawn_link(fun() -> evaluator(Self, Bs, RT, Ds) end),
    put(evaluator, Eval),
    Eval.

  

  換句話說,Erlang中使用spawn創建進程時,傳入的參數(包括函數閉包)需要拷貝到新進程的heap,而拷貝過程不會保留共享關系,數據會被展開;所以進程創建的時候需要考慮參數的大小.

 

  OK,這個問題差不多了,休息.


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM