1. 程式人生 > >erlang下lists模塊sort(排序)方法源碼解析

erlang下lists模塊sort(排序)方法源碼解析

簡單的 我們 asc HR 最簡 lse color ble 最小

排序算法一直是各種語言最簡單也是最復雜的算法,例如十大經典排序算法(動圖演示)裏面講的那樣

第一次看lists的sort方法的時候,蒙了,幾百行的代碼,我心想要這麽復雜麽(因為C語言的冒泡排序我記得不超過30行),於是自己就實現了下

結果更蒙了

%% bubble_sort(L) -> Sorted
%% Naive bubble sort: runs do_bubble_sort/1 over the list once per element
%% (length(L) passes in total) and returns the result.
bubble_sort(L) ->
    Passes = length(L),
    bubble_sort(L, Passes).

%% bubble_sort(L, N) -> Sorted
%% Applies N more passes of do_bubble_sort/1 to L; with no passes left
%% (N is exactly 0) the list is returned untouched.
bubble_sort(L, N) when N =/= 0 ->
    Swept = do_bubble_sort(L),
    bubble_sort(Swept, N - 1);
bubble_sort(L, 0) ->
    L.
 
%% do_bubble_sort(List) -> List1
%% One bubble pass: walks the list left to right, comparing each adjacent
%% pair and carrying the larger element forward, so the largest element of
%% the list ends up last.
%%
%% Elements that compare equal (for example 1 and 1.0) are left in their
%% original order; using `=<` instead of `<` avoids the needless swap and
%% keeps the pass stable. An empty list is returned as is instead of
%% raising function_clause.
do_bubble_sort([]) ->
    [];
do_bubble_sort([A]) ->
    [A];
do_bubble_sort([A, B | R]) when A =< B ->
    %% Already in order: emit A and keep bubbling B.
    [A | do_bubble_sort([B | R])];
do_bubble_sort([A, B | R]) ->
    %% Out of order: emit B and keep carrying the larger A forward.
    [B | do_bubble_sort([A | R])].

對比結果如下

6> timer:tc(tt1,bubble_sort,[B]).
{21130,
 [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,
  23,24,25,26,27|...]}
7> timer:tc(lists,sort,[B]).     
{162,
 [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,
  23,24,25,26,27|...]}
8> 

B是一個打亂順序的1到1000的序列,我X,這不是一個數量級的算法啊~~~~,不是說好越簡單的代碼越快麽,三觀被刷新了。

還是老實讀lists的源碼,一共300+行,摘錄自lists.erl:

%% sort(List1) -> List2
%% Returns the elements of List1 in ascending term order.
%%
%% Lists of at most three elements are ordered directly with comparisons.
%% For longer lists the first two elements choose how the first run is
%% collected: X < Y starts an ascending run (split_1/5), X > Y starts a
%% descending run (split_2/5), and X == Y defers the decision to sort_1/3.
%%
%% NOTE: every function below must keep its own physical lines. A `%`
%% comment runs to the end of the line, so collapsing this code onto one
%% line silently comments out everything after the first `%`.
-spec sort(List1) -> List2 when
      List1 :: [T],
      List2 :: [T],
      T :: term().

sort([X, Y | L] = L0) when X =< Y ->
    case L of
        [] ->
            L0;
        [Z] when Y =< Z ->
            %% X =< Y =< Z: already sorted.
            L0;
        [Z] when X =< Z ->
            %% X =< Z < Y
            [X, Z, Y];
        [Z] ->
            %% Z < X =< Y
            [Z, X, Y];
        _ when X == Y ->
            sort_1(Y, L, [X]);
        _ ->
            split_1(X, Y, L, [], [])
    end;
sort([X, Y | L]) ->
    %% Here X > Y.
    case L of
        [] ->
            [Y, X];
        [Z] when X =< Z ->
            %% Y < X =< Z
            [Y, X | L];
        [Z] when Y =< Z ->
            %% Y =< Z < X
            [Y, Z, X];
        [Z] ->
            %% Z < Y < X
            [Z, Y, X];
        _ ->
            split_2(X, Y, L, [], [])
    end;
sort([_] = L) ->
    L;
sort([] = L) ->
    L.

%% sort_1(X, Rest, R)
%% Skips over a leading stretch of elements that all compare equal to X,
%% pushing them onto R. The first element Y that differs decides whether an
%% ascending (split_1/5) or descending (split_2/5) run is started, with R as
%% the part of the run collected so far. If the input ends first, every
%% element compared equal and the list is rebuilt in its original order.
sort_1(X, [Y | L], R) when X == Y ->
    sort_1(Y, L, [X | R]);
sort_1(X, [Y | L], R) when X < Y ->
    split_1(X, Y, L, R, []);
sort_1(X, [Y | L], R) ->
    split_2(X, Y, L, R, []);
sort_1(X, [], R) ->
    lists:reverse(R, [X]).

%% Ascending.
%% split_1(X, Y, Rest, R, Rs)
%% Collects an ascending run. X and Y (X =< Y) are the two largest elements
%% of the run so far, R holds the rest of the run with the most recently
%% added element first, and Rs holds the runs that are already complete.
%% (The article's execution trace was produced by temporarily adding
%% io:format/2 calls to each clause; they are not part of the algorithm and
%% have been removed.)
split_1(X, Y, [Z | L], R, Rs) when Z >= Y ->
    %% X =< Y =< Z: Z extends the run at the top.
    split_1(Y, Z, L, [X | R], Rs);
split_1(X, Y, [Z | L], R, Rs) when Z >= X ->
    %% X =< Z < Y: Z slots in between X and Y.
    split_1(Z, Y, L, [X | R], Rs);
split_1(X, Y, [Z | L], [], Rs) ->
    %% Z < X and nothing is below X yet: Z becomes the bottom of the run.
    split_1(X, Y, L, [Z], Rs);
split_1(X, Y, [Z | L], R, Rs) ->
    %% Z < X but R is not empty and Z is not compared against it:
    %% set Z aside as S and continue in split_1_1/6.
    split_1_1(X, Y, L, R, Rs, Z);
split_1(X, Y, [], R, Rs) ->
    %% Input exhausted: close the current run and start merging.
    rmergel([[Y, X | R] | Rs], []).
%% split_1_1(X, Y, Rest, R, Rs, S)
%% Same as split_1/5, but one element S has been set aside because it was
%% smaller than X and could not simply be pushed onto R. The run keeps
%% growing while new elements are >= X; the first element Z below X closes
%% the run, and S and Z (smaller one first) seed the next run.
%%
%% NOTE: each clause is on its own line on purpose. A `%` comment runs to
%% the end of the line, so a collapsed one-line paste of this code comments
%% out everything after its first `%`.
split_1_1(X, Y, [Z | L], R, Rs, S) when Z >= Y ->
    split_1_1(Y, Z, L, [X | R], Rs, S);
split_1_1(X, Y, [Z | L], R, Rs, S) when Z >= X ->
    split_1_1(Z, Y, L, [X | R], Rs, S);
split_1_1(X, Y, [Z | L], R, Rs, S) when S =< Z ->
    %% S =< Z < X: store the finished run, start a new one from (S, Z).
    split_1(S, Z, L, [], [[Y, X | R] | Rs]);
split_1_1(X, Y, [Z | L], R, Rs, S) ->
    %% Z < S: same, but the new run starts from (Z, S).
    split_1(Z, S, L, [], [[Y, X | R] | Rs]);
split_1_1(X, Y, [], R, Rs, S) ->
    %% Input exhausted: S forms a one-element run of its own.
    rmergel([[S], [Y, X | R] | Rs], []).

%% Descending.
%% split_2(X, Y, Rest, R, Rs)
%% Mirror image of split_1/5 for a descending run: X >= Y are the two
%% smallest elements of the run so far, and every comparison is reversed.
%% Finished runs are handed to mergel/2 instead of rmergel/2.
split_2(X, Y, [Z | L], R, Rs) when Z =< Y ->
    split_2(Y, Z, L, [X | R], Rs);
split_2(X, Y, [Z | L], R, Rs) when Z =< X ->
    split_2(Z, Y, L, [X | R], Rs);
split_2(X, Y, [Z | L], [], Rs) ->
    split_2(X, Y, L, [Z], Rs);
split_2(X, Y, [Z | L], R, Rs) ->
    split_2_1(X, Y, L, R, Rs, Z);
split_2(X, Y, [], R, Rs) ->
    mergel([[Y, X | R] | Rs], []).

%% split_2_1(X, Y, Rest, R, Rs, S)
%% Mirror image of split_1_1/6: S is the element set aside by split_2/5.
split_2_1(X, Y, [Z | L], R, Rs, S) when Z =< Y ->
    split_2_1(Y, Z, L, [X | R], Rs, S);
split_2_1(X, Y, [Z | L], R, Rs, S) when Z =< X ->
    split_2_1(Z, Y, L, [X | R], Rs, S);
split_2_1(X, Y, [Z | L], R, Rs, S) when S > Z ->
    split_2(S, Z, L, [], [[Y, X | R] | Rs]);
split_2_1(X, Y, [Z | L], R, Rs, S) ->
    split_2(Z, S, L, [], [[Y, X | R] | Rs]);
split_2_1(X, Y, [], R, Rs, S) ->
    mergel([[S], [Y, X | R] | Rs], []).
%% merge/1

%% mergel(Lists, Acc) / rmergel(Lists, Acc)
%% One merge round over a list of runs. Runs are merged three at a time
%% (merge3_1/6, rmerge3_1/6), or two at a time when only two remain
%% (merge2_1/4, rmerge2_1/4), and each merged result is pushed onto Acc.
%% The merge helpers build their output by consing onto an accumulator, so
%% a round yields its results in the opposite order to its inputs; that is
%% why mergel/2 hands its results to rmergel/2 and vice versa. The rounds
%% alternate until mergel/2 is left with a single list, which is returned.
%%
%% NOTE: keep every clause on its own line. A `%` comment runs to the end
%% of the line, so a collapsed one-line paste that starts with
%% `%% merge/1` turns all of this code into a comment.
mergel([[] | L], Acc) ->
    mergel(L, Acc);
mergel([T1, [H2 | T2], [H3 | T3] | L], Acc) ->
    mergel(L, [merge3_1(T1, [], H2, T2, H3, T3) | Acc]);
mergel([T1, [H2 | T2]], Acc) ->
    rmergel([merge2_1(T1, H2, T2, []) | Acc], []);
mergel([L], []) ->
    L;
mergel([L], Acc) ->
    rmergel([lists:reverse(L, []) | Acc], []);
mergel([], []) ->
    [];
mergel([], Acc) ->
    rmergel(Acc, []);
mergel([A, [] | L], Acc) ->
    mergel([A | L], Acc);
mergel([A, B, [] | L], Acc) ->
    mergel([A, B | L], Acc).

rmergel([[H3 | T3], [H2 | T2], T1 | L], Acc) ->
    rmergel(L, [rmerge3_1(T1, [], H2, T2, H3, T3) | Acc]);
rmergel([[H2 | T2], T1], Acc) ->
    mergel([rmerge2_1(T1, H2, T2, []) | Acc], []);
rmergel([L], Acc) ->
    mergel([lists:reverse(L, []) | Acc], []);
rmergel([], Acc) ->
    mergel(Acc, []).

%% merge3/3

%% Three-way merge of L1, L2 and L3. H1/H2/H3 are the current heads, T1/T2/T3
%% the remaining tails, and M the output accumulated so far (most recent
%% element first). When one list runs out, the two-way merge takes over.

%% Take L1 apart.
merge3_1([H1 | T1], M, H2, T2, H3, T3) when H1 =< H2 ->
    merge3_12(T1, H1, H2, T2, H3, T3, M);
merge3_1([H1 | T1], M, H2, T2, H3, T3) ->
    merge3_21(T1, H1, H2, T2, H3, T3, M);
merge3_1([], M, H2, T2, H3, T3) when H2 =< H3 ->
    merge2_1(T2, H3, T3, [H2 | M]);
merge3_1([], M, H2, T2, H3, T3) ->
    merge2_2(T2, H3, T3, M, H2).

%% Take L2 apart.
merge3_2(T1, H1, M, [H2 | T2], H3, T3) when H1 =< H2 ->
    merge3_12(T1, H1, H2, T2, H3, T3, M);
merge3_2(T1, H1, M, [H2 | T2], H3, T3) ->
    merge3_21(T1, H1, H2, T2, H3, T3, M);
merge3_2(T1, H1, M, [], H3, T3) when H1 =< H3 ->
    merge2_1(T1, H3, T3, [H1 | M]);
merge3_2(T1, H1, M, [], H3, T3) ->
    merge2_2(T1, H3, T3, M, H1).

% H1 =< H2. Inlined.
merge3_12(T1, H1, H2, T2, H3, T3, M) when H1 =< H3 ->
    merge3_1(T1, [H1 | M], H2, T2, H3, T3);
merge3_12(T1, H1, H2, T2, H3, T3, M) ->
    merge3_12_3(T1, H1, H2, T2, [H3 | M], T3).

% H1 =< H2, take L3 apart.
merge3_12_3(T1, H1, H2, T2, M, [H3 | T3]) when H1 =< H3 ->
    merge3_1(T1, [H1 | M], H2, T2, H3, T3);
merge3_12_3(T1, H1, H2, T2, M, [H3 | T3]) ->
    merge3_12_3(T1, H1, H2, T2, [H3 | M], T3);
merge3_12_3(T1, H1, H2, T2, M, []) ->
    merge2_1(T1, H2, T2, [H1 | M]).

% H1 > H2. Inlined.
%% merge3_21(T1, H1, H2, T2, H3, T3, M)
%% Three-way merge step for the case H1 > H2: H2 is compared with H3 and
%% the smaller one is pushed onto the output accumulator M.
%%
%% NOTE: keep every clause on its own line. A `%` comment runs to the end
%% of the line, so in a collapsed one-line paste the first inline comment
%% swallows all the code that follows it.
merge3_21(T1, H1, H2, T2, H3, T3, M) when H2 =< H3 ->
    merge3_2(T1, H1, [H2 | M], T2, H3, T3);
merge3_21(T1, H1, H2, T2, H3, T3, M) ->
    merge3_21_3(T1, H1, H2, T2, [H3 | M], T3).

% H1 > H2, take L3 apart.
merge3_21_3(T1, H1, H2, T2, M, [H3 | T3]) when H2 =< H3 ->
    merge3_2(T1, H1, [H2 | M], T2, H3, T3);
merge3_21_3(T1, H1, H2, T2, M, [H3 | T3]) ->
    merge3_21_3(T1, H1, H2, T2, [H3 | M], T3);
merge3_21_3(T1, H1, H2, T2, M, []) ->
    merge2_2(T1, H2, T2, M, H1).

%% rmerge/3

%% Reverse three-way merge, used by rmergel/2. It has the same structure as
%% merge3_*, but at each step the larger head is pushed onto M.

%% Take L1 apart.
rmerge3_1([H1 | T1], M, H2, T2, H3, T3) when H1 =< H2 ->
    rmerge3_12(T1, H1, H2, T2, H3, T3, M);
rmerge3_1([H1 | T1], M, H2, T2, H3, T3) ->
    rmerge3_21(T1, H1, H2, T2, H3, T3, M);
rmerge3_1([], M, H2, T2, H3, T3) when H2 =< H3 ->
    rmerge2_2(T2, H3, T3, M, H2);
rmerge3_1([], M, H2, T2, H3, T3) ->
    rmerge2_1(T2, H3, T3, [H2 | M]).

%% Take L2 apart.
rmerge3_2(T1, H1, M, [H2 | T2], H3, T3) when H1 =< H2 ->
    rmerge3_12(T1, H1, H2, T2, H3, T3, M);
rmerge3_2(T1, H1, M, [H2 | T2], H3, T3) ->
    rmerge3_21(T1, H1, H2, T2, H3, T3, M);
rmerge3_2(T1, H1, M, [], H3, T3) when H1 =< H3 ->
    rmerge2_2(T1, H3, T3, M, H1);
rmerge3_2(T1, H1, M, [], H3, T3) ->
    rmerge2_1(T1, H3, T3, [H1 | M]).

% H1 =< H2. Inlined.
rmerge3_12(T1, H1, H2, T2, H3, T3, M) when H2 =< H3 ->
    rmerge3_12_3(T1, H1, H2, T2, [H3 | M], T3);
rmerge3_12(T1, H1, H2, T2, H3, T3, M) ->
    rmerge3_2(T1, H1, [H2 | M], T2, H3, T3).

% H1 =< H2, take L3 apart.
rmerge3_12_3(T1, H1, H2, T2, M, [H3 | T3]) when H2 =< H3 ->
    rmerge3_12_3(T1, H1, H2, T2, [H3 | M], T3);
rmerge3_12_3(T1, H1, H2, T2, M, [H3 | T3]) ->
    rmerge3_2(T1, H1, [H2 | M], T2, H3, T3);
rmerge3_12_3(T1, H1, H2, T2, M, []) ->
    rmerge2_2(T1, H2, T2, M, H1).

% H1 > H2. Inlined.
rmerge3_21(T1, H1, H2, T2, H3, T3, M) when H1 =< H3 ->
    rmerge3_21_3(T1, H1, H2, T2, [H3 | M], T3);
rmerge3_21(T1, H1, H2, T2, H3, T3, M) ->
    rmerge3_1(T1, [H1 | M], H2, T2, H3, T3).

% H1 > H2, take L3 apart.
%% rmerge3_21_3(T1, H1, H2, T2, M, L3)
%% Reverse three-way merge step for the case H1 > H2 with L3 still to be
%% taken apart: heads of L3 that are >= H1 are pushed onto M, otherwise H1
%% is pushed. Once L3 is empty the two-way reverse merge finishes the job.
%%
%% NOTE: keep every clause on its own line. A `%` comment runs to the end
%% of the line, so in a collapsed one-line paste the first inline comment
%% swallows all the code that follows it.
rmerge3_21_3(T1, H1, H2, T2, M, [H3 | T3]) when H1 =< H3 ->
    rmerge3_21_3(T1, H1, H2, T2, [H3 | M], T3);
rmerge3_21_3(T1, H1, H2, T2, M, [H3 | T3]) ->
    rmerge3_1(T1, [H1 | M], H2, T2, H3, T3);
rmerge3_21_3(T1, H1, H2, T2, M, []) ->
    rmerge2_1(T1, H2, T2, [H1 | M]).

%% merge/2

%% Two-way merge. M is the output accumulated so far (most recent element
%% first). merge2_1/4 takes the first list apart while H2 is the pending
%% head of the second; merge2_2/5 takes the second list apart while H1 is
%% the pending head of the first (HdM is the second list's current head).
%% At each step the smaller head is pushed onto M. When one list runs out,
%% the rest of the other is reversed onto M with lists:reverse/2.
merge2_1([H1 | T1], H2, T2, M) when H1 =< H2 ->
    merge2_1(T1, H2, T2, [H1 | M]);
merge2_1([H1 | T1], H2, T2, M) ->
    merge2_2(T1, H2, T2, M, H1);
merge2_1([], H2, T2, M) ->
    lists:reverse(T2, [H2 | M]).

merge2_2(T1, HdM, [H2 | T2], M, H1) when H1 =< H2 ->
    merge2_1(T1, H2, T2, [H1, HdM | M]);
merge2_2(T1, HdM, [H2 | T2], M, H1) ->
    merge2_2(T1, H2, T2, [HdM | M], H1);
merge2_2(T1, HdM, [], M, H1) ->
    lists:reverse(T1, [H1, HdM | M]).

%% rmerge/2

%% Reverse two-way merge: same structure as merge2_*, but at each step the
%% larger head is pushed onto M.
rmerge2_1([H1 | T1], H2, T2, M) when H1 =< H2 ->
    rmerge2_2(T1, H2, T2, M, H1);
rmerge2_1([H1 | T1], H2, T2, M) ->
    rmerge2_1(T1, H2, T2, [H1 | M]);
rmerge2_1([], H2, T2, M) ->
    lists:reverse(T2, [H2 | M]).

rmerge2_2(T1, HdM, [H2 | T2], M, H1) when H1 =< H2 ->
    rmerge2_2(T1, H2, T2, [HdM | M], H1);
rmerge2_2(T1, HdM, [H2 | T2], M, H1) ->
    rmerge2_1(T1, H2, T2, [H1, HdM | M]);
rmerge2_2(T1, HdM, [], M, H1) ->
    lists:reverse(T1, [H1, HdM | M]).

好,這是我見過最復雜的排序算法了。

這個算法和歸並排序有點像(本質上是先把列表拆成若幹有序段、再逐輪合並的歸並排序),可是由於erlang的特性,變量不能變,使得它的寫法和大部分的排序方法有很大的區別。拆分階段只需要掃描列表一遍,復雜度是O(n);加上後面的合並階段,整個算法的復雜度是O(n log n)

這個算法可以分3大塊,第一塊是sort_*函數,第二塊是split_*,第3塊是rmergel和mergel

首先

sort([X, Y | L] = L0) when X =< Y ->  %當list不超過3個元素時,直接比較後返回;當list超過3個元素時,進入sort_1或者split_*函數
..........
sort([X, Y | L]) ->            %分了2種情況,第一個元素大於第二個 或者 第一個元素小於等於第二個
.......
sort([_] = L) ->         %list只有1個也直接返回   
    L;
sort([] = L) ->          %list為空直接返回
    L.

%% sort_1(X, Rest, R)
%% Called while every element seen so far compares equal to X; those
%% elements are collected on R. The first element Y that differs from X
%% picks the ascending (split_1/5) or descending (split_2/5) splitter.
%% If the input runs out first, the collected elements are put back in
%% their original order.
sort_1(X, [], R) ->
    lists:reverse(R, [X]);
sort_1(X, [Y | L], R) ->
    if
        X == Y ->
            sort_1(Y, L, [X | R]);
        X < Y ->
            split_1(X, Y, L, R, []);
        true ->
            split_2(X, Y, L, R, [])
    end.

這段代碼還是比較清晰的,就是說把超過3個元素的list傳入split_*

下面看split_1系列

%% Ascending.

%% split_1(X, Y, Rest, R, Rs)
%% Collects an ascending run from Rest. X and Y are the two largest
%% elements of the current run (the recursive calls below keep X =< Y),
%% R holds the rest of the run with the most recently added element first,
%% and Rs is passed through until the run is finished.

%% Z >= Y, i.e. X =< Y =< Z: push the smallest (X) onto R and let (Y, Z)
%% become the new (X, Y).
split_1(X, Y, [Z | L], R, Rs) when Z >= Y ->

  split_1(Y, Z, L, [X | R], Rs);

%% Z >= X but Z < Y, i.e. X =< Z < Y: push X onto R and let Z take X's
%% place, giving the new pair (Z, Y).
split_1(X, Y, [Z | L], R, Rs) when Z >= X ->

  split_1(Z, Y, L, [X | R], Rs);

%% Z < X and R is empty: Z is the smallest element so far, so it simply
%% becomes R.
split_1(X, Y, [Z | L], [], Rs) ->

  split_1(X, Y, L, [Z], Rs);

%% Z < X and R is not empty: Z is not compared with the elements of R here,
%% so it cannot just be pushed onto R. It is handed to split_1_1/6 as the
%% extra last argument instead.
split_1(X, Y, [Z | L], R, Rs) ->

  split_1_1(X, Y, L, R, Rs, Z);

%% Input exhausted: the finished run [Y, X | R] is put in front of Rs and
%% everything is handed to rmergel/2.
split_1(X, Y, [], R, Rs) ->

  rmergel([[Y, X | R] | Rs], []).

WTF,這些到底在幹什麽,erlang又沒有調試跟蹤,又沒說明,完全就蒙了,仔細研究下終於明白了這2個函數的意義,不得不說寫源碼的真是大神啊~~~

通過上面的分析,我們知道了一個規律,每次都會比較3個數的大小,而且還會處理其中最小的數

X:下樁(當前有序段中第二大的元素) Y:上樁(當前有序段中最大的元素) Z:目前list的第一個元素 R:當前有序段中已經排好的其余元素(從大到小存放) Rs:已經完成的有序段的列表 S:split_1_1中暫存的那個元素

split_1這個函數的作用是把X,Y,Z中最小的放到R中,同時要保證這個數比R中現有的元素都大,

這個怎麽保證呢?當Z>=X(包括X<=Z<Y和Z>=Y兩種情況)的時候,直接把X放進R,

原因就是X一直小於Y,而且R裏面的元素都比X小才放進去的,而且整個過程X和Y的值都是增加的,所以X肯定大於R中的任何一個

(下面的R代表R中任何一個元素),假設Z0>Y0

  1. R0<X0<Y0<Z0 開始R0為空,比較成立
  2. R1<X1<Y1<Z1 這時R1=[X0|R0],X1=Y0,Y1=Z0,當Z1>Y1,比較還是成立
  3. R2<X2<Y2<Z2 這時R2=[X1|R1],X2=Y1,Y2=Z1,當Z2>Y2,比較還是成立
  4. 。。。。。。。

當Z>X的時候也一樣,於是當Z>X或者Z>Y的時候,只要把X的值放到R中就行,R裏面的元素越來越大,是排好序的(從大到小),於是上面綠色註釋的代碼(即split_1的前兩個子句)就能理解了

藍色註釋的代碼(即split_1的第三個子句):當R為空且Z<X<Y時,直接讓R=[Z],這時R<X<Y依然成立,於是也能理解了

主要是褐色註釋的代碼(即split_1的第四個子句):當R不為空時,我們知道R<X<Y,而且Z<X<Y,可是R裏面的元素和Z的大小關系不能確定,

於是我們知道了當前最小的是Z,可是Z不一定大於R的所有元素,上面的split_1函數的邏輯就不通了,然後把Z存入到最後一個參數進入split_1_1

我們來查看split_1_1

%% split_1_1(X, Y, Rest, R, Rs, S)
%% Continues the ascending run of split_1/5 while one element S is set
%% aside: S was smaller than X but was not compared with the elements of R,
%% so it could not be pushed onto R.

%% X =< Y =< Z: S is ignored here; as in split_1/5, X goes onto R and
%% (Y, Z) become the new (X, Y).
split_1_1(X, Y, [Z | L], R, Rs, S) when Z >= Y ->
    split_1_1(Y, Z, L, [X | R], Rs, S);
%% X =< Z < Y: S is ignored here; X goes onto R and Z takes X's place.
split_1_1(X, Y, [Z | L], R, Rs, S) when Z >= X ->
    split_1_1(Z, Y, L, [X | R], Rs, S);
%% S =< Z < X: the current run is closed. Y and X are put on top of R
%% (which keeps that list ordered), the finished run is stored in Rs, and
%% split_1/5 starts a fresh run with an empty R and (S, Z) as the new (X, Y).
split_1_1(X, Y, [Z | L], R, Rs, S) when S =< Z ->
    split_1(S, Z, L, [], [[Y, X | R] | Rs]);
%% Z < S: same as the previous clause, but the new pair is (Z, S).
split_1_1(X, Y, [Z | L], R, Rs, S) ->
    split_1(Z, S, L, [], [[Y, X | R] | Rs]);
%% Input exhausted: S becomes a one-element run of its own, the current run
%% is closed, and all runs are handed to rmergel/2.
split_1_1(X, Y, [], R, Rs, S) ->    
    rmergel([[S], [Y, X | R] | Rs], []).

我們可以看到,紫色註釋的代碼(即split_1_1的第三個子句),當S<=Z<X<Y,R<X時,我們知道最小的數是S,然後是Z,可是我們無法確定R裏面的元素與這2個數的大小關系,

如果按照上面函數的邏輯,可以再弄個函數split_1_1_1,可這樣函數不是閉環的,於是大神直接把肯定比R大的2個元素(X和Y)存入R(保證了R的有序),把這一段存進Rs,再回到split_1,這裏真是太厲害了

 1 X:12,Y:13,Z:54,L:[32,1,4521,32,214,541,1,12,3],R:[],Rs:[]
 2 X:13,Y:54,Z:32,L:[1,4521,32,214,541,1,12,3],R:"\f",Rs:[]
 3 X:32,Y:54,Z:1,L:[4521,32,214,541,1,12,3],R:"\r\f",Rs:[]
 4 X:32,Y:54,Z:4521,L:[32,214,541,1,12,3],R:"\r\f",Rs:[],S:1
 5 X:54,Y:4521,Z:32,L:[214,541,1,12,3],R:" \r\f",Rs:[],S:1
 6 X:1,Y:32,Z:214,L:[541,1,12,3],R:[],Rs:[[4521,54,32,13,12]]
 7 X:32,Y:214,Z:541,L:[1,12,3],R:[1],Rs:[[4521,54,32,13,12]]
 8 X:214,Y:541,Z:1,L:[12,3],R:[32,1],Rs:[[4521,54,32,13,12]]
 9 X:214,Y:541,Z:12,L:[3],R:[32,1],Rs:[[4521,54,32,13,12]],S:1
10 X:1,Y:12,Z:3,L:[],R:[],Rs:[[541,214,32,1],[4521,54,32,13,12]]
11 Rs:[[12,3,1],[541,214,32,1],[4521,54,32,13,12]]
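上面是一個簡單例子的執行過程(每一行是進入split_1/split_1_1某個子句時打印出來的參數),看完大概就能明白這個邏輯了。註:輸出中的R:"\f"、"\r\f"、" \r\f"其實就是列表[12]、[13,12]、[32,13,12],只是erlang把由可打印字符編碼組成的整數列表顯示成了字符串。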
這裏的List = [12,13,54,32,1,4521,32,214,541,1,12,3],這2個函數執行完成後的結果是[[12,3,1],[541,214,32,1],[4521,54,32,13,12]]
可以看到這裏經過了N次循環(N是List長度),生成了幾個子list,每個子list都是有序的,這樣肯定沒有完成,剩下的就是mergel和rmergel函數的作用了

篇幅太長,不好排版,下面的函數(mergel和rmergel)分析放到下一篇:

erlang下lists模塊sort(排序)方法源碼解析(二)

未完待續。。。

erlang下lists模塊sort(排序)方法源碼解析