mongo-mapreduce測試(1)——count/sum/where條件
mongo-mapreduce測試(2)——列轉行與finalize函數
mongo-mapreduce測試(3)——group by having
mongo-mapreduce測試(7)——使用js存儲過程
mongo-mapreduce測試(9)——python調用
mongo-mapreduce測試(10)——階段總結(2)
一、實現多列的group by操作(方法1)。
類似如下SQL:
select PhoneId, hour, count(*)
from tianyc_test8
where PhoneId in ('xx','yy','zz')
group by PhoneId, hour;
1. 構建測試數據
在集合tianyc_test8中,每個PhoneId在每小時有多條數據,下面統計每個PhoneId在每小時的記錄數。
> db.tianyc_test8.find()
{ "_id" : ObjectId("50d0a5bdd03"), "PhoneId" : "460001201624570", "Time" : ISODate("2012-12-18T17:21:11Z") }
{ "_id" : ObjectId("50d0a5bdd0c"), "PhoneId" : "460001201521818", "Time" : ISODate("2012-12-18T17:21:12Z") }
{ "_id" : ObjectId("50d0a19194a"), "PhoneId" : "460001204101466", "Time" : ISODate("2012-12-18T17:23:01Z") }
{ "_id" : ObjectId("50d0a5bdd11"), "PhoneId" : "460001202749862", "Time" : ISODate("2012-12-18T17:19:43Z") }
{ "_id" : ObjectId("50d0a5bdd13"), "PhoneId" : "460001202749862", "Time" : ISODate("2012-12-18T17:21:13Z") }
{ "_id" : ObjectId("50d0a5bdd14"), "PhoneId" : "460001202749919", "Time" : ISODate("2012-12-18T17:21:14Z") }
{ "_id" : ObjectId("50d0a5bdd17"), "PhoneId" : "460001202445292", "Time" : ISODate("2012-12-18T17:21:14Z") }
{ "_id" : ObjectId("50d0a5bdd20"), "PhoneId" : "460001201829623", "Time" : ISODate("2012-12-18T17:21:15Z") }
{ "_id" : ObjectId("50d0a5bdd22"), "PhoneId" : "460001202441775", "Time" : ISODate("2012-12-18T17:21:15Z") }
{ "_id" : ObjectId("50d0a5bdd25"), "PhoneId" : "460001202441680", "Time" : ISODate("2012-12-18T17:21:14Z") }
{ "_id" : ObjectId("50d0a5bdd2a"), "PhoneId" : "460001204608783", "Time" : ISODate("2012-12-18T17:21:15Z") }
{ "_id" : ObjectId("50d0a5bdd38"), "PhoneId" : "460001202747843", "Time" : ISODate("2012-12-18T17:21:18Z") }
{ "_id" : ObjectId("50d0a5bdd3b"), "PhoneId" : "460001201624758", "Time" : ISODate("2012-12-18T17:21:18Z") }
{ "_id" : ObjectId("50d0a5bdd3d"), "PhoneId" : "460001201930213", "Time" : ISODate("2012-12-18T17:21:19Z") }
{ "_id" : ObjectId("50d0a2f1e84"), "PhoneId" : "460001203762426", "Time" : ISODate("2012-12-18T17:23:50Z") }
{ "_id" : ObjectId("50d0a0f0852"), "PhoneId" : "460001201624766", "Time" : ISODate("2012-12-18T17:23:05Z") }
{ "_id" : ObjectId("50d0a5bdd47"), "PhoneId" : "460001204101332", "Time" : ISODate("2012-12-18T17:20:54Z") }
{ "_id" : ObjectId("50d0c302f99"), "PhoneId" : "460001201624784", "Time" : ISODate("2012-12-18T19:36:16Z") }
{ "_id" : ObjectId("50d0c100588"), "PhoneId" : "460001204305245", "Time" : ISODate("2012-12-18T19:35:27Z") }
{ "_id" : ObjectId("50d0c100591"), "PhoneId" : "460001204305240", "Time" : ISODate("2012-12-18T19:35:30Z") }
has more
2. 編寫map函數,將需要groupby的兩列使用“-”拼接在一起,作為key。
> var m = function(){
... var hour=String(this.Time.getHours());
... if(hour.length==1) {
... hour='0'+hour
... };
... key=this.PhoneId+'-'+hour;
... emit(key,{count:1});
... }
3. 編寫reduce函數,將輸出結果進行拆分。
> var r = function (key,values) {
... var total = 0;
... var locate = key.indexOf('-');
... for (var i=0; i<values.length; i++) {
... total += values[i].count;
... }
... return {PhoneId:key.substr(0,locate), hour:key.substr(locate+1,2), count : total};
... };
4. 進行mapreduce,加入查詢條件
> var res = db.tianyc_test8.mapReduce(
... m,
... r,
... {query:{PhoneId:{$in:["460001203762426","460001201521818","460001202749919"]}},
... out:{replace:'tianyc_mr_Result1'}}
... )
5. 查看輸出結果
> db.tianyc_mr_Result1.find()
{ "_id" : "460001201521818-00", "value" : { "PhoneId" : "460001201521818", "hour" : "00", "count" : 112 } }
{ "_id" : "460001201521818-01", "value" : { "PhoneId" : "460001201521818", "hour" : "01", "count" : 65 } }
{ "_id" : "460001201521818-02", "value" : { "PhoneId" : "460001201521818", "hour" : "02", "count" : 52 } }
{ "_id" : "460001201521818-03", "value" : { "PhoneId" : "460001201521818", "hour" : "03", "count" : 98 } }
{ "_id" : "460001201521818-04", "value" : { "PhoneId" : "460001201521818", "hour" : "04", "count" : 112 } }
{ "_id" : "460001201521818-05", "value" : { "PhoneId" : "460001201521818", "hour" : "05", "count" : 112 } }
{ "_id" : "460001201521818-06", "value" : { "PhoneId" : "460001201521818", "hour" : "06", "count" : 111 } }
{ "_id" : "460001201521818-07", "value" : { "PhoneId" : "460001201521818", "hour" : "07", "count" : 111 } }
{ "_id" : "460001201521818-08", "value" : { "PhoneId" : "460001201521818", "hour" : "08", "count" : 105 } }
{ "_id" : "460001201521818-09", "value" : { "PhoneId" : "460001201521818", "hour" : "09", "count" : 114 } }
{ "_id" : "460001201521818-10", "value" : { "PhoneId" : "460001201521818", "hour" : "10", "count" : 107 } }
{ "_id" : "460001201521818-11", "value" : { "PhoneId" : "460001201521818", "hour" : "11", "count" : 115 } }
{ "_id" : "460001201521818-12", "value" : { "PhoneId" : "460001201521818", "hour" : "12", "count" : 89 } }
{ "_id" : "460001201521818-13", "value" : { "PhoneId" : "460001201521818", "hour" : "13", "count" : 114 } }
{ "_id" : "460001201521818-14", "value" : { "PhoneId" : "460001201521818", "hour" : "14", "count" : 114 } }
{ "_id" : "460001201521818-15", "value" : { "PhoneId" : "460001201521818", "hour" : "15", "count" : 115 } }
{ "_id" : "460001201521818-16", "value" : { "PhoneId" : "460001201521818", "hour" : "16", "count" : 115 } }
{ "_id" : "460001201521818-17", "value" : { "PhoneId" : "460001201521818", "hour" : "17", "count" : 112 } }
{ "_id" : "460001201521818-18", "value" : { "PhoneId" : "460001201521818", "hour" : "18", "count" : 115 } }
{ "_id" : "460001201521818-19", "value" : { "PhoneId" : "460001201521818", "hour" : "19", "count" : 70 } }
has more
> it
{ "_id" : "460001201521818-20", "value" : { "PhoneId" : "460001201521818", "hour" : "20", "count" : 99 } }
{ "_id" : "460001201521818-21", "value" : { "PhoneId" : "460001201521818", "hour" : "21", "count" : 118 } }
{ "_id" : "460001201521818-22", "value" : { "PhoneId" : "460001201521818", "hour" : "22", "count" : 116 } }
{ "_id" : "460001201521818-23", "value" : { "PhoneId" : "460001201521818", "hour" : "23", "count" : 112 } }
{ "_id" : "460001202749919-00", "value" : { "PhoneId" : "460001202749919", "hour" : "00", "count" : 112 } }
{ "_id" : "460001202749919-01", "value" : { "PhoneId" : "460001202749919", "hour" : "01", "count" : 111 } }
{ "_id" : "460001202749919-02", "value" : { "PhoneId" : "460001202749919", "hour" : "02", "count" : 112 } }
{ "_id" : "460001202749919-03", "value" : { "PhoneId" : "460001202749919", "hour" : "03", "count" : 46 } }
{ "_id" : "460001202749919-04", "value" : { "PhoneId" : "460001202749919", "hour" : "04", "count" : 108 } }
{ "_id" : "460001202749919-05", "value" : { "PhoneId" : "460001202749919", "hour" : "05", "count" : 110 } }
{ "_id" : "460001202749919-06", "value" : { "PhoneId" : "460001202749919", "hour" : "06", "count" : 62 } }
{ "_id" : "460001202749919-07", "value" : { "PhoneId" : "460001202749919", "hour" : "07", "count" : 108 } }
{ "_id" : "460001202749919-08", "value" : { "PhoneId" : "460001202749919", "hour" : "08", "count" : 112 } }
{ "_id" : "460001202749919-09", "value" : { "PhoneId" : "460001202749919", "hour" : "09", "count" : 112 } }
{ "_id" : "460001202749919-10", "value" : { "PhoneId" : "460001202749919", "hour" : "10", "count" : 108 } }
{ "_id" : "460001202749919-11", "value" : { "PhoneId" : "460001202749919", "hour" : "11", "count" : 112 } }
{ "_id" : "460001202749919-12", "value" : { "PhoneId" : "460001202749919", "hour" : "12", "count" : 84 } }
{ "_id" : "460001202749919-13", "value" : { "PhoneId" : "460001202749919", "hour" : "13", "count" : 112 } }
{ "_id" : "460001202749919-14", "value" : { "PhoneId" : "460001202749919", "hour" : "14", "count" : 112 } }
{ "_id" : "460001202749919-15", "value" : { "PhoneId" : "460001202749919", "hour" : "15", "count" : 112 } }
has more
二、實現多列的group by操作(方法2)。
在map函數中,將key設置為內嵌的集合。此時MR過程與單個列的group by相同。
> var m = function(){
... var hour=String(this.Time.getHours());
... if(hour.length==1) {hour='0'+hour};
... key={PhoneId:this.PhoneId,hour:hour};
... emit(key,{count:1});
... }
> var r = function (key,values) {
... var total = 0;
... for (var i=0; i<values.length; i++) {
... total += values[i].count;
... }
... return {count : total};
... };
> var res = db.tianyc_test8.mapReduce(
... m,
... r,
... {query:{PhoneId:{$in:["460001203762426","460001201521818","460001202749919"]}},
... out:{replace:'tianyc_mr_Result1'}}
... )
> db.tianyc_mr_Result1.find()
{ "_id" : { "PhoneId" : "460001201521818", "hour" : "00" }, "value" : { "count" : 112 } }
{ "_id" : { "PhoneId" : "460001201521818", "hour" : "01" }, "value" : { "count" : 65 } }
{ "_id" : { "PhoneId" : "460001201521818", "hour" : "02" }, "value" : { "count" : 52 } }
{ "_id" : { "PhoneId" : "460001201521818", "hour" : "03" }, "value" : { "count" : 98 } }
{ "_id" : { "PhoneId" : "460001201521818", "hour" : "04" }, "value" : { "count" : 112 } }
{ "_id" : { "PhoneId" : "460001201521818", "hour" : "05" }, "value" : { "count" : 112 } }
{ "_id" : { "PhoneId" : "460001201521818", "hour" : "06" }, "value" : { "count" : 111 } }
{ "_id" : { "PhoneId" : "460001201521818", "hour" : "07" }, "value" : { "count" : 111 } }
{ "_id" : { "PhoneId" : "460001201521818", "hour" : "08" }, "value" : { "count" : 105 } }
{ "_id" : { "PhoneId" : "460001201521818", "hour" : "09" }, "value" : { "count" : 114 } }
{ "_id" : { "PhoneId" : "460001201521818", "hour" : "10" }, "value" : { "count" : 107 } }
{ "_id" : { "PhoneId" : "460001201521818", "hour" : "11" }, "value" : { "count" : 115 } }
{ "_id" : { "PhoneId" : "460001201521818", "hour" : "12" }, "value" : { "count" : 89 } }
{ "_id" : { "PhoneId" : "460001201521818", "hour" : "13" }, "value" : { "count" : 114 } }
{ "_id" : { "PhoneId" : "460001201521818", "hour" : "14" }, "value" : { "count" : 114 } }
{ "_id" : { "PhoneId" : "460001201521818", "hour" : "15" }, "value" : { "count" : 115 } }
{ "_id" : { "PhoneId" : "460001201521818", "hour" : "16" }, "value" : { "count" : 115 } }
{ "_id" : { "PhoneId" : "460001201521818", "hour" : "17" }, "value" : { "count" : 112 } }
{ "_id" : { "PhoneId" : "460001201521818", "hour" : "18" }, "value" : { "count" : 115 } }
{ "_id" : { "PhoneId" : "460001201521818", "hour" : "19" }, "value" : { "count" : 70 } }
has more
三、實現多列的group by having操作。
類似如下SQL:
select PhoneId, hour, count(*)
from tianyc_test8
where PhoneId in ('xx','yy','zz')
group by PhoneId, hour having count(*)>=100;
1. 使用第一種group by方法進行測試。map和reduce函數不變,增加finalize函數,設置指示列get。
> var f = function(key, rvalues){
... if (rvalues.count>=100){rvalues.get='true';}
... else {rvalues.get='false'};
... return rvalues;
... }
2. 使用runCommand來執行mapredurce。
> var res = db.runCommand({
... mapreduce:'tianyc_test8',
... map:m,
... reduce:r,
... finalize:f,
... query:{PhoneId:{$in:["460001203762426","460001201521818","460001202749919"]}},
... out:'tianyc_mr_Result1'
... }
... )
3. 查詢結果。
使用get列來過濾需要的數據,並隱藏該列:
> db.tianyc_mr_Result1.find({'value.get':'true'},{'value.get':0})
{ "_id" : "460001201521818-00", "value" : { "PhoneId" : "460001201521818", "hour" : "00", "count" : 112 } }
{ "_id" : "460001201521818-04", "value" : { "PhoneId" : "460001201521818", "hour" : "04", "count" : 112 } }
{ "_id" : "460001201521818-05", "value" : { "PhoneId" : "460001201521818", "hour" : "05", "count" : 112 } }
{ "_id" : "460001201521818-06", "value" : { "PhoneId" : "460001201521818", "hour" : "06", "count" : 111 } }
{ "_id" : "460001201521818-07", "value" : { "PhoneId" : "460001201521818", "hour" : "07", "count" : 111 } }
{ "_id" : "460001201521818-08", "value" : { "PhoneId" : "460001201521818", "hour" : "08", "count" : 105 } }
{ "_id" : "460001201521818-09", "value" : { "PhoneId" : "460001201521818", "hour" : "09", "count" : 114 } }
{ "_id" : "460001201521818-10", "value" : { "PhoneId" : "460001201521818", "hour" : "10", "count" : 107 } }
{ "_id" : "460001201521818-11", "value" : { "PhoneId" : "460001201521818", "hour" : "11", "count" : 115 } }
{ "_id" : "460001201521818-13", "value" : { "PhoneId" : "460001201521818", "hour" : "13", "count" : 114 } }
{ "_id" : "460001201521818-14", "value" : { "PhoneId" : "460001201521818", "hour" : "14", "count" : 114 } }
{ "_id" : "460001201521818-15", "value" : { "PhoneId" : "460001201521818", "hour" : "15", "count" : 115 } }
{ "_id" : "460001201521818-16", "value" : { "PhoneId" : "460001201521818", "hour" : "16", "count" : 115 } }
{ "_id" : "460001201521818-17", "value" : { "PhoneId" : "460001201521818", "hour" : "17", "count" : 112 } }
{ "_id" : "460001201521818-18", "value" : { "PhoneId" : "460001201521818", "hour" : "18", "count" : 115 } }
{ "_id" : "460001201521818-21", "value" : { "PhoneId" : "460001201521818", "hour" : "21", "count" : 118 } }
{ "_id" : "460001201521818-22", "value" : { "PhoneId" : "460001201521818", "hour" : "22", "count" : 116 } }
{ "_id" : "460001201521818-23", "value" : { "PhoneId" : "460001201521818", "hour" : "23", "count" : 112 } }
{ "_id" : "460001202749919-00", "value" : { "PhoneId" : "460001202749919", "hour" : "00", "count" : 112 } }
{ "_id" : "460001202749919-01", "value" : { "PhoneId" : "460001202749919", "hour" : "01", "count" : 111 } }
has more
>