MongoDB4:聚合函式group和mapReduce
阿新 • • 發佈:2019-01-09
1.group測試:
db.order.insert({id:123,mount:500}) yooo:PRIMARY> db.order.find() { "_id" : ObjectId("5b387105c0298213b42c35de"), "id" : 123, "mount" : 500 } { "_id" : ObjectId("5b387113c0298213b42c35df"), "id" : 123, "mount" : 250 } { "_id" : ObjectId("5b387124c0298213b42c35e0"), "id" : 212, "mount" : 200 } { "_id" : ObjectId("5b387131c0298213b42c35e1"), "id" : 123, "mount" : 300 } { "_id" : ObjectId("5b387a62c0298213b42c35e2"), "id" : 212, "mount" : 275 } yooo:PRIMARY> db.order.group( ... { ... key: {id:1}, ----以id欄位進行分組 ... reduce: function( doc, result ) { ... result.total += doc.mount; ---每分組統計mount ... result.count++; ---每分組統計文件個數 ... }, ... initial: { total : 0,count: 0 } , ... finalize: function(result) { ... result.avg=result.total/result.count ---每分組的總mount/分組文件個數 ... } ... } ... ) [ { "id" : 123, "total" : 1050, "count" : 3, "avg" : 350 } { "id" : 212, "total" : 475, "count" : 2, "avg" : 237.5 } ] 實現了SQL語句同樣的功能: SELECT id ,SUM(mount) as total,COUNT(id) as count, (SUM(mount)/COUNT(id)) as avg FROM order GROUP BY id
2.mapReduce測試:
https://docs.mongodb.com/manual/tutorial/map-reduce-examples/
>分片叢集環境需要用mapReduce不能使用group:
The db.collection.group() method does not work with sharded clusters.
① map:為對映函式,裡面會呼叫emit(key,value),集合會按照指定的key進行對映分組
② reduce:為簡化函式,會對map分組後的資料進行分組簡化,在reduce(key,value)中
的key就是emit中的key,value則為同一個key下所有emit出來的value所組成的陣列
③ mapReduce:是最後執行的函數了,引數為map,reduce和其他可選引數
1>分組並統計mount總和 var mf=function() {emit(this.id,this.mount);}; var rf=function(key, valuemount) {return Array.sum(valuemount); }; db.order.mapReduce(mf,rf, { out: "mrout" } ) yooo:PRIMARY> db.order.mapReduce(mf,rf, { out: "mrout" } ) { "result" : "mrout", ---輸出結果到mrout的集合中 "timeMillis" : 58, "counts" : { "input" : 5, ----輸入的文件個數 "emit" : 5, ----emit函式呼叫次數 "reduce" : 2, ----reduce函式呼叫次數 "output" : 2 ----返回輸出文件個數 }, "ok" : 1, "operationTime" : Timestamp(1530441013, 4), "$clusterTime" : { "clusterTime" : Timestamp(1530441013, 4), "signature" : { "hash" : BinData(0,"AAAAAAAAAAAAAAAAAAAAAAAAAAA="), "keyId" : NumberLong(0) } } } yooo:PRIMARY> db.mrout.find() { "_id" : 123, "value" : 1050 } { "_id" : 212, "value" : 475 }
2>分組並統計每組文件個數:
下面的寫法結果都是一樣:
// Map step: each document emits the constant 1 under its id, so every
// emitted value stands for exactly one document of that group.
var mf2 = function () {
  emit(this.id, 1);
};
// Reduce step: fold the emitted numbers for one key into their sum,
// which is the number of documents in that group.
var rf2 = function (key, value) {
  return value.reduce(function (runningTotal, unit) {
    return runningTotal + unit;
  }, 0);
};
// Run map-reduce on the order collection with mf2 as the map function and
// rf2 as the reduce function; the result is written to the "mrout2" collection.
db.order.mapReduce(mf2,rf2, { out: "mrout2" } )
// Map step: one emitted 1 per document, grouped by the document's id.
var mf3 = function () {
  emit(this.id, 1);
};
// Reduce step: the group's document count is the sum of its emitted 1s.
// NOTE(review): Array.sum is not standard JavaScript; it is presumably supplied
// by the MongoDB JavaScript environment - confirm before running elsewhere.
var rf3 = function (key, value) {
  var groupCount = Array.sum(value);
  return groupCount;
};
// Run map-reduce on the order collection with mf3 as the map function and
// rf3 as the reduce function; the result is written to the "mrout3" collection.
db.order.mapReduce(mf3,rf3, { out: "mrout3" } )
// Map step: emit a partial count {num: 1} for every document, keyed by id.
// The emitted value deliberately has the same shape as the reducer's return
// value: MongoDB may pass a reducer's output back into the reducer, and it
// skips the reducer entirely for keys with a single value, so both shapes
// must be interchangeable.
var mf7 = function () {
  emit(this.id, { num: 1 });
};
// Reduce step: add up the partial counts of one key.
// key   - the id shared by all values in this call
// value - array of {num: N} objects (raw map output or earlier reduce output)
// Returns {num: total number of documents counted so far for this key}.
var rf7 = function (key, value) {
  var result = { num: 0 };
  value.forEach(function (x) {
    result.num += x.num;
  });
  return result;
};
// Run map-reduce on the order collection with mf7 as the map function and
// rf7 as the reduce function; the result is written to the "mrout7" collection.
db.order.mapReduce(mf7,rf7, { out: "mrout7" } )
yooo:PRIMARY> db.mrout7.find()
{ "_id" : 123, "value" : { "num" : 3 } }
{ "_id" : 212, "value" : { "num" : 2 } }
3>以id欄位分組,並統計每組mount總和和每組的文件個數:
// Map step: for each document emit, under its id, an object holding the
// document's mount and a count of 1.
var mf0 = function () {
  var perDocument = { mount: this.mount, count: 1 };
  emit(this.id, perDocument);
};
map函式的結果是:
第一組:{key:123,values:[{mount:500,count:1},{mount:250,count:1},{mount:300,count:1}]}
第二組:{key:212,values:[{mount:200,count:1},{mount:275,count:1}]}
// Reduce step: accumulate the mount total and the document count for one key.
// key   - the id shared by all values in this call
// value - array whose items are either raw map output {mount, count} or the
//         {total, num} object returned by an earlier call of this reducer
// Returns {total: summed mount, num: number of documents}.
//
// MongoDB may call the reducer more than once for the same key and pass a
// previous return value back in, so both shapes have to be accepted; reading
// only mount/count from an already reduced item would turn the sums into NaN.
var rf0 = function (key, value) {
  var result = { total: 0, num: 0 };
  for (var i = 0; i < value.length; i++) {
    var item = value[i];
    // An item carrying "total" is a partial result from a previous reduce pass.
    var isPartial = item.total !== undefined;
    result.total += isPartial ? item.total : item.mount;
    result.num += isPartial ? item.num : item.count;
  }
  return result;
};
// Finalize step: add the per-group average (total / num) to the reduced value.
// key    - the group's id
// result - normally the reducer's {total, num}; for a key that had only one
//          emitted value MongoDB does not call the reducer, so the raw map
//          value {mount, count} arrives here instead
// Returns an object with total, num and avg.
var finalff0 = function (key, result) {
  var summary = result;
  if (summary.total === undefined) {
    // Raw map output: convert it to the reducer's shape so avg is not NaN
    // and single-document groups are stored like every other group.
    summary = { total: result.mount, num: result.count };
  }
  summary.avg = summary.total / summary.num;
  return summary;
};
// Run map-reduce on the order collection with mf0 as the map function, rf0 as
// the reduce function and finalff0 as the finalize function; the result is
// written to the "mrout0" collection.
db.order.mapReduce(mf0,rf0, { out: "mrout0",finalize:finalff0 } )
yooo:PRIMARY> db.mrout0.find()
{ "_id" : 123, "value" : { "total" : 1050, "num" : 3, "avg" : 350 } }
{ "_id" : 212, "value" : { "total" : 475, "num" : 2, "avg" : 237.5 } }
至此mapReduce輸出了和上面group一樣分組統計結果。