MongoDB mapreduce
🏷️ MongoDB
实现功能
生成一份报表:按国家统计包含相同数字集合的电话号码各有多少个。
实现方法
首先,创建一个辅助函数:它抽取电话号码中所有不同的数字,并按从小到大的顺序组成一个数组。
点击查看代码
/**
 * Collect the distinct decimal digits that occur in a phone number.
 *
 * @param {Object} phone - Phone document; only `phone.components.number` is read.
 * @returns {number[]} The digits 0-9 that appear in the number, in ascending order.
 */
var distinctDigits = function (phone) {
  var number = phone.components.number + '', // coerce to a string so it can be indexed per character
      seen = [],
      result = [],
      i = number.length;

  // Flag each digit that appears; unary plus turns the character back into a number.
  while (i--) {
    seen[+number[i]] = 1;
  }

  // Walking 0..9 in order yields a sorted, duplicate-free list.
  for (i = 0; i < 10; i++) {
    if (seen[i]) {
      result[result.length] = i;
    }
  }
  return result;
};

// Save the helper in system.js under the name 'distinctDigits'.
db.system.js.save({ _id: 'distinctDigits', value: distinctDigits });
// Result recorded in the original session:
//   WriteResult({ "nMatched" : 0, "nUpserted" : 1, "nModified" : 0, "_id" : "distinctDigits" })

// Try the stored helper against one document. NOTE: db.eval is deprecated (see the warning below).
db.eval("distinctDigits(db.phones.findOne({ 'components.number': 5551213 }))");
// Result recorded in the original session:
//   WARNING: db.eval is deprecated
//   [ 1, 2, 3, 5 ]
其次,准备 `map` 和 `reduce` 函数。
点击查看代码
/**
 * Map step: emit one { count: 1 } per phone document, keyed by the set of
 * distinct digits in its number together with its country.
 *
 * Runs with `this` bound to the current document; `distinctDigits` and `emit`
 * are not defined here and must be available where the function executes.
 */
var map = function () {
  var digits = distinctDigits(this);
  emit({ digits: digits, country: this.components.country }, { count: 1 });
};

/**
 * Reduce step: add up the `count` fields of all values emitted for one key.
 *
 * @param {Object} key - The { digits, country } key (not used in the sum).
 * @param {Object[]} values - Objects of the form { count: <number> }.
 * @returns {Object} { count: <total> } - the same shape as the emitted values.
 */
var reduce = function (key, values) {
  var total = 0;
  for (var i = 0; i < values.length; i++) {
    total += values[i].count;
  }
  return { count: total };
};
最后,执行 `mapReduce` 操作。
点击查看代码
// Run the map-reduce job over the `phones` collection, using the `map` and
// `reduce` functions defined earlier, and write the output to `phones.report`.
var results = db.runCommand({
  mapReduce: 'phones',
  map: map,
  reduce: reduce,
  out: 'phones.report'
});
// Value of `results` recorded in the original session:
//   {
//     "result" : "phones.report",
//     "timeMillis" : 9690,
//     "counts" : { "input" : 316229, "emit" : 316229, "reduce" : 76620, "output" : 3491 },
//     "ok" : 1
//   }
因为通过 `out` 参数设置了集合的名称(`out: 'phones.report'`),所以可以像其它集合那样查询结果。
点击查看查询结果
// Query the report collection for the entries whose key has country 8.
db.phones.report.find({ '_id.country': 8 });
// First batch of results recorded in the original session:
//   { "_id" : { "digits" : [ 0, 1, 2, 3, 4, 5, 6 ], "country" : 8 }, "value" : { "count" : 46 } }
//   { "_id" : { "digits" : [ 0, 1, 2, 3, 5 ], "country" : 8 }, "value" : { "count" : 5 } }
//   { "_id" : { "digits" : [ 0, 1, 2, 3, 5, 6 ], "country" : 8 }, "value" : { "count" : 170 } }
//   { "_id" : { "digits" : [ 0, 1, 2, 3, 5, 6, 7 ], "country" : 8 }, "value" : { "count" : 36 } }
//   { "_id" : { "digits" : [ 0, 1, 2, 3, 5, 6, 8 ], "country" : 8 }, "value" : { "count" : 33 } }
//   { "_id" : { "digits" : [ 0, 1, 2, 3, 5, 6, 9 ], "country" : 8 }, "value" : { "count" : 33 } }
//   { "_id" : { "digits" : [ 0, 1, 2, 3, 5, 7 ], "country" : 8 }, "value" : { "count" : 8 } }
//   { "_id" : { "digits" : [ 0, 1, 2, 3, 5, 8 ], "country" : 8 }, "value" : { "count" : 5 } }
//   { "_id" : { "digits" : [ 0, 1, 2, 3, 5, 9 ], "country" : 8 }, "value" : { "count" : 13 } }
//   { "_id" : { "digits" : [ 0, 1, 2, 4, 5 ], "country" : 8 }, "value" : { "count" : 12 } }
//   { "_id" : { "digits" : [ 0, 1, 2, 4, 5, 6 ], "country" : 8 }, "value" : { "count" : 162 } }
//   { "_id" : { "digits" : [ 0, 1, 2, 4, 5, 6, 7 ], "country" : 8 }, "value" : { "count" : 31 } }
//   { "_id" : { "digits" : [ 0, 1, 2, 4, 5, 6, 8 ], "country" : 8 }, "value" : { "count" : 28 } }
//   { "_id" : { "digits" : [ 0, 1, 2, 4, 5, 6, 9 ], "country" : 8 }, "value" : { "count" : 38 } }
//   { "_id" : { "digits" : [ 0, 1, 2, 4, 5, 7 ], "country" : 8 }, "value" : { "count" : 5 } }
//   { "_id" : { "digits" : [ 0, 1, 2, 4, 5, 8 ], "country" : 8 }, "value" : { "count" : 8 } }
//   { "_id" : { "digits" : [ 0, 1, 2, 4, 5, 9 ], "country" : 8 }, "value" : { "count" : 10 } }
//   { "_id" : { "digits" : [ 0, 1, 2, 5 ], "country" : 8 }, "value" : { "count" : 24 } }
//   { "_id" : { "digits" : [ 0, 1, 2, 5, 6 ], "country" : 8 }, "value" : { "count" : 264 } }
//   { "_id" : { "digits" : [ 0, 1, 2, 5, 6, 7 ], "country" : 8 }, "value" : { "count" : 139 } }
//   Type "it" for more
输入 `it` 可以继续遍历结果集。