mongodb - Incorrect response to mapReduce query in mongo-db -
I have 1000 user records in a collection; 459 documents have gender male, and the remaining 541 have gender female.
//document structure > db.user_details.find().pretty() { "_id" : objectid("557e610d626754910f0974a4"), "id" : 0, "name" : "leanne flinn", "email" : "leanne.flinn@unilogic.com", "work" : "unilogic", "dob" : "fri jun 11 1965 20:50:58 gmt+0530 (ist)", "age" : 5, "gender" : "female", "salary" : 35696, "hobbies" : "acrobatics,meditation,music" } { "_id" : objectid("557e610d626754910f0974a5"), "id" : 1, "name" : "edward young", "email" : "edward.young@solexis.com", "work" : "solexis", "dob" : "wed feb 12 1941 16:45:53 gmt+0530 (ist)", "age" : 1, "gender" : "female", "salary" : 72291, "hobbies" : "acrobatics,meditation,music" } { "_id" : objectid("557e610d626754910f0974a6"), "id" : 2, "name" : "haydee milligan", "email" : "haydee.milligan@dalserve.com", "work" : "dalserve", "dob" : "tue sep 13 1994 13:45:04 gmt+0530 (ist)", "age" : 17, "gender" : "male", "salary" : 20026, "hobbies" : "papier-mache" } { "_id" : objectid("557e610d626754910f0974a7"), "id" : 3, "name" : "lyle keesee", "email" : "lyle.keesee@terrasys.com", "work" : "terrasys", "dob" : "tue apr 25 1922 13:39:46 gmt+0530 (ist)", "age" : 79, "gender" : "female", "salary" : 48032, "hobbies" : "acrobatics,meditation,music" } { "_id" : objectid("557e610d626754910f0974a8"), "id" : 4, "name" : "shea mercer", "email" : "shea.mercer@pancast.com", "work" : "pancast", "dob" : "mon apr 08 1935 06:10:30 gmt+0530 (ist)", "age" : 51, "gender" : "male", "salary" : 31511, "hobbies" : "acrobatics,photography,papier-mache" } number of users in each gender
> db.user_details.find({gender:'male'}).count() 459 > > db.user_details.find({gender:'female'}).count() 541 > db.user_details.find({name:{$ne:null}}).count() 1000 > db.user_details.find({age:{$ne:null}}).count() 1000 map reduce code
mapper = function(){ emit(this.gender, {name:this.name,age:this.age}) } reducer = function(gender, users){ var res = 0; users.foreach(function(user){ res = res + 1 }) return res; } db.user_details.mapreduce(mapper, reducer, {out: {inline:1}}) why map reduce result has 112 documents? should contain 459 , 541 male , female respectively, isn't it?
// map reduce result { "results" : [ { "_id" : "female", "value" : 56 }, { "_id" : "male", "value" : 46 } ], "timemillis" : 45, "counts" : { "input" : 1000, "emit" : 1000, "reduce" : 20, "output" : 2 }, "ok" : 1 } note : know not proper way use map reduce, faced more creepy problem in map reduce. once solution question solve
Your problem here is that you have missed one of the core concepts of how mapReduce works. The relevant documentation that explains this is found here:
- MongoDB can invoke the reduce function more than once for the same key. In this case, the previous output from the reduce function for that key will become one of the input values to the next reduce function invocation for that key.
And a bit later:
- The type of the return object must be identical to the type of the value emitted by the map function.
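What those two statements mean is that you need to use the exact same signature for the values issued by both the mapper and the reducer functions, as the reduce process will indeed get called "multiple times". In your case the reducer returns a plain number, and that number is later fed back in as just one more element of the values array; counting array elements then counts partial results rather than the original documents, which is why you end up with 56 and 46 instead of 541 and 459.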
This is how mapReduce deals with large data: by not necessarily processing all of the values for a given "key" at once, but doing it in incremental "chunks".
Therefore, if you just want a "number" in the output, then you "emit" a "number" as well:
db.collection.mapreduce( function() { emit(this.gender, this.age); }, function(key,values) { return array.sum( values ) }, { "out": { "inline": 1 } } ) Or, to just "count" per type:
db.collection.mapreduce( function() { emit(this.gender, 1); }, function(key,values) { return array.sum( values ) }, { "out": { "inline": 1 } } ) The point is "you need to put out the same as what you put in", as it will likely "go back in again". So whatever data you want to collect, the output structure for both the mapper and the reducer must be the same.
Comments
Post a Comment