How to keep only a defined subset of a collection
I would like to keep only a defined subset of a collection, but I can't find any relevant information about it. It's hard to explain, so here is an example:
Let's say I have this collection:
// Sample data: for each category, the scores to store, listed in insertion order.
// One document of the form { category, score } is saved per listed score.
[
    { category: 1201, scores: [0.5, 0.4, 0.3, 0.5, 0.1] },
    { category: 1202, scores: [0.5, 0.6, 0.1, 0.3, 0.1, 0.4, 0.3] },
    { category: 1203, scores: [0.8, 0.4, 0.7, 0.3] },
    { category: 1204, scores: [0.2, 0.8, 0.7, 0.9] }
].forEach(function (group) {
    group.scores.forEach(function (score) {
        db.mycollection.save({ "category" : group.category, "score" : score });
    });
});
My goal is to get the best 3 rows of each category (ranked by score). In this example, I am trying to get this kind of result:
{ "category" : 1201, "score" : 0.5 }
{ "category" : 1201, "score" : 0.5 }
{ "category" : 1201, "score" : 0.4 }
{ "category" : 1202, "score" : 0.6 }
{ "category" : 1202, "score" : 0.5 }
{ "category" : 1202, "score" : 0.4 }
{ "category" : 1203, "score" : 0.8 }
{ "category" : 1203, "score" : 0.7 }
{ "category" : 1203, "score" : 0.4 }
{ "category" : 1204, "score" : 0.9 }
{ "category" : 1204, "score" : 0.8 }
{ "category" : 1204, "score" : 0.7 }
But I really don't know how to do it. I found a workaround using a map-reduce function, but it's really, really slow. This is what I did:
/**
 * Map step: emits one (category, score) pair for the current document.
 * The document is read from `this`, so the function must be invoked with
 * the document as its receiver.
 */
var map = function()
{
    emit(this.category, this.score);
};
/**
 * Reduce step: keeps the three highest scores for one category.
 *
 * @param {*} key - The category the values were emitted under (unused).
 * @param {Array} values - Raw scores, and/or {scores: [...]} objects
 *     produced by an earlier call to this function for the same key.
 * @returns {{scores: number[]}} Up to three scores, highest first.
 */
var reduce = function(key, values)
{
    var total = [];
    values.forEach(function(value)
    {
        // This function returns {scores: [...]}, so its own output may be
        // handed back in as an input; unpack those partial results.
        if (value !== null && typeof value === "object" && Array.isArray(value.scores))
            total = total.concat(value.scores);
        else
            total.push(value);
    });
    // Numeric, descending comparator: the default sort() compares elements
    // as strings and would rank 9 above 10.
    total.sort(function(a, b) { return b - a; });
    return {scores: total.slice(0, 3)};
};
// Run the map-reduce job over mycollection, writing the results to the "myoutput" collection.
db.mycollection.mapReduce(map, reduce, { out : "myoutput" } );
// Query the output collection to inspect the per-key results.
db.myoutput.find();
// Drop the output collection once the results have been read.
db.myoutput.drop();
The result is:
{ "_id" : 1201, "value" : { "scores" : [ 0.5, 0.5, 0.4 ] } }
{ "_id" : 1202, "value" : { "scores" : [ 0.6, 0.5, 0.4 ] } }
{ "_id" : 1203, "value" : { "scores" : [ 0.8, 0.7, 0.4 ] } }
{ "_id" : 1204, "value" : { "scores" : [ 0.9, 0.8, 0.7 ] } }
It's not exactly what I wanted, but it does the job.
My question is: is it possible to do that without using map-reduce (or at least with good performance)?
PS: Excuse my poor English. I'm not fluent.
EDIT:
I finally came up with this solution:
/**
 * Map step: publishes the current document's score under its category.
 * The document is taken from the function's receiver (`this`).
 */
var map = function ()
{
    var doc = this;
    emit(doc.category, doc.score);
};
/**
 * Reduce step: keeps the three highest scores for one category.
 *
 * Accepts every shape a value can take here: a raw score, a bare array of
 * scores, or a {scores: ...} object returned by an earlier call to this
 * function for the same key.
 *
 * @param {*} key - The category the values were emitted under (unused).
 * @param {Array} values - Values to merge for this key.
 * @returns {{scores: number[]}} Up to three scores, highest first.
 */
var reduce = function(key, values)
{
    var total = [];
    values.forEach(function(value)
    {
        // concat() returns a new array instead of modifying `total`,
        // so its result has to be assigned back.
        if (value instanceof Array)
            total = total.concat(value);
        else if (value instanceof Object)
        {
            if (value.scores instanceof Array)
                total = total.concat(value.scores);
            else
                total.push(value.scores);
        }
        else
            total.push(value);
    });
    // Numeric comparator, highest score first.
    total.sort(function (a, b) { return b - a; });
    return {scores: total.slice(0, 3)};
};
You can very easily get the result for a given category:
// Top 3 scores of a single category: filter on the category, sort by score descending, keep 3.
db.mycollection.find({category : 1204}).sort({score : -1}).limit(3)
This will give the 3 best scores for a given category.
You can then loop over the categories, but this will require a lot of requests (one per category).
The map-reduce solution is the only way to do this in a single request, and you seem to have a working solution. If you want to improve your performance, work on the reduce function, especially the following part, which is not very good:
// Copies every incoming value into total, one push per element.
values.forEach(function(value)
{
total.push(value);
});
// sort() without a comparator orders elements by their string representation, not numerically.
total.sort();
// Reversing the ascending result puts the largest entries first.
total.reverse();
// splice(0, 3) removes the first three elements from the array and returns them; they become the new total.
total = total.splice(0, 3);
精彩评论