Issue
This Content is from Stack Overflow. Question asked by Tarik Tutuncu
I am having trouble counting the occurrences of values in an array on MongoDB. Below is what I have currently.
I have 130million documents in a collection with the below format:
{
"_id": {
"$oid": "6326ea6f53636247f2daeb19"
},
"customerId": 267,
"customerName": "customer name",
"deviceId": 7605,
"deviceModel": "xxx",
"gpsDateTime": {
"$date": {
"$numberLong": "1663494743000"
}
},
"heading": 189,
"iMEI": "461990",
"lat": 33.0997333334,
"lng": -117.2096316667,
"msgDateTime": {
"$date": {
"$numberLong": "1663494767156"
}
},
"speedKPH": 0,
"statusCode": "REPORT",
"tripStartDateTime": {
"$date": {
"$numberLong": "1663384611590"
}
},
"vehicleGroupId": 564,
"vehicleGroupname": "vehicle group name",
"vehicleId": 7474,
"vehicleName": "vehicle name"
}
I have an aggregation for mongodb as follows:
db.gpsmessage.aggregate([{
$match: {
$and: [
{
msgDateTime: {
$exists: true
}
},
{
msgDateTime: {
$ne: null
}
},
{
msgDateTime: {
$gt: ISODate('2020-12-31T00:00:00.000Z')
}
},
{
msgDateTime: {
$lt: ISODate('2022-09-30T00:00:00.000Z')
}
}
]
}
}, {
$project: {
msgDateTime: 1,
customerId: 1,
customerName: 1,
deviceId: 1,
deviceModel: 1,
iMEI: 1,
vehicleId: 1,
vehicleName: 1,
statusCode: 1,
vehicleGroupId: 1,
vehicleGroupname: 1
}
}, {
$group: {
_id: {
deviceId: '$deviceId',
year: {
$year: '$msgDateTime'
},
month: {
$month: '$msgDateTime'
}
},
customerId: {
$last: '$customerId'
},
customerName: {
$last: '$customerName'
},
deviceId: {
$last: '$deviceId'
},
deviceModel: {
$last: '$deviceModel'
},
iMEI: {
$last: '$iMEI'
},
vehicleId: {
$last: '$vehicleId'
},
vehicleName: {
$last: '$vehicleName'
},
vehicleGroupId: {
$last: '$vehicleGroupId'
},
vehicleGroupName: {
$last: '$vehicleGroupname'
},
firstMsgDate: {
$min: '$msgDateTime'
},
lastMsgDate: {
$max: '$msgDateTime'
},
totalMessages: {
$sum: 1
}
}
}, {
$project: {
_id: {
deviceId: 1,
year: 1,
month: 1
},
year: '$_id.year',
month: '$_id.month',
deviceId: 1,
customerId: 1,
customerName: 1,
deviceModel: 1,
iMEI: 1,
vehicleId: 1,
vehicleName: 1,
firstMsgDate: 1,
lastMsgDate: 1,
totalMessages: 1
}
}, {
$out: 'usage_info'
}],{ allowDiskUse:true } );
This outputs the following document:
{
"_id": {
"deviceId": 3494,
"year": 2022,
"month": 8
},
"customerId": 8,
"customerName": "customer name",
"deviceId": 3494,
"deviceModel": "xxx",
"iMEI": "00000000030303030",
"vehicleId": 3651,
"vehicleName": "vehicle name",
"firstMsgDate": {
"$date": {
"$numberLong": "1659741888000"
}
},
"lastMsgDate": {
"$date": {
"$numberLong": "1661906908000"
}
},
"totalMessages": 32,
"totalStatus": [
"REPORT",
"REPORT",
"REPORT",
"REPORT",
"REPORT",
"REPORT",
"LOW_BATT",
"LOW_BATT",
"REPORT",
"REPORT",
"REPORT",
"REPORT",
"REPORT",
"REPORT",
"REPORT",
"REPORT",
"REPORT",
"REPORT",
"REPORT",
"REPORT",
"PWR_ON",
"LOW_BATT",
"REPORT",
"STATUS_EVT",
"STATUS_EVT",
"STATUS_EVT",
"LOW_BATT",
"PWR_ON",
"STATUS_EVT",
"STATUS_EVT",
"STATUS_EVT",
"STATUS_EVT"
],
"year": 2022,
"month": 8
}
What I am trying to figure out is how to count the occurrences in “totalStatus” and modify the response so the totalStatus transforms into the below format and I want to keep the rest of the fields I grouped (these are not the only status messages so the counting must be dynamic):
[{"REPORT": 10},{"STATUS_EVT":15},{"PWR_ON":5}, .....]
I have been trying to find a way to do this for 2 days with no luck.
Help will be much appreciated.
Solution
One option is to replace your $group
and $project
with:
- First group also by
status
- Format the status to a dictionary using
$arrayToObject
- Group again, now by
year
,month
anddeviceId
only. - Format
db.collection.aggregate([
{$group: {
_id: {
deviceId: "$deviceId",
year: {$year: "$msgDateTime"},
month: {$month: "$msgDateTime"},
status: "$statusCode"
},
customerId: {$last: "$customerId"},
customerName: {$last: "$customerName"},
deviceId: {$last: "$deviceId"},
deviceModel: {$last: "$deviceModel"},
iMEI: {$last: "$iMEI"},
vehicleId: {$last: "$vehicleId"},
vehicleName: {$last: "$vehicleName"},
vehicleGroupId: {$last: "$vehicleGroupId"},
vehicleGroupName: {$last: "$vehicleGroupname"},
firstMsgDate: {$min: "$msgDateTime"},
lastMsgDate: {$max: "$msgDateTime"},
totalMessages: {$sum: 1}
}
},
{$set: {status: [{k: "$_id.status", v: "$totalMessages"}],
year: "$_id.year", month: "$_id.month"}
},
{$set: {status: {$arrayToObject: "$status"}}},
{$group: {_id: {deviceId: "$_id.deviceId", month: "$_id.month", year: "$_id.year"},
totalStatus: {$push: "$status"}, data: {$first: "$$ROOT"},
totalMessages: {$sum: "$totalMessages"}}},
{$set: {
"data._id.status": "$$REMOVE",
"data.status": "$$REMOVE",
"data.totalStatus": "$totalStatus",
"data.totalMessages": "$totalMessages"
}},
{$replaceRoot: {newRoot: "$data"}}
])
See how it works on the playground example
This Question was asked in StackOverflow by Tarik Tutuncu and Answered by nimrod serok It is licensed under the terms of CC BY-SA 2.5. - CC BY-SA 3.0. - CC BY-SA 4.0.