[SOLVED] MongoDB Aggregation – Counting the occurrences of elements in an array

Issue

This Content is from Stack Overflow. Question asked by Tarik Tutuncu

I am having trouble counting the occurrences of values in an array on MongoDB. Below is what I have currently.

I have 130million documents in a collection with the below format:

{
  "_id": {
    "$oid": "6326ea6f53636247f2daeb19"
  },
  "customerId": 267,
  "customerName": "customer name",
  "deviceId": 7605,
  "deviceModel": "xxx",
  "gpsDateTime": {
    "$date": {
      "$numberLong": "1663494743000"
    }
  },
  "heading": 189,
  "iMEI": "461990",
  "lat": 33.0997333334,
  "lng": -117.2096316667,
  "msgDateTime": {
    "$date": {
      "$numberLong": "1663494767156"
    }
  },
  "speedKPH": 0,
  "statusCode": "REPORT",
  "tripStartDateTime": {
    "$date": {
      "$numberLong": "1663384611590"
    }
  },
  "vehicleGroupId": 564,
  "vehicleGroupname": "vehicle group name",
  "vehicleId": 7474,
  "vehicleName": "vehicle name"
}

I have an aggregation for mongodb as follows:

db.gpsmessage.aggregate([{
     $match: {
      $and: [
       {
        msgDateTime: {
         $exists: true
        }
       },
       {
        msgDateTime: {
         $ne: null
        }
       },
       {
        msgDateTime: {
         $gt: ISODate('2020-12-31T00:00:00.000Z')
        }
       },
       {
        msgDateTime: {
         $lt: ISODate('2022-09-30T00:00:00.000Z')
        }
       }
      ]
     }
    }, {
     $project: {
      msgDateTime: 1,
      customerId: 1,
      customerName: 1,
      deviceId: 1,
      deviceModel: 1,
      iMEI: 1,
      vehicleId: 1,
      vehicleName: 1,
      statusCode: 1,
      vehicleGroupId: 1,
      vehicleGroupname: 1
     }
    }, {
     $group: {
      _id: {
       deviceId: '$deviceId',
       year: {
        $year: '$msgDateTime'
       },
       month: {
        $month: '$msgDateTime'
       }
      },
      customerId: {
       $last: '$customerId'
      },
      customerName: {
       $last: '$customerName'
      },
      deviceId: {
       $last: '$deviceId'
      },
      deviceModel: {
       $last: '$deviceModel'
      },
      iMEI: {
       $last: '$iMEI'
      },
      vehicleId: {
       $last: '$vehicleId'
      },
      vehicleName: {
       $last: '$vehicleName'
      },
      vehicleGroupId: {
       $last: '$vehicleGroupId'
      },
      vehicleGroupName: {
       $last: '$vehicleGroupname'
      },
      firstMsgDate: {
       $min: '$msgDateTime'
      },
      lastMsgDate: {
       $max: '$msgDateTime'
      },
      totalMessages: {
       $sum: 1
      }
     }
    }, {
     $project: {
      _id: {
       deviceId: 1,
       year: 1,
       month: 1
      },
      year: '$_id.year',
      month: '$_id.month',
      deviceId: 1,
      customerId: 1,
      customerName: 1,
      deviceModel: 1,
      iMEI: 1,
      vehicleId: 1,
      vehicleName: 1,
      firstMsgDate: 1,
      lastMsgDate: 1,
      totalMessages: 1
     }
    }, {
     $out: 'usage_info'
    }],{ allowDiskUse:true } );

This outputs the following document:

{
  "_id": {
    "deviceId": 3494,
    "year": 2022,
    "month": 8
  },
  "customerId": 8,
  "customerName": "customer name",
  "deviceId": 3494,
  "deviceModel": "xxx",
  "iMEI": "00000000030303030",
  "vehicleId": 3651,
  "vehicleName": "vehicle name",
  "firstMsgDate": {
    "$date": {
      "$numberLong": "1659741888000"
    }
  },
  "lastMsgDate": {
    "$date": {
      "$numberLong": "1661906908000"
    }
  },
  "totalMessages": 32,
  "totalStatus": [
    "REPORT",
    "REPORT",
    "REPORT",
    "REPORT",
    "REPORT",
    "REPORT",
    "LOW_BATT",
    "LOW_BATT",
    "REPORT",
    "REPORT",
    "REPORT",
    "REPORT",
    "REPORT",
    "REPORT",
    "REPORT",
    "REPORT",
    "REPORT",
    "REPORT",
    "REPORT",
    "REPORT",
    "PWR_ON",
    "LOW_BATT",
    "REPORT",
    "STATUS_EVT",
    "STATUS_EVT",
    "STATUS_EVT",
    "LOW_BATT",
    "PWR_ON",
    "STATUS_EVT",
    "STATUS_EVT",
    "STATUS_EVT",
    "STATUS_EVT"
  ],
  "year": 2022,
  "month": 8
}

What I am trying to figure out is how to count the occurrences in “totalStatus” and modify the response so the totalStatus transforms into the below format and I want to keep the rest of the fields I grouped (these are not the only status messages so the counting must be dynamic):

[{"REPORT": 10},{"STATUS_EVT":15},{"PWR_ON":5}, .....]

I have been trying to find a way to do this for 2 days with no luck.
Help will be much appreciated.



Solution

One option is to replace your $group and $project with:

  1. First group also by status
  2. Format the status to a dictionary using $arrayToObject
  3. Group again, now by year, month and deviceId only.
  4. Format
db.collection.aggregate([
  {$group: {
      _id: {
        deviceId: "$deviceId",
        year: {$year: "$msgDateTime"},
        month: {$month: "$msgDateTime"},
        status: "$statusCode"
      },
      customerId: {$last: "$customerId"},
      customerName: {$last: "$customerName"},
      deviceId: {$last: "$deviceId"},
      deviceModel: {$last: "$deviceModel"},
      iMEI: {$last: "$iMEI"},
      vehicleId: {$last: "$vehicleId"},
      vehicleName: {$last: "$vehicleName"},
      vehicleGroupId: {$last: "$vehicleGroupId"},
      vehicleGroupName: {$last: "$vehicleGroupname"},
      firstMsgDate: {$min: "$msgDateTime"},
      lastMsgDate: {$max: "$msgDateTime"},
      totalMessages: {$sum: 1}
    }
  },
  {$set: {status: [{k: "$_id.status", v: "$totalMessages"}],
      year: "$_id.year",  month: "$_id.month"}
  },
  {$set: {status: {$arrayToObject: "$status"}}},
  {$group: {_id: {deviceId: "$_id.deviceId", month: "$_id.month", year: "$_id.year"},
      totalStatus: {$push: "$status"}, data: {$first: "$$ROOT"},
      totalMessages: {$sum: "$totalMessages"}}},
  {$set: {
      "data._id.status": "$$REMOVE",
      "data.status": "$$REMOVE",
      "data.totalStatus": "$totalStatus",
      "data.totalMessages": "$totalMessages"
  }},
  {$replaceRoot: {newRoot: "$data"}}
])

See how it works on the playground example


This Question was asked in StackOverflow by Tarik Tutuncu and Answered by nimrod serok It is licensed under the terms of CC BY-SA 2.5. - CC BY-SA 3.0. - CC BY-SA 4.0.

people found this article helpful. What about you?