Mongo / Mongoose 聚合 - $redact 和 $cond 问题

Posted

技术标签:

【中文标题】Mongo / Mongoose 聚合 - $redact 和 $cond 问题【英文标题】:Mongo / Mongoose Aggregation - $redact and $cond issues 【发布时间】:2017-10-12 13:00:07 【问题描述】:

我很幸运地从@chridam 获得了另一个 SO 问题Mongo / Mongoose - Aggregating by Date 的精彩答案,其中给出了一组文档,例如:

{ "_id" : ObjectId("5907a5850b459d4fdcdf49ac"), "amount" : -33.3, "name" : "RINGGO", "method" : "VIS", "date" : ISODate("2017-04-26T23:00:00Z"), "importDate" : ISODate("2017-05-01T21:15:49.581Z"), "category" : "Not Set", "__v" : 0 }
{ "_id" : ObjectId("5907a5850b459d4fdcdf49ba"), "amount" : -61.3, "name" : "Amazon", "method" : "VIS", "date" : ISODate("2017-03-23T00:00:00Z"), "importDate" : ISODate("2017-05-01T21:15:49.592Z"), "category" : "Not Set", "__v" : 0 }
{ "_id" : ObjectId("5907a5850b459d4fdcdf49ce"), "amount" : -3.3, "name" : "Tesco", "method" : "VIS", "date" : ISODate("2017-03-15T00:00:00Z"), "importDate" : ISODate("2017-05-01T21:15:49.601Z"), "category" : "Not Set", "__v" : 0 }
{ "_id" : ObjectId("5907a5850b459d4fdcdf49cc"), "amount" : -26.3, "name" : "RINGGO", "method" : "VIS", "date" : ISODate("2017-03-16T00:00:00Z"), "importDate" : ISODate("2017-05-01T21:15:49.600Z"), "category" : "Not Set", "__v" : 0 }
{ "_id" : ObjectId("5907a5850b459d4fdcdf49f7"), "amount" : -63.3, "name" : "Sky", "method" : "VIS", "date" : ISODate("2017-03-02T00:00:00Z"), "importDate" : ISODate("2017-05-01T21:15:49.617Z"), "category" : "Not Set", "__v" : 0 }
{ "_id" : ObjectId("5907a5850b459d4fdcdf49be"), "amount" : -3.3, "name" : "RINGGO", "method" : "VIS", "date" : ISODate("2017-03-22T00:00:00Z"), "importDate" : ISODate("2017-05-01T21:15:49.593Z"), "category" : "Not Set", "__v" : 0 }

需要一个按供应商、年、月和周汇总支出的查询。查询如下,它几乎工作得非常好,但是当我在我的应用程序中使用它时,我注意到了一个重大问题

// Aggregates spending for one vendor, producing yearly, monthly and weekly
// spend lists and totals for every (name, year, month, week) combination
// that survives the $redact filter.
db.statements.aggregate([
    // Stage 1: keep only the statements of the requested vendor.
    { "$match": { "name": "RINGGO" } },

    // Stage 2: keep a document only when its date falls in the requested
    // year, month and week; everything else is pruned.
    {
        "$redact": {
            "$cond": [
                {
                    "$and": [
                        { "$eq": [ { "$year": "$date" }, 2017 ] }, // within my route this uses parseInt(req.params.year)
                        { "$eq": [ { "$month": "$date" }, 3 ] }, // within my route this uses parseInt(req.params.month)
                        { "$eq": [ { "$week": "$date" }, 12 ] } // within my route this uses parseInt(req.params.week)
                    ]
                },
                "$$KEEP",
                "$$PRUNE"
            ]
        }
    },

    // Stage 3: finest granularity - one document per name/year/month/week
    // holding the summed amount for that week.
    {
        "$group": {
            "_id": {
                "name": "$name",
                "year": { "$year": "$date" },
                "month": { "$month": "$date" },
                "week": { "$week": "$date" }
            },
            "total": { "$sum": "$amount" }
        }
    },

    // Stage 4: roll the weekly totals up per name/year. Note that
    // YearlySpends therefore contains weekly totals, not individual amounts.
    // The weekly documents are kept in "data" so they can be unwound again.
    {
        "$group": {
            "_id": {
                "name": "$_id.name",
                "year": "$_id.year"
            },
            "YearlySpends": { "$push": "$total" },
            "totalYearlyAmount": { "$sum": "$total" },
            "data": { "$push": "$$ROOT" }
        }
    },
    { "$unwind": "$data" },

    // Stage 5: roll up per name/month, carrying the yearly figures along.
    {
        "$group": {
            "_id": {
                "name": "$_id.name",
                "month": "$data._id.month"
            },
            "YearlySpends": { "$first": "$YearlySpends" },
            "totalYearlyAmount": { "$first": "$totalYearlyAmount" },
            "MonthlySpends": { "$push": "$data.total" },
            "totalMonthlyAmount": { "$sum": "$data.total" },
            "data": { "$push": "$data" }
        }
    },
    { "$unwind": "$data" },

    // Stage 6: roll up per name/week, carrying yearly and monthly figures.
    {
        "$group": {
            "_id": {
                "name": "$_id.name",
                "week": "$data._id.week"
            },
            "YearlySpends": { "$first": "$YearlySpends" },
            "totalYearlyAmount": { "$first": "$totalYearlyAmount" },
            "MonthlySpends": { "$first": "$MonthlySpends" },
            "totalMonthlyAmount": { "$first": "$totalMonthlyAmount" },
            "WeeklySpends": { "$push": "$data.total" },
            "totalWeeklyAmount": { "$sum": "$data.total" },
            "data": { "$push": "$data" }
        }
    },
    { "$unwind": "$data" },

    // Stage 7: restore the original name/year/month/week key as _id and
    // emit one result document per key with all collected figures.
    {
        "$group": {
            "_id": "$data._id",
            "YearlySpends": { "$first": "$YearlySpends" },
            "totalYearlyAmount": { "$first": "$totalYearlyAmount" },
            "MonthlySpends": { "$first": "$MonthlySpends" },
            "totalMonthlyAmount": { "$first": "$totalMonthlyAmount" },
            "WeeklySpends": { "$first": "$WeeklySpends" },
            "totalWeeklyAmount": { "$first": "$totalWeeklyAmount" }
        }
    }
])

运行此查询返回

{
  "_id" : {
    "name" : "RINGGO",
    "year" : 2017,
    "month" : 3,
    "week" : 12
  },
  "YearlySpends" : [ -9.6 ],
  "totalYearlyAmount" : -9.6,
  "MonthlySpends" : [ -9.6 ],
  "totalMonthlyAmount" : -9.6,
  "WeeklySpends" : [ -9.6 ],
  "totalWeeklyAmount" : -9.6
}

当我改为想查看当月的支出时

"$cond": [
          
            "$and": [
                  "$eq": [ "$year": "$date" ,  2017  ],
                  "$eq": [ "$month": "$date" , 3 ]
            ]
          ,
        "$$KEEP",
        "$$PRUNE"
      ]

我明白了:

{ "_id" : { "name" : "RINGGO", "year" : 2017, "month" : 3, "week" : 12 }, "YearlySpends" : [ -3.3, -9.6, -9.6, -3.3 ], "totalYearlyAmount" : -25.799999999999997, "MonthlySpends" : [ -3.3, -9.6, -9.6, -3.3 ], "totalMonthlyAmount" : -25.799999999999997, "WeeklySpends" : [ -9.6 ], "totalWeeklyAmount" : -9.6 }
{ "_id" : { "name" : "RINGGO", "year" : 2017, "month" : 3, "week" : 9 }, "YearlySpends" : [ -3.3, -9.6, -9.6, -3.3 ], "totalYearlyAmount" : -25.799999999999997, "MonthlySpends" : [ -3.3, -9.6, -9.6, -3.3 ], "totalMonthlyAmount" : -25.799999999999997, "WeeklySpends" : [ -3.3 ], "totalWeeklyAmount" : -3.3 }
{ "_id" : { "name" : "RINGGO", "year" : 2017, "month" : 3, "week" : 11 }, "YearlySpends" : [ -3.3, -9.6, -9.6, -3.3 ], "totalYearlyAmount" : -25.799999999999997, "MonthlySpends" : [ -3.3, -9.6, -9.6, -3.3 ], "totalMonthlyAmount" : -25.799999999999997, "WeeklySpends" : [ -9.6 ], "totalWeeklyAmount" : -9.6 }
{ "_id" : { "name" : "RINGGO", "year" : 2017, "month" : 3, "week" : 13 }, "YearlySpends" : [ -3.3, -9.6, -9.6, -3.3 ], "totalYearlyAmount" : -25.799999999999997, "MonthlySpends" : [ -3.3, -9.6, -9.6, -3.3 ], "totalMonthlyAmount" : -25.799999999999997, "WeeklySpends" : [ -3.3 ], "totalWeeklyAmount" : -3.3 }

但是,当我运行一个简单的 db.statements.find({"name":"RINGGO"}) 时,我得到:

{ "_id" : ObjectId("5907a5850b459d4fdcdf49ac"), "amount" : -3.3, "name" : "RINGGO", "method" : "VIS", "date" : ISODate("2017-03-26T23:00:00Z"), "importDate" : ISODate("2017-05-01T21:15:49.581Z"), "category" : "Not Set", "__v" : 0 }
{ "_id" : ObjectId("5907a5850b459d4fdcdf49ba"), "amount" : -6.3, "name" : "RINGGO", "method" : "VIS", "date" : ISODate("2017-03-23T00:00:00Z"), "importDate" : ISODate("2017-05-01T21:15:49.592Z"), "category" : "Not Set", "__v" : 0 }
{ "_id" : ObjectId("5907a5850b459d4fdcdf49ce"), "amount" : -3.3, "name" : "RINGGO", "method" : "VIS", "date" : ISODate("2017-03-15T00:00:00Z"), "importDate" : ISODate("2017-05-01T21:15:49.601Z"), "category" : "Not Set", "__v" : 0 }
{ "_id" : ObjectId("5907a5850b459d4fdcdf49cc"), "amount" : -6.3, "name" : "RINGGO", "method" : "VIS", "date" : ISODate("2017-03-16T00:00:00Z"), "importDate" : ISODate("2017-05-01T21:15:49.600Z"), "category" : "Not Set", "__v" : 0 }
{ "_id" : ObjectId("5907a5850b459d4fdcdf49f7"), "amount" : -3.3, "name" : "RINGGO", "method" : "VIS", "date" : ISODate("2017-03-02T00:00:00Z"), "importDate" : ISODate("2017-05-01T21:15:49.617Z"), "category" : "Not Set", "__v" : 0 }
{ "_id" : ObjectId("5907a5850b459d4fdcdf49be"), "amount" : -3.3, "name" : "RINGGO", "method" : "VIS", "date" : ISODate("2017-03-22T00:00:00Z"), "importDate" : ISODate("2017-05-01T21:15:49.593Z"), "category" : "Not Set", "__v" : 0 }

因此您可以看到,与按名称查找的输出中显示的相比,先前输出中 MonthlySpends 中的项目数量不同。您还可以看到,一些值在 MonthlySpends 中不应该被加在一起。

理想情况下,我希望得到以下输出: 当我有$redact 包含:

"$cond": [
        
            "$and": [
                  "$eq": [ "$year": "$date" ,  2017  ], 
                  "$eq": [ "$month": "$date" , 3 ], 
                  "$eq": [ "$week": "$date" ,  12  ] 
            ]
        ,
        "$$KEEP",
        "$$PRUNE"
    ]

返回

{ "_id" : { "name" : "RINGGO", "year" : 2017, "month" : 3, "week" : 12 }, "YearlySpends" : [ -3.3, -9.6, -9.6, -3.3 ], "totalYearlyAmount" : -25.799999999999997, "MonthlySpends" : [ -3.3, -9.6, -9.6, -3.3 ], "totalMonthlyAmount" : -25.799999999999997, "WeeklySpends" : [ -9.6 ], "totalWeeklyAmount" : -9.6 }

当我有 $redact 包含:

"$cond": [
        
            "$and": [
                  "$eq": [ "$year": "$date" ,  2017  ], 
                  "$eq": [ "$month": "$date" , 3 ],
            ]
        ,
        "$$KEEP",
        "$$PRUNE"
        ]

返回

{ "_id" : { "name" : "RINGGO", "year" : 2017, "month" : 3 }, "YearlySpends" : [ -3.3, -9.6, -9.6, -3.3 ], "totalYearlyAmount" : -25.799999999999997, "MonthlySpends" : [ -3.3, -9.6, -9.6, -3.3 ], "totalMonthlyAmount" : -25.799999999999997 }

当我有 $redact 包含:

"$cond": [
        
            "$and": [
                  "$eq": [ "$year": "$date" ,  2017  ]
            ]
        ,
        "$$KEEP",
        "$$PRUNE"
        ]

返回

{ "_id" : { "name" : "RINGGO", "year" : 2017 }, "YearlySpends" : [ -3.3, -9.6, -9.6, -3.3 ], "totalYearlyAmount" : -25.799999999999997 }

在这方面需要任何帮助。我已经尝试修改查询,但恐怕我对它的理解还不够,无法正确修改它。

我的 Mongoose 版本是 ^4.9.5,我的 mongo 版本是 3.4.2

【问题讨论】:

使用 $redact 过滤日期范围非常低效。您真的应该在初始 $match 管道阶段使用 $gte 和 $lte 运算符简单地查询“范围”。 您似乎也忽略了这一点,即随后的 $group 阶段逐渐降低了这里的粒度。即首先汇总一周,然后是一个月中的所有周,然后是一年中的各个月。如果您打算将最初的选择范围更改为仅一周或仅一个月,那么后面只需跟进较少的 $group 阶段。 @NeilLunn 感谢您抽出宝贵时间发表评论!我很确定我在这里错过了重点。我是 mongo/mongoose 新手,所以努力学习!我试过删除一些 $group 阶段,但它不会改变返回的记录数。此外,这将如何影响 "MonthlySpends" : [ -3.3, -9.6, -9.6, -3.3 ] 中不正确的项目数量以及 totalMonthlyAmount 中的项目总和?您愿意发布一个我可以测试并希望标记为正确的答案吗? @NeilLunn 未使用 $gte 和 $lte 的原因可能是查询实际上是由 URL 驱动的,例如 RINGGO/2017/3/12 或 RINGGO/2017/3。您可以在上面的问题中看到代码注释显示了 parseInt(req.params.year) 和 parseInt(req.params.month)。 为了完整起见,您还应该添加您的 mongodb 和 mongoose 版本。 【参考方案1】:

您可以在 3.4 版本中尝试使用 $facet 和 $addFields 进行并行聚合。

这将降低整体复杂性,您可以同时使用自己的匹配输入运行分组。

以下代码根据请求对象动态构建聚合管道。

// Builds a Mongoose aggregation dynamically from a request object: the date
// parts present in the request decide which computed fields are added and
// which facets (Yearly / Monthly / Weekly) are run in parallel.

// Sample request
var request = {
  "name": "RINGGO",
  "year": 2017,
  "month": 3,
  "week": 12
};

// Build initial match document on name

var match1 = {
  name: request["name"]
};

// Build project & facet document for date based aggregation

var addFields = {};
var facet = {};

// Add year followed by year facet

if (request["year"]) {
    // Computed "year" field that the Yearly facet matches and groups on.
    addFields["year"] = { "$year": "$date" };
    facet["Yearly"] =
      [
        {
          "$match": { "year": request["year"] }
        },
        {
          "$group": {
            "_id": {
              "name": "$name",
              "year": "$year"
            },
            "spend": { "$push": "$amount" },
            "total": { "$sum": "$amount" }
          }
        }
      ];
}

// Add month followed by month facet

if (request["month"]) {
    // Computed "month" field that the Monthly facet matches and groups on.
    addFields["month"] = { "$month": "$date" };
    facet["Monthly"] =
      [
        {
          "$match": { "month": request["month"] }
        },
        {
          "$group": {
            "_id": {
              "name": "$name",
              "month": "$month"
            },
            "spend": { "$push": "$amount" },
            "total": { "$sum": "$amount" }
          }
        }
      ];
}

// Add week followed by week facet

if (request["week"]) {
    // Computed "week" field that the Weekly facet matches and groups on.
    addFields["week"] = { "$week": "$date" };
    facet["Weekly"] =
      [
        {
          "$match": { "week": request["week"] }
        },
        {
          "$group": {
            "_id": {
              "name": "$name",
              "week": "$week"
            },
            "spend": { "$push": "$amount" },
            "total": { "$sum": "$amount" }
          }
        }
      ];
}

// Use aggregate builder

statements.aggregate()
        .match(match1)
        .append({ "$addFields": addFields }) // No addFields stage in mongoose builder
        .facet(facet)
        .exec(function(err, data) {});

Mongo Shell 查询 name/year/month/week 条件。

// Mongo shell equivalent of the dynamically built pipeline for a request
// carrying name, year, month and week. The stages are passed as a single
// pipeline array, which is the documented form of aggregate().
db.statements.aggregate([
    // Restrict to the requested vendor.
    {
        '$match': {
            name: 'RINGGO'
        }
    },
    // Add the date parts as plain fields so each facet can match on them.
    {
        '$addFields': {
            year: {
                '$year': '$date'
            },
            month: {
                '$month': '$date'
            },
            week: {
                '$week': '$date'
            }
        }
    },
    // Run the yearly, monthly and weekly groupings on the same input set;
    // each facet applies its own $match.
    {
        '$facet': {
            Yearly: [
                {
                    '$match': {
                        year: 2017
                    }
                },
                {
                    '$group': {
                        _id: {
                            name: '$name',
                            year: '$year'
                        },
                        spend: {
                            '$push': '$amount'
                        },
                        total: {
                            '$sum': '$amount'
                        }
                    }
                }
            ],
            Monthly: [
                {
                    '$match': {
                        month: 3
                    }
                },
                {
                    '$group': {
                        _id: {
                            name: '$name',
                            month: '$month'
                        },
                        spend: {
                            '$push': '$amount'
                        },
                        total: {
                            '$sum': '$amount'
                        }
                    }
                }
            ],
            Weekly: [
                {
                    '$match': {
                        week: 12
                    }
                },
                {
                    '$group': {
                        _id: {
                            name: '$name',
                            week: '$week'
                        },
                        spend: {
                            '$push': '$amount'
                        },
                        total: {
                            '$sum': '$amount'
                        }
                    }
                }
            ]
        }
    }
])

示例响应

    
{
    "Yearly": [{
        "_id": {
            "name": "RINGGO",
            "year": 2017
        },
        "spend": [-3.3, -6.3, -3.3, -6.3, -3.3, -3.3],
        "total": -25.799999999999997
    }],
    "Monthly": [{
        "_id": {
            "name": "RINGGO",
            "month": 3
        },
        "spend": [-3.3, -6.3, -3.3, -6.3, -3.3, -3.3],
        "total": -25.799999999999997
    }],
    "Weekly": [{
        "_id": {
            "name": "RINGGO",
            "week": 12
        },
        "spend": [-6.3, -3.3],
        "total": -9.6
    }]
}

您可以对 Year/Month 和 Year 输入值运行类似的聚合。

(引用问题原文)因此您可以看到,与按名称查找的输出中显示的相比,先前输出中 MonthlySpends 中的项目数量不同。您还可以看到,一些值在 MonthlySpends 中被加在了一起,而它们本不应该被加在一起。

这发生在第 1 个 $group 阶段:$week 聚合将 [15, 16] 这两个日期汇总到第 11 周,将另外两个日期 [22, 23] 汇总到第 12 周,汇总后的值随后作为合计显示在 MonthlySpends 中。

【讨论】:

感谢 @veeram,这看起来很有希望。请给我一点时间,今晚在我的应用程序的一系列场景中进行测试。我是否可以认为,要只获得 Year/Month 而不含 week 的结果,我只需从 $addFields 中删除 week: { '$week': '$date' },然后删除 Weekly facet? —— 不客气。请慢慢来,等待其他解决方案。是的,你的理解是正确的。您可以删除整个 week 代码块(if (request["week"]) {..}),或者如果您在请求对象(var request = { "name":"RINGGO", "year": 2017, "month":3 };)中不包含星期参数,这会将 if (request["week"]) 评估为 false,因此不会包含该代码块。 —— 很抱歉,我知道这不是最初表达的,但有没有办法在每年、每月、每周的 spend 内获取支出日期?也许 spend 可以看起来像 "spend": [{date: 2017-03-26, amount: -3.3}, {date: 2017-03-25, amount: -6.3}] 等,或者你可能有更好的主意。 —— 这不是问题。您可以将聚合管道的 spend 部分更新为 spend: { '$push': { 'date': { $dateToString: { 'format': "%Y-%m-%d", 'date': '$date' } }, 'amount': '$amount' } }。使用 $dateToString 运算符来格式化日期。

以上是关于Mongo / Mongoose 聚合 - $redact 和 $cond 问题的主要内容,如果未能解决你的问题,请参考以下文章

Mongoose 聚合返回空结果并在 mongo 控制台中工作 [重复]

Mongoose 聚合返回空结果并在 mongo 控制台中工作 [重复]

Mongoose 虚拟填充和聚合

无法通过 mongoose 删除 mongo 数据库

mongo 进阶之—— mongoose 认识

mongo 进阶之—— mongoose 认识