如何通过 $lookup 对“已加入”集合执行 $text 搜索？

Posted 2023-03-11

技术标签:

【中文标题】如何通过 $lookup 对“已加入”集合执行 $text 搜索？【英文标题】：How to perform a $text search on a 'joined' collection via $lookup? 【发布时间】：2016-07-20 14:41:33 【问题描述】：

我是 Mongo 的新手，使用 v3.2。我有 2 个集合 Parent & Child。我想使用 Parent.aggregate 并使用 $lookup “加入” Child 然后在 Child 中的字段上执行 $text $search 并在父节点上执行日期范围搜索。这可能吗……？

【问题讨论】：

$text 或 $near 变体等操作要求使用索引，因此只能在聚合管道“最开始”的阶段应用。因此，既然需要先用 $lookup 进行“连接”，就不能在其后应用这样的查询操作。MongoDB“仍然”并没有真正做连接。您最好直接使用 $text 查询来查询“子”集合，然后对关联的“父”集合运行单独的查询。如果将来 $lookup 被修改为允许用“查询”来过滤要“查找”的集合，那么这在“理论上”是可能的，因为“源集合”将来自其自身“查询”输出的结果。然而，目前情况并非如此。这与 SERVER-21612 有点相关，至少在影响 $lookup 表达式中发出的“查询”方面。感谢您的评论 @BlakesSeven。我真的很喜欢在 $lookup 阶段包含查询的这个想法！同时，我将重新设计我的集合，避免集合之间需要父/子关系，以适应 $text 搜索索引。看起来您不需要这样做，因为已经有人发布了一个有效解决方案的答案。 【参考方案1】：

根据已经给出的评论，确实不能对 $lookup 的结果执行 $text 搜索，因为除了第一个管道阶段之外的任何阶段都没有可用的索引。而且，尤其是考虑到您确实希望根据“child”集合的结果进行“join”，那么直接在“child”集合上进行搜索确实会更好。

这带来了一个明显的结论：为了做到这一点，您应当对“子”集合执行聚合，先以 $text 查询作为初始阶段，然后再用 $lookup 关联“父”集合，而不是反过来。

作为一个工作示例，仅将核心驱动程序用于演示目的：

// Worked example using the core driver: run the $text search on the
// "children" collection FIRST (a $text match is only usable as the initial
// pipeline stage), then $lookup the related "parents" and regroup per parent.
//
// NOTE(review): `MongoClient` and `async` are not declared in this snippet;
// they are presumably provided by the `mongodb` and `async` packages and must
// be in scope before this runs — confirm against the surrounding file.
MongoClient.connect('mongodb://localhost/rlookup',function(err,db) {
  if (err) throw err;

  var Parent = db.collection('parents');
  var Child = db.collection('children');

  // Steps run strictly in order; the first error skips to the final callback.
  async.series(
    [
      // Cleanup: empty both collections so the example is repeatable
      function(callback) {
        async.each([Parent,Child],function(coll,callback) {
          coll.deleteMany({},callback);
        },callback);
      },
      // Create Index: text index on the child "text" field, required by $text
      function(callback) {
        Child.createIndex({ "text": "text" },callback);
      },
      // Create Documents: parents and children are inserted concurrently
      function(callback) {
        async.parallel(
          [
            function(callback) {
              Parent.insertMany(
                [
                  { "_id": 1, "name": "Parent 1" },
                  { "_id": 2, "name": "Parent 2" },
                  { "_id": 3, "name": "Parent 3" }
                ],
                callback
              );
            },
            function(callback) {
              // Each child stores the _id of its parent in "parent"
              Child.insertMany(
                [
                  {
                    "_id": 1,
                    "parent": 1,
                    "text": "The little dog laughed to see such fun"
                  },
                  {
                    "_id": 2,
                    "parent": 1,
                    "text": "The quick brown fox jumped over the lazy dog"
                  },
                  {
                    "_id": 3,
                    "parent": 1,
                    "text": "The dish ran away with the spoon"
                  },
                  {
                    "_id": 4,
                    "parent": 2,
                    "text": "Miss muffet on here tuffet"
                  },
                  {
                    "_id": 5,
                    "parent": 3,
                    "text": "Lady is a fox"
                  },
                  {
                    "_id": 6,
                    "parent": 3,
                    "text": "Every dog has it's day"
                  }
                ],
                callback
              )
            }
          ],
          callback
        );
      },
      // Aggregate with $text and $lookup
      function(callback) {
        Child.aggregate(
          [
            // $text has to be the very first stage so the text index is used
            { "$match": {
              "$text": { "$search": "fox dog" }
            }},
            // Keep the per-child text score as a regular field
            { "$project": {
              "parent": 1,
              "text": 1,
              "score": { "$meta": "textScore" }
            }},
            { "$sort": { "score": { "$meta": "textScore" } } },
            // "Join" each matching child to its parent; the numeric "parent"
            // field is overwritten by the array of matched parent documents
            { "$lookup": {
              "from": "parents",
              "localField": "parent",
              "foreignField": "_id",
              "as": "parent"
            }},
            { "$unwind": "$parent" },
            // Regroup by parent, collecting the matching children and
            // summing their scores into a per-parent score
            { "$group": {
              "_id": "$parent._id",
              "name": { "$first": "$parent.name" },
              "children": {
                "$push": {
                  "_id": "$_id",
                  "text": "$text",
                  "score": "$score"
                }
              },
              "score": { "$sum": "$score" }
            }},
            // Highest-scoring parent first
            { "$sort": { "score": -1 } }
          ],
          function(err,result) {
            console.log(JSON.stringify(result,undefined,2));
            callback(err);
          }
        )
      }
    ],
    // Final callback: rethrow any error, otherwise close the connection
    function(err) {
      if (err) throw err;
      db.close();
    }
  );

});

这会导致$text 匹配每个Parent 中填充的Child 上的查询，并按"score" 排序：

[
  {
    "_id": 1,
    "name": "Parent 1",
    "children": [
      {
        "_id": 2,
        "text": "The quick brown fox jumped over the lazy dog",
        "score": 1.1666666666666667
      },
      {
        "_id": 1,
        "text": "The little dog laughed to see such fun",
        "score": 0.6
      }
    ],
    "score": 1.7666666666666666
  },
  {
    "_id": 3,
    "name": "Parent 3",
    "children": [
      {
        "_id": 5,
        "text": "Lady is a fox",
        "score": 0.75
      },
      {
        "_id": 6,
        "text": "Every dog has it's day",
        "score": 0.6666666666666666
      }
    ],
    "score": 1.4166666666666665
  }
]

这最终是有道理的，并且比另一种做法要高效得多：即从“父”集合查询，在 $lookup 中查找所有“子”文档，然后使用 $match 进行“后过滤”以删除任何不符合条件的“子”文档，最后再丢弃没有任何匹配的“父”文档。

同样的情况也适用于 mongoose 风格的“引用”，即在“父”文档中包含一个“子”文档的“数组”，而不是在子文档上记录父引用。因此，只要子文档上的 "localField"（在这种情况下为 _id）与父文档数组中作为 "foreignField" 定义的类型相同（如果它能与 .populate() 一起工作，类型就一定相同），那么您仍然会在 $lookup 结果中为每个“子”文档获得匹配的“父”文档。

这一切都归结为扭转您的想法并意识到$text 结果是最重要的，因此“那个”是需要启动操作的集合。

这是可能的，但要反过来做。

将猫鼬样式与父级中引用的子级列表一起使用

仅显示父级引用的反向案例以及日期过滤：

var async = require('async'),
    mongoose = require('mongoose'),
    Schema = mongoose.Schema;

// Reverse-reference example: each parent holds an array of child _ids
// (mongoose-style "ref"), and parents are additionally filtered by date.
// The aggregation still starts from the children so that $text can be the
// first pipeline stage.
mongoose.connect('mongodb://localhost/rlookup');

// Parent documents reference their children by numeric _id
var parentSchema = new Schema({
  "_id": Number,
  "name": String,
  "date": Date,
  "children": [{ "type": Number, "ref": "Child" }]
});

// autoIndex is disabled here; the text index is built explicitly below via
// ensureIndexes() as one of the series steps
var childSchema = new Schema({
  "_id": Number,
  "text": { "type": String, "index": "text" }
},{ "autoIndex": false });

var Parent = mongoose.model("Parent",parentSchema),
    Child = mongoose.model("Child",childSchema);

// Steps run strictly in order; the first error skips to the final callback.
async.series(
  [
    // Cleanup: remove all documents from both models
    function(callback) {
      async.each([Parent,Child],function(model,callback) {
        model.remove({},callback);
      },callback);
    },
    // Build the text index in the foreground before inserting/searching
    function(callback) {
      Child.ensureIndexes({ "background": false },callback);
    },
    // Create documents: parents and children are inserted concurrently
    function(callback) {
      async.parallel(
        [
          function(callback) {
            Parent.create([
              {
                "_id": 1,
                "name": "Parent 1",
                "date": new Date("2016-02-01"),
                "children": [1,2]
              },
              {
                "_id": 2,
                "name": "Parent 2",
                "date": new Date("2016-02-02"),
                "children": [3,4]
              },
              {
                "_id": 3,
                "name": "Parent 3",
                "date": new Date("2016-02-03"),
                "children": [5,6]
              },
              {
                // Dated outside the range filtered on below
                "_id": 4,
                "name": "Parent 4",
                "date": new Date("2016-01-15"),
                "children": [1,2,6]
              }
            ],callback)
          },
          function(callback) {
            Child.create([
              {
                "_id": 1,
                "text": "The little dog laughed to see such fun"
              },
              {
                "_id": 2,
                "text": "The quick brown fox jumped over the lazy dog"
              },
              {
                "_id": 3,
                "text": "The dish ran awy with the spoon"
              },
              {
                "_id": 4,
                "text": "Miss muffet on her tuffet"
              },
              {
                "_id": 5,
                "text": "Lady is a fox"
              },
              {
                "_id": 6,
                "text": "Every dog has it's day"
              }
            ],callback);
          }
        ],
        callback
      );
    },
    // Aggregate: $text on children, then $lookup parents and filter by date
    function(callback) {
      Child.aggregate(
        [
          // $text has to be the very first stage so the text index is used
          { "$match": {
            "$text": { "$search": "fox dog" }
          }},
          // Keep the per-child text score as a regular field
          { "$project": {
            "text": 1,
            "score": { "$meta": "textScore" }
          }},
          { "$sort": { "score": { "$meta": "textScore" } } },
          // Match the child's _id against the "children" array held on
          // each parent
          { "$lookup": {
            "from": "parents",
            "localField": "_id",
            "foreignField": "children",
            "as": "parent"
          }},
          // Keep only parents dated within [2016-02-01, 2016-03-01)
          { "$project": {
            "text": 1,
            "score": 1,
            "parent": {
              "$filter": {
                "input": "$parent",
                "as": "parent",
                "cond": {
                  "$and": [
                    { "$gte": [ "$$parent.date", new Date("2016-02-01") ] },
                    { "$lt": [ "$$parent.date", new Date("2016-03-01") ] }
                  ]
                }
              }
            }
          }},
          { "$unwind": "$parent" },
          // Regroup by parent, collecting the matching children and
          // summing their scores into a per-parent score
          { "$group": {
            "_id": "$parent._id",
            "name": { "$first": "$parent.name" },
            "date": { "$first": "$parent.date" },
            "children": {
              "$push": {
                "_id": "$_id",
                "text": "$text",
                "score": "$score"
              }
            },
            "score": { "$sum": "$score" }
          }},
          // Highest-scoring parent first
          { "$sort": { "score": -1 } }
        ],
        function(err,result) {
          console.log(JSON.stringify(result,undefined,2));
          callback(err);
        }
      )
    }
  ],
  // Final callback: rethrow any error, otherwise disconnect
  function(err) {
    if (err) throw err;
    mongoose.disconnect();
  }
);

输出：

[
  {
    "_id": 1,
    "name": "Parent 1",
    "date": "2016-02-01T00:00:00.000Z",
    "children": [
      {
        "_id": 2,
        "text": "The quick brown fox jumped over the lazy dog",
        "score": 1.1666666666666667
      },
      {
        "_id": 1,
        "text": "The little dog laughed to see such fun",
        "score": 0.6
      }
    ],
    "score": 1.7666666666666666
  },
  {
    "_id": 3,
    "name": "Parent 3",
    "date": "2016-02-03T00:00:00.000Z",
    "children": [
      {
        "_id": 5,
        "text": "Lady is a fox",
        "score": 0.75
      },
      {
        "_id": 6,
        "text": "Every dog has it's day",
        "score": 0.6666666666666666
      }
    ],
    "score": 1.4166666666666665
  }
]

请注意，由于日期不在$filter 应用的查询范围内，因此删除了原本排名最高的 "Parent 4"。

【讨论】：

感谢深入的示例和代码！我现在遇到的问题是计数。首先查询 Children $text 搜索，然后在另一个阶段，查询 Parent 并执行 $group 以显示唯一的 Parent，计数是相对于 Children 结果而不是父母分组聚合。我说得清楚吗？ @CoryRobinson 您可能可以通过asking another question 更清楚地说明这一点，因为这是表达您需要的最佳方式。这是为了向您展示该方法需要“首先”搜索$text，然后加入另一个集合。

以上是关于如何通过 $lookup 对“已加入”集合执行 $text 搜索？的主要内容，如果未能解决你的问题，请参考以下文章