来自多个集合的 $lookup 和嵌套输出

Posted

技术标签:

【中文标题】来自多个集合的 $lookup 和嵌套输出【英文标题】:$lookup from Multiple Collections, and nested output 【发布时间】:2018-02-20 08:06:52 【问题描述】:

我有多个集合,采用的是独立集合加外键引用的方式。我想把这些集合连接(join)起来,构建一个嵌套的结果。这是我的集合模式:

// Survey: the top-level document. Groups point back to it through their
// `survey` field.
const SurveySchema = new Schema({
  _id: { type: Schema.ObjectId, auto: true },
  name: String,
  // The option key must be lowercase `default`; a capitalised `Default`
  // is not a recognised schema option, so no default would be applied.
  enabled: { type: Boolean, default: true },
  created_date: { type: Date, default: Date.now },
  company: { type: Schema.Types.ObjectId, ref: 'Company' },
});

// Group: belongs to one Survey (`survey`) and holds an ordered list of
// Question references (`questions`).
const GroupSchema = new Schema({
  _id: { type: Schema.ObjectId, auto: true },
  name: String,
  order: String,
  // Lowercase `default` is the option name Mongoose recognises.
  created_date: { type: Date, default: Date.now },
  questions: [{ type: Schema.Types.ObjectId, ref: 'Question' }],
  survey: { type: Schema.Types.ObjectId, ref: 'Survey' }
});

// Response: one possible answer, linked to its Question through `question`.
const ResponseSchema = new Schema({
  _id: { type: Schema.ObjectId, auto: true },
  response_text: String,
  order: String,
  // Lowercase `default` is the option name Mongoose recognises.
  created_date: { type: Date, default: Date.now },
  question: { type: Schema.Types.ObjectId, ref: 'Question' }
});

这是我构建这个嵌套对象的代码:

// Joins surveys -> groups -> questions -> responses and regroups per survey.
// Note that questions and responses end up as two sibling arrays on each
// group entry rather than responses nested inside their question.
Survey.aggregate([
  { $match: {} },
  // Attach every group whose `survey` field references this survey.
  { $lookup: {
    from: 'groups',
    localField: '_id',
    foreignField: 'survey',
    as: 'groupsofquestions',
  }},
  // One document per group; surveys without groups are kept.
  { $unwind: {
    path: "$groupsofquestions",
    preserveNullAndEmptyArrays: true
  }},
  // Questions that reference the current group.
  { $lookup: {
    from: 'questions',
    localField: 'groupsofquestions._id',
    foreignField: 'group',
    as: 'questionsofgroup',
  }},
  // Responses for any of the questions found above. The collection name is
  // the plural `responses`, in line with `groups` and `questions`.
  { $lookup: {
    from: 'responses',
    localField: 'questionsofgroup._id',
    foreignField: 'question',
    as: 'responses',
  }},
  // Collapse back to one document per survey.
  { $group: {
    _id: "$_id",
    name: { $first: "$name" },
    groups: { $push: {
      id: "$groupsofquestions._id",
      name: "$groupsofquestions.name",
      questions: "$questionsofgroup",
      responses: "$responses"
    }}
  }}
])

我想要得到如下结构(也可通过外部链接查看):

http://jsoneditoronline.org/?id=d7d1779b3b95e3acb28f8a2be0785423

[
  {
    "__v": 0,
    "_id": "59b6715725dcd2060da7f591",
    "company": "59b6715725dcd2060da7f58f",
    "created_date": "2017-09-11T11:19:51.709Z",
    "enabled": true,
    "name": "function String() { [native code] }",
    "groups": [
      {
        "_id": "59b6715725dcd2060da7f592",
        "name": "groupe 1 des question",
        "order": "1",
        "created_date": "2017-09-11T11:19:51.709Z",
        "survey": "59b6715725dcd2060da7f591",
        "__v": 0,
        "questions": [
          {
            "_id": "59b6715725dcd2060da7f594",
            "question_text": "question 1 group 1",
            "order": "1",
            "created_date": "2017-09-11T11:19:51.709Z",
            "group": "59b6715725dcd2060da7f592",
            "__v": 0,
            "responses": [
              {
                "_id": "59b6715725dcd2060da7f598",
                "response_text": "reponse 1 question 1 group 1",
                "order": "1",
                "created_date": "2017-09-11T11:19:51.710Z",
                "question": "59b6715725dcd2060da7f594",
                "__v": 0
              },
              {
                "_id": "59b6715725dcd2060da7f599",
                "response_text": "reponse 2 question 1 group 1",
                "order": "2",
                "created_date": "2017-09-11T11:19:51.710Z",
                "question": "59b6715725dcd2060da7f594",
                "__v": 0
              }
            ]
          },
          {
            "_id": "59b6715725dcd2060da7f595",
            "question_text": "question 2 group 1",
            "order": "2",
            "created_date": "2017-09-11T11:19:51.710Z",
            "group": "59b6715725dcd2060da7f592",
            "__v": 0,
            "responses": [
              {
                "_id": "59b6715725dcd2060da7f59a",
                "response_text": "reponse 1 question 2 group 1",
                "order": "1",
                "created_date": "2017-09-11T11:19:51.710Z",
                "question": "59b6715725dcd2060da7f595",
                "__v": 0
              },
              {
                "_id": "59b6715725dcd2060da7f59b",
                "response_text": "reponse 2 question 2 group 1",
                "order": "2",
                "created_date": "2017-09-11T11:19:51.710Z",
                "question": "59b6715725dcd2060da7f595",
                "__v": 0
              }
            ]
          }
        ]
      },
      {
        "_id": "59b6715725dcd2060da7f593",
        "name": "groupe 2 des question",
        "order": "2",
        "created_date": "2017-09-11T11:19:51.709Z",
        "survey": "59b6715725dcd2060da7f591",
        "__v": 0,
        "questions": [
          {
            "_id": "59b6715725dcd2060da7f596",
            "question_text": "question 1 group 1",
            "order": "1",
            "created_date": "2017-09-11T11:19:51.710Z",
            "group": "59b6715725dcd2060da7f592",
            "__v": 0,
            "responses": [
              {
                "_id": "59b6715725dcd2060da7f59c",
                "response_text": "reponse 1 question 1 group 2",
                "order": "1",
                "created_date": "2017-09-11T11:19:51.710Z",
                "question": "59b6715725dcd2060da7f596",
                "__v": 0
              },
              {
                "_id": "59b6715725dcd2060da7f59d",
                "response_text": "reponse 2 question 1 group 2",
                "order": "2",
                "created_date": "2017-09-11T11:19:51.710Z",
                "question": "59b6715725dcd2060da7f596",
                "__v": 0
              }
            ]
          },
          {
            "_id": "59b6715725dcd2060da7f597",
            "question_text": "question 2 group 1",
            "order": "2",
            "created_date": "2017-09-11T11:19:51.710Z",
            "group": "59b6715725dcd2060da7f592",
            "__v": 0,
            "responses": [
              {
                "_id": "59b6715725dcd2060da7f59e",
                "response_text": "reponse 1 question 2 group 2",
                "order": "1",
                "created_date": "2017-09-11T11:19:51.710Z",
                "question": "59b6715725dcd2060da7f597",
                "__v": 0
              },
              {
                "_id": "59b6715725dcd2060da7f59f",
                "response_text": "reponse 2 question 2 group 2",
                "order": "2",
                "created_date": "2017-09-11T11:19:51.710Z",
                "question": "59b6715725dcd2060da7f597",
                "__v": 0
              }
            ]
          }
        ]
      }
    ]
  }
]

有人可以帮我按照示例中所示的方式构建响应吗?

【问题讨论】:

您使用preserveNullAndEmptyArrays 有什么原因,还是您只是从某个地方复制了示例?在 $lookup 与外部集合中的任何结果不匹配的情况下,它确实会有所不同。然而,如果在关系总是期望匹配某些东西时不需要这样做,这是一个合理的性能改进。您还错过了 Question 架构定义,但我们可以合理地假设它与示例输出匹配,因为其他架构也匹配。 我不确定你在问什么,因为你似乎正在做你需要做的一切。 【参考方案1】:

大多数情况下,您需要在使用$unwind 处理后使用$group 来“重建”,以便再次嵌套您的数组输出。还有一些提示:

   // Join surveys -> groups -> questions -> responses, then rebuild the
   // nesting with two successive $group stages (innermost array first).
   Survey.aggregate([
      // Groups whose `survey` field references this survey.
      { "$lookup": {
        "from": Group.collection.name,
        "localField": "_id",
        "foreignField": "survey",
        "as": "groups"
      }},
      { "$unwind": "$groups" },
      // Replace the array of question ids with the question documents.
      { "$lookup": {
        "from": Question.collection.name,
        "localField": "groups.questions",
        "foreignField": "_id",
        "as": "groups.questions"
      }},
      { "$unwind": "$groups.questions" },
      // Attach the responses to the (now single) question.
      { "$lookup": {
        "from": Response.collection.name,
        "localField": "groups.questions._id",
        "foreignField": "question",
        "as": "groups.questions.responses"
      }},
      // First rebuild: one document per (survey, group), collecting questions.
      // Everything except the array content lives in the grouping key.
      { "$group": {
        "_id": {
          "_id": "$_id",
          "company": "$company",
          "created_date": "$created_date",
          "enabled": "$enabled",
          "name": "$name",
          "groups": {
            "_id": "$groups._id",
            "name": "$groups.name",
            "order": "$groups.order",
            "created_date": "$groups.created_date",
            "survey": "$groups.survey"
          }
        },
        "questions": { "$push": "$groups.questions" }
      }},
      // $group output order is not guaranteed, so sort before pushing again.
      { "$sort": { "_id": 1 } },
      // Second rebuild: one document per survey, collecting groups. Fields
      // from the previous grouping key are addressed with the `_id.` prefix.
      { "$group": {
        "_id": "$_id._id",
        "company": { "$first": "$_id.company" },
        "created_date": { "$first": "$_id.created_date" },
        "enabled": { "$first": "$_id.enabled" },
        "name": { "$first": "$_id.name" },
        "groups": {
          "$push": {
            "_id": "$_id.groups._id",
            "name": "$_id.groups.name",
            "order": "$_id.groups.order",
            "created_date": "$_id.groups.created_date",
            "survey": "$_id.groups.survey",
            "questions": "$questions"
          }
        }
      }},
      { "$sort": { "_id": 1 } }
   ]);

这就是重建数组的方法,您一次只做一步,而不是试图一次性完成所有工作。这可能是最难理解的概念,但“管道”意味着您实际上可以“多次”执行操作,将一个操作链接到另一个操作的输出。

所以第一个$group 是在“组”详细信息级别完成的,因为您想要$push 数组中的"questions" 项目,这是$unwind 的最后一个“解构”。请注意,"responses" 仍然是一个数组,作为最后一个$lookup 阶段的结果。但除了数组内容之外,其他所有内容都在_id“分组键”中。

在“第二个”$group 上,您实际上使用$first 之类的运算符来构造Survey 级别的特定字段属性。 "groups" 数组又是用$push 构造的,因此上一阶段的“分组键”中的每个属性都以_id 为前缀,所以这里都是引用它们的方式。

此外,从技术角度来看,如果您期望结果具有特定的顺序,则应始终在每次调用 $group 之后执行 $sort。分组键的结果不保证以任何特定顺序返回(尽管通常是逆序的堆栈顺序)。如果您期望某种顺序,请明确指定,尤其是在 $group 之后使用 $push 重建数组时。

最初的 $group 之前之所以没有 $sort,是因为前面的管道阶段实际上不会对现有顺序产生任何影响。因此,文档被发现时的顺序始终保持不变。

几个提示:

    像 Group.collection.name 这样的写法实际上是利用 Mongoose 模型上定义的属性来完成“获取集合名称”之类的事情。这样您就不必在 $lookup 中硬编码集合名称,并且能与运行代码时模型上注册的内容保持一致。

    使用您打算输出的字段名称。为路径制作临时名称确实没有多大意义,除非您是在某个管道阶段专门这样做,以便在稍后阶段“重新排序”字段的输出。否则,在所有情况下都直接使用您希望输出中具有的名称即可。这样更容易阅读和理解意图。

    除非你是认真的,否则不要使用preserveNullAndEmptyArrays 之类的选项。有一种“特殊方式”,$lookup + $unwind 的组合实际上被处理,并且真正在“单个阶段”中执行,而不是在“展开”之前检索所有结果。您可以在聚合管道的“解释”输出中看到这一点。简而言之,如果您总是有关系匹配,则不要使用该选项。最好不要。


演示

作为一个完整的清单和概念证明,我们可以加载您的源 JSON,将其存储在数据库中的单独集合中,然后使用聚合语句来检索和重建所需的结构:

const fs = require('fs'),
      mongoose = require('mongoose'),
      Schema = mongoose.Schema;

// Point Mongoose's promise implementation at the runtime's global Promise.
mongoose.Promise = global.Promise;
// Turn on Mongoose's debug option so the operations it issues are logged.
mongoose.set('debug',true);

// Connection string and options handed to mongoose.connect().
const uri = 'mongodb://localhost/nested';
const options = { useMongoClient: true };

// Response -> Question (via `question`).
const responseSchema = new Schema({
  response_text: String,
  order: String,
  created_date: Date,
  question: { type: Schema.Types.ObjectId, ref: 'Question' }
});

// Question -> Group (via `group`).
const questionSchema = new Schema({
  question_text: String,
  order: String,
  created_date: Date,
  group: { type: Schema.Types.ObjectId, ref: 'Group' }
});

// Group -> Survey (via `survey`), plus an array of Question references.
const groupSchema = new Schema({
  name: String,
  order: String,
  created_date: Date,
  survey: { type: Schema.Types.ObjectId, ref: 'Survey' },
  questions: [{ type: Schema.Types.ObjectId, ref: 'Question' }]
});

// Survey -> Company (via `company`).
const surveySchema = new Schema({
  company: { type: Schema.Types.ObjectId, ref: 'Company' },
  created_date: Date,
  enabled: Boolean,
  name: String
});

// Company carries no fields of its own in this demo.
const companySchema = new Schema({

});

const Company = mongoose.model('Company', companySchema);
const Survey = mongoose.model('Survey', surveySchema);
const Group = mongoose.model('Group', groupSchema);
const Question = mongoose.model('Question', questionSchema);
const Response = mongoose.model('Response', responseSchema);


/**
 * Print a value to stdout as JSON indented by two spaces.
 * @param {*} data - Any JSON-serialisable value.
 */
function log(data) {
  console.log(JSON.stringify(data,undefined,2));
}

// Demo driver: connect, wipe every registered model, load the nested JSON
// into separate collections, then rebuild the nesting with one aggregation.
(async function() {

  try {

    const conn = await mongoose.connect(uri,options);

    // Start from empty collections for every registered model.
    await Promise.all(
      Object.keys(conn.models).map( m => conn.models[m].remove() )
    );

    // Initialize data
    const content = JSON.parse(fs.readFileSync('./jsonSurveys.json'));

    // Walk the nested source and store each level in its own collection.
    for ( const item of content ) {

      await Survey.create(item);
      await Company.create({ _id: item.company });

      for ( const group of item.groups ) {
        await Group.create(group);
        for ( const question of group.questions ) {
          await Question.create(question);
          for ( const response of question.responses ) {
            await Response.create(response);
          }
        }
      }

    }

    // Run aggregation

    const results = await Survey.aggregate([
      { "$lookup": {
        "from": Group.collection.name,
        "localField": "_id",
        "foreignField": "survey",
        "as": "groups"
      }},
      { "$unwind": "$groups" },
      { "$lookup": {
        "from": Question.collection.name,
        "localField": "groups.questions",
        "foreignField": "_id",
        "as": "groups.questions"
      }},
      { "$unwind": "$groups.questions" },
      { "$lookup": {
        "from": Response.collection.name,
        "localField": "groups.questions._id",
        "foreignField": "question",
        "as": "groups.questions.responses"
      }},
      // First rebuild: questions per (survey, group).
      { "$group": {
        "_id": {
          "_id": "$_id",
          "company": "$company",
          "created_date": "$created_date",
          "enabled": "$enabled",
          "name": "$name",
          "groups": {
            "_id": "$groups._id",
            "name": "$groups.name",
            "order": "$groups.order",
            "created_date": "$groups.created_date",
            "survey": "$groups.survey"
          }
        },
        "questions": { "$push": "$groups.questions" }
      }},
      { "$sort": { "_id": 1 } },
      // Second rebuild: groups per survey.
      { "$group": {
        "_id": "$_id._id",
        "company": { "$first": "$_id.company" },
        "created_date": { "$first": "$_id.created_date" },
        "enabled": { "$first": "$_id.enabled" },
        "name": { "$first": "$_id.name" },
        "groups": {
          "$push": {
            "_id": "$_id.groups._id",
            "name": "$_id.groups.name",
            "order": "$_id.groups.order",
            "created_date": "$_id.groups.created_date",
            "survey": "$_id.groups.survey",
            "questions": "$questions"
          }
        }
      }},
      { "$sort": { "_id": 1 } }
    ]);

    log(results);

  } catch(e) {
    console.error(e);
  } finally {
    // Always release the connection, whether or not the run succeeded.
    mongoose.disconnect();
  }

})();

另一种情况

另外值得注意的是,通过一些小的架构更改,可以通过使用对.populate() 的嵌套调用来实现相同的结果:

  // Nested populate: surveys -> groups -> questions -> responses.
  let alternate = await Survey.find().populate({
      path: 'groups',
      populate: {
        path: 'questions',
        populate: {
          path: 'responses'
        }
      }
    });

虽然它看起来更简单,但实际上引入了更多负载,因为这会向数据库发出多个查询以检索数据,而不是一次调用:

Mongoose: groups.find({ survey: { '$in': [ ObjectId("59b6715725dcd2060da7f591") ] } }, { fields: {} })
Mongoose: questions.find({ _id: { '$in': [ ObjectId("59b6715725dcd2060da7f594"), ObjectId("59b6715725dcd2060da7f595"), ObjectId("59b6715725dcd2060da7f596"), ObjectId("59b6715725dcd2060da7f597") ] } }, { fields: {} })
Mongoose: responses.find({ question: { '$in': [ ObjectId("59b6715725dcd2060da7f594"), ObjectId("59b6715725dcd2060da7f595"), ObjectId("59b6715725dcd2060da7f596"), ObjectId("59b6715725dcd2060da7f597") ] } }, { fields: {} })

您可以在修改后的清单中看到架构更改(只是为连接添加了虚拟字段)以及正在运行的代码:

const fs = require('fs'),
      mongoose = require('mongoose'),
      Schema = mongoose.Schema;

// Point Mongoose's promise implementation at the runtime's global Promise.
mongoose.Promise = global.Promise;
// Turn on Mongoose's debug option so the operations it issues are logged.
mongoose.set('debug',true);

// Connection string and options handed to mongoose.connect().
const uri = 'mongodb://localhost/nested';
const options = { useMongoClient: true };

// Response -> Question (via `question`).
const responseSchema = new Schema({
  response_text: String,
  order: String,
  created_date: Date,
  question: { type: Schema.Types.ObjectId, ref: 'Question' }
});

// Question -> Group (via `group`). Virtuals are included in JSON output;
// the transform removes the `id` property from the serialised object.
const questionSchema = new Schema({
  question_text: String,
  order: String,
  created_date: Date,
  group: { type: Schema.Types.ObjectId, ref: 'Group' }
},{
  toJSON: {
    virtuals: true,
    transform: function(doc,obj) {
      delete obj.id;
      return obj;
    }
  }
});

// Virtual join: responses whose `question` field equals this question's _id.
questionSchema.virtual('responses',{
  ref: 'Response',
  localField: '_id',
  foreignField: 'question'
});

// Group -> Survey (via `survey`), plus an array of Question references.
const groupSchema = new Schema({
  name: String,
  order: String,
  created_date: Date,
  survey: { type: Schema.Types.ObjectId, ref: 'Survey' },
  questions: [{ type: Schema.Types.ObjectId, ref: 'Question' }]
});

// Survey -> Company (via `company`). Same JSON options as questionSchema.
const surveySchema = new Schema({
  company: { type: Schema.Types.ObjectId, ref: 'Company' },
  created_date: Date,
  enabled: Boolean,
  name: String
},{
  toJSON: {
    virtuals: true,
    transform: function(doc,obj) {
      delete obj.id;
      return obj;
    }
  }
});

// Virtual join: groups whose `survey` field equals this survey's _id.
surveySchema.virtual('groups',{
  ref: 'Group',
  localField: '_id',
  foreignField: 'survey'
});

// Company carries no fields of its own in this demo.
const companySchema = new Schema({

});

const Company = mongoose.model('Company', companySchema);
const Survey = mongoose.model('Survey', surveySchema);
const Group = mongoose.model('Group', groupSchema);
const Question = mongoose.model('Question', questionSchema);
const Response = mongoose.model('Response', responseSchema);


/**
 * Print a value to stdout as JSON indented by two spaces.
 * @param {*} data - Any JSON-serialisable value.
 */
function log(data) {
  console.log(JSON.stringify(data,undefined,2));
}

// Demo driver: connect, wipe every registered model, load the nested JSON
// into separate collections, then produce the nested result twice: once
// with a single aggregation and once with nested populate() calls.
(async function() {

  try {

    const conn = await mongoose.connect(uri,options);

    // Start from empty collections for every registered model.
    await Promise.all(
      Object.keys(conn.models).map( m => conn.models[m].remove() )
    );

    // Initialize data
    const content = JSON.parse(fs.readFileSync('./jsonSurveys.json'));

    // Walk the nested source and store each level in its own collection.
    for ( const item of content ) {

      await Survey.create(item);
      await Company.create({ _id: item.company });

      for ( const group of item.groups ) {
        await Group.create(group);
        for ( const question of group.questions ) {
          await Question.create(question);
          for ( const response of question.responses ) {
            await Response.create(response);
          }
        }
      }

    }

    // Run aggregation

    const results = await Survey.aggregate([
      { "$lookup": {
        "from": Group.collection.name,
        "localField": "_id",
        "foreignField": "survey",
        "as": "groups"
      }},
      { "$unwind": "$groups" },
      { "$lookup": {
        "from": Question.collection.name,
        "localField": "groups.questions",
        "foreignField": "_id",
        "as": "groups.questions"
      }},
      { "$unwind": "$groups.questions" },
      { "$lookup": {
        "from": Response.collection.name,
        "localField": "groups.questions._id",
        "foreignField": "question",
        "as": "groups.questions.responses"
      }},
      // First rebuild: questions per (survey, group).
      { "$group": {
        "_id": {
          "_id": "$_id",
          "company": "$company",
          "created_date": "$created_date",
          "enabled": "$enabled",
          "name": "$name",
          "groups": {
            "_id": "$groups._id",
            "name": "$groups.name",
            "order": "$groups.order",
            "created_date": "$groups.created_date",
            "survey": "$groups.survey"
          }
        },
        "questions": { "$push": "$groups.questions" }
      }},
      { "$sort": { "_id": 1 } },
      // Second rebuild: groups per survey.
      { "$group": {
        "_id": "$_id._id",
        "company": { "$first": "$_id.company" },
        "created_date": { "$first": "$_id.created_date" },
        "enabled": { "$first": "$_id.enabled" },
        "name": { "$first": "$_id.name" },
        "groups": {
          "$push": {
            "_id": "$_id.groups._id",
            "name": "$_id.groups.name",
            "order": "$_id.groups.order",
            "created_date": "$_id.groups.created_date",
            "survey": "$_id.groups.survey",
            "questions": "$questions"
          }
        }
      }},
      { "$sort": { "_id": 1 } }
    ]);

    log(results);

    // Same nesting through populate(), following the `groups` and
    // `responses` virtuals and the `questions` reference array.
    const alternate = await Survey.find().populate({
      path: 'groups',
      populate: {
        path: 'questions',
        populate: {
          path: 'responses'
        }
      }
    });

    log(alternate);

  } catch(e) {
    console.error(e);
  } finally {
    // Always release the connection, whether or not the run succeeded.
    mongoose.disconnect();
  }

})();

【讨论】:

嗨,这是最完整、最完美的答案,非常感谢 :)。我还有一个问题:我会在前端使用 Angular,我想分别创建和更新每个集合(调查、组、问题),并使用 $lookup 来显示调查,这样做是否可行且正确?对不起,我的英语不好 :(。 @SoufianeLam 当您有新问题时,请 Ask a new Question。花时间把问题写清楚并加以解释,就会有人尝试回答。这可能吗?是的,我一直都这样做。不过这是一个相当宽泛的问题,最好先看看 How do I ask a good question? 和 What types of questions should I avoid asking?。只要您把意图表达得足够清楚,英语是否准确就没有那么重要,其他人可以帮忙修改。 谢谢你的回答。好的,这是我在 *** 上的第一次提问,下次我会花时间把问题写清楚并加以解释 :)。

以上是关于来自多个集合的 $lookup 和嵌套输出的主要内容,如果未能解决你的问题,请参考以下文章

来自多个集合的 $lookup 的 Mongoose 聚合返回空集

$lookup 与深度嵌套的对象

$lookup 嵌套文档

如何在 mongodb 中使用 $lookup 加入多个集合

Mongoose.aggregate(pipeline) 使用 $unwind、$lookup、$group 链接多个集合

MongoDB 聚合 - 我如何“$lookup”嵌套文档“_id”?