一、简介
-
mysql适合结构化数据,类似excel表格一样定义严格的数据,用于数据量中,速度一般支持事务处理场合
-
redis适合缓存内存对象,如缓存队列,用于数据量小,速度快不支持事务处理高并发场合
-
mongodb,适合半结构化数据,如文本信息,用于数据量大,速度较快不支持事务处理场合
-
hadoop是个生态系统,上面有大数据分析很多组件,适合事后大数据分析任务
-
spark类似hadoop,偏向于内存计算,流计算,适合实时半实时大数据分析任务
移动互联网及物联网让数据呈指数增长,NoSql大数据新起后,数据存储领域发展很快,似乎方向都是向大数据,内存计算,分布式框架,平台化发展,出现不少新的方法,普通应用TB,GB级别达不到PB级别的数据存储,用mongodb,mysql就够了,hadoop,spark这类是航母一般多是大规模应用场景,多用于事后分析统计用,如电商的推荐系统分析系统。IAO
看标题,这里是不是跑题了呢,显然不是,了解一下mongodb在存储中的位置还是非常有必要的,explain 和 hint 一看就知道是从mysql借鉴过来的(猜的),实际就是检测查询语句的性能和使用强制索引
二、explain
先写入测试数据
db.test.insertMany([ { "_id" : 1, "a" : "f1", b: "food", c: 500 }, { "_id" : 2, "a" : "f2", b: "food", c: 100 }, { "_id" : 3, "a" : "p1", b: "paper", c: 200 }, { "_id" : 4, "a" : "p2", b: "paper", c: 150 }, { "_id" : 5, "a" : "f3", b: "food", c: 300 }, { "_id" : 6, "a" : "t1", b: "toys", c: 500 }, { "_id" : 7, "a" : "a1", b: "apparel", c: 250 }, { "_id" : 8, "a" : "a2", b: "apparel", c: 400 }, { "_id" : 9, "a" : "t2", b: "toys", c: 50 }, { "_id" : 10, "a" : "f4", b: "food", c: 75 }]);
写入成功返回值
{ "acknowledged" : true, "insertedIds" : [ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 ] }
开始查询
> db.test.find(); { "_id" : 1, "a" : "f1", "b" : "food", "c" : 500 } { "_id" : 2, "a" : "f2", "b" : "food", "c" : 100 } { "_id" : 3, "a" : "p1", "b" : "paper", "c" : 200 } { "_id" : 4, "a" : "p2", "b" : "paper", "c" : 150 } { "_id" : 5, "a" : "f3", "b" : "food", "c" : 300 } { "_id" : 6, "a" : "t1", "b" : "toys", "c" : 500 } { "_id" : 7, "a" : "a1", "b" : "apparel", "c" : 250 } { "_id" : 8, "a" : "a2", "b" : "apparel", "c" : 400 } { "_id" : 9, "a" : "t2", "b" : "toys", "c" : 50 } { "_id" : 10, "a" : "f4", "b" : "food", "c" : 75 } > db.test.find().count(); 10 > db.test.find({ c: { $gte: 100, $lte: 200 }}).count() 3 > db.test.find({ c: { $gte: 100, $lte: 200 }}).explain("executionStats") { "queryPlanner" : { "plannerVersion" : 1, "namespace" : "test.test", "indexFilterSet" : false, "parsedQuery" : { "$and" : [ { "c" : { "$lte" : 200 } }, { "c" : { "$gte" : 100 } } ] }, "winningPlan" : { "stage" : "COLLSCAN", "filter" : { "$and" : [ { "c" : { "$lte" : 200 } }, { "c" : { "$gte" : 100 } } ] }, "direction" : "forward" }, "rejectedPlans" : [ ] }, "executionStats" : { "executionSuccess" : true, "nReturned" : 3, "executionTimeMillis" : 0, "totalKeysExamined" : 0, "totalDocsExamined" : 10, "executionStages" : { "stage" : "COLLSCAN", "filter" : { "$and" : [ { "c" : { "$lte" : 200 } }, { "c" : { "$gte" : 100 } } ] }, "nReturned" : 3, "executionTimeMillisEstimate" : 0, "works" : 12, "advanced" : 3, "needTime" : 8, "needYield" : 0, "saveState" : 0, "restoreState" : 0, "isEOF" : 1, "invalidates" : 0, "direction" : "forward", "docsExamined" : 10 } }, "serverInfo" : { "host" : "iZbp1g11g0cdeeq9ht9fhjZ", "port" : 27017, "version" : "3.4.12", "gitVersion" : "bfde702b19c1baad532ed183a871c12630c1bbba" }, "ok" : 1 }
看一下几个关键词
"stage" : "COLLSCAN",
"nReturned" : 3,
"totalDocsExamined" : 10,
全部扫描,不走索引,这里只是演示,所以数据量比较少,如果数据量多起来这样查询将会很慢,甚至会卡死
COLLSCAN
这个是什么意思呢? 如果你仔细一看,应该知道就是CollectionScan,就是所谓的“集合扫描”,对不对,看到集合扫描是不是就可以直接map到数据库中的table scan/heap scan呢??? 是的,这个就是所谓的性能最烂最无奈的由来。
nReturned
这个很简单,就是所谓的numReturned,就是说最后返回的num个数,从图中可以看到,就是最终返回了三条。。。
docsExamined
那这个是什么意思呢??就是documentsExamined,检查了10个documents。。。而从返回上面的nReturned。
创建索引并查询
> db.test.createIndex({ c:1}) { "createdCollectionAutomatically" : false, "numIndexesBefore" : 1, "numIndexesAfter" : 2, "ok" : 1 } > db.test.find({ c: { $gte: 100, $lte: 200 }}).explain("executionStats") { "queryPlanner" : { "plannerVersion" : 1, "namespace" : "test.test", "indexFilterSet" : false, "parsedQuery" : { "$and" : [ { "c" : { "$lte" : 200 } }, { "c" : { "$gte" : 100 } } ] }, "winningPlan" : { "stage" : "FETCH", "inputStage" : { "stage" : "IXSCAN", "keyPattern" : { "c" : 1 }, "indexName" : "c_1", "isMultiKey" : false, "isUnique" : false, "isSparse" : false, "isPartial" : false, "indexVersion" : 1, "direction" : "forward", "indexBounds" : { "c" : [ "[100.0, 200.0]" ] } } }, "rejectedPlans" : [ ] }, "executionStats" : { "executionSuccess" : true, "nReturned" : 3, "executionTimeMillis" : 0, "totalKeysExamined" : 3, "totalDocsExamined" : 3, "executionStages" : { "stage" : "FETCH", "nReturned" : 3, "executionTimeMillisEstimate" : 0, "works" : 4, "advanced" : 3, "needTime" : 0, "needYield" : 0, "saveState" : 0, "restoreState" : 0, "isEOF" : 1, "invalidates" : 0, "docsExamined" : 3, "alreadyHasObj" : 0, "inputStage" : { "stage" : "IXSCAN", "nReturned" : 3, "executionTimeMillisEstimate" : 0, "works" : 4, "advanced" : 3, "needTime" : 0, "needYield" : 0, "saveState" : 0, "restoreState" : 0, "isEOF" : 1, "invalidates" : 0, "keyPattern" : { "c" : 1 }, "indexName" : "c_1", "isMultiKey" : false, "isUnique" : false, "isSparse" : false, "isPartial" : false, "indexVersion" : 1, "direction" : "forward", "indexBounds" : { "c" : [ "[100.0, 200.0]" ] }, "keysExamined" : 3, "seeks" : 1, "dupsTested" : 0, "dupsDropped" : 0, "seenInvalidated" : 0 } } }, "serverInfo" : { "host" : "iZbp1g11g0cdeeq9ht9fhjZ", "port" : 27017, "version" : "3.4.12", "gitVersion" : "bfde702b19c1baad532ed183a871c12630c1bbba" }, "ok" : 1 }
再看看上面几个关键词
"stage" : "IXSCAN"
"totalDocsExamined" : 3,
瞬间就少了,这样查询时间也会大大减少
三、hint
这时一个很好玩的一个东西,就是用来force mongodb to excute special index,对吧,为了方便演示,我们做两组复合索引,比如这次我们在c和b上构建一下:
创建索引
> db.test.createIndex({ c:1,b:1}) { "createdCollectionAutomatically" : false, "numIndexesBefore" : 2, "numIndexesAfter" : 3, "ok" : 1 } > db.test.createIndex({ b:1,c:1}) { "createdCollectionAutomatically" : false, "numIndexesBefore" : 3, "numIndexesAfter" : 4, "ok" : 1 }
hint查询
> db.test.find({ c: { $gte: 100, $lte: 200 },b:"food"}).hint({c:1,b:1}).explain("executionStats") { "queryPlanner" : { "plannerVersion" : 1, "namespace" : "test.test", "indexFilterSet" : false, "parsedQuery" : { "$and" : [ { "b" : { "$eq" : "food" } }, { "c" : { "$lte" : 200 } }, { "c" : { "$gte" : 100 } } ] }, "winningPlan" : { "stage" : "FETCH", "inputStage" : { "stage" : "IXSCAN", "keyPattern" : { "c" : 1, "b" : 1 }, "indexName" : "c_1_b_1", "isMultiKey" : false, "isUnique" : false, "isSparse" : false, "isPartial" : false, "indexVersion" : 1, "direction" : "forward", "indexBounds" : { "c" : [ "[100.0, 200.0]" ], "b" : [ "[\"food\", \"food\"]" ] } } }, "rejectedPlans" : [ ] }, "executionStats" : { "executionSuccess" : true, "nReturned" : 1, "executionTimeMillis" : 0, "totalKeysExamined" : 3, "totalDocsExamined" : 1, "executionStages" : { "stage" : "FETCH", "nReturned" : 1, "executionTimeMillisEstimate" : 10, "works" : 3, "advanced" : 1, "needTime" : 1, "needYield" : 0, "saveState" : 0, "restoreState" : 0, "isEOF" : 1, "invalidates" : 0, "docsExamined" : 1, "alreadyHasObj" : 0, "inputStage" : { "stage" : "IXSCAN", "nReturned" : 1, "executionTimeMillisEstimate" : 10, "works" : 3, "advanced" : 1, "needTime" : 1, "needYield" : 0, "saveState" : 0, "restoreState" : 0, "isEOF" : 1, "invalidates" : 0, "keyPattern" : { "c" : 1, "b" : 1 }, "indexName" : "c_1_b_1", "isMultiKey" : false, "isUnique" : false, "isSparse" : false, "isPartial" : false, "indexVersion" : 1, "direction" : "forward", "indexBounds" : { "c" : [ "[100.0, 200.0]" ], "b" : [ "[\"food\", \"food\"]" ] }, "keysExamined" : 3, "seeks" : 2, "dupsTested" : 0, "dupsDropped" : 0, "seenInvalidated" : 0 } } }, "serverInfo" : { "host" : "iZbp1g11g0cdeeq9ht9fhjZ", "port" : 27017, "version" : "3.4.12", "gitVersion" : "bfde702b19c1baad532ed183a871c12630c1bbba" }, "ok" : 1 }
正常查询
> db.test.find({ c: { $gte: 100, $lte: 200 },b:"food"}).explain("executionStats") { "queryPlanner" : { "plannerVersion" : 1, "namespace" : "test.test", "indexFilterSet" : false, "parsedQuery" : { "$and" : [ { "b" : { "$eq" : "food" } }, { "c" : { "$lte" : 200 } }, { "c" : { "$gte" : 100 } } ] }, "winningPlan" : { "stage" : "FETCH", "inputStage" : { "stage" : "IXSCAN", "keyPattern" : { "b" : 1, "c" : 1 }, "indexName" : "b_1_c_1", "isMultiKey" : false, "isUnique" : false, "isSparse" : false, "isPartial" : false, "indexVersion" : 1, "direction" : "forward", "indexBounds" : { "b" : [ "[\"food\", \"food\"]" ], "c" : [ "[100.0, 200.0]" ] } } }, "rejectedPlans" : [ { "stage" : "FETCH", "inputStage" : { "stage" : "IXSCAN", "keyPattern" : { "c" : 1, "b" : 1 }, "indexName" : "c_1_b_1", "isMultiKey" : false, "isUnique" : false, "isSparse" : false, "isPartial" : false, "indexVersion" : 1, "direction" : "forward", "indexBounds" : { "c" : [ "[100.0, 200.0]" ], "b" : [ "[\"food\", \"food\"]" ] } } }, { "stage" : "FETCH", "filter" : { "b" : { "$eq" : "food" } }, "inputStage" : { "stage" : "IXSCAN", "keyPattern" : { "c" : 1 }, "indexName" : "c_1", "isMultiKey" : false, "isUnique" : false, "isSparse" : false, "isPartial" : false, "indexVersion" : 1, "direction" : "forward", "indexBounds" : { "c" : [ "[100.0, 200.0]" ] } } } ] }, "executionStats" : { "executionSuccess" : true, "nReturned" : 1, "executionTimeMillis" : 0, "totalKeysExamined" : 1, "totalDocsExamined" : 1, "executionStages" : { "stage" : "FETCH", "nReturned" : 1, "executionTimeMillisEstimate" : 0, "works" : 3, "advanced" : 1, "needTime" : 0, "needYield" : 0, "saveState" : 0, "restoreState" : 0, "isEOF" : 1, "invalidates" : 0, "docsExamined" : 1, "alreadyHasObj" : 0, "inputStage" : { "stage" : "IXSCAN", "nReturned" : 1, "executionTimeMillisEstimate" : 0, "works" : 2, "advanced" : 1, "needTime" : 0, "needYield" : 0, "saveState" : 0, "restoreState" : 0, "isEOF" : 1, "invalidates" : 0, "keyPattern" : { "b" : 1, "c" : 1 }, "indexName" : "b_1_c_1", "isMultiKey" : false, "isUnique" : false, "isSparse" : false, "isPartial" : false, "indexVersion" : 1, "direction" : "forward", "indexBounds" : { "b" : [ "[\"food\", \"food\"]" ], "c" : [ "[100.0, 200.0]" ] }, "keysExamined" : 1, "seeks" : 1, "dupsTested" : 0, "dupsDropped" : 0, "seenInvalidated" : 0 } } }, "serverInfo" : { "host" : "iZbp1g11g0cdeeq9ht9fhjZ", "port" : 27017, "version" : "3.4.12", "gitVersion" : "bfde702b19c1baad532ed183a871c12630c1bbba" }, "ok" : 1 }
主要对比的还是:
"totalKeysExamined" : 3,
"totalDocsExamined" : 1,
和
"totalKeysExamined" : 1,
"totalDocsExamined" : 1,
是不是比较有意思,有时候monogdb并不会,走你想要的索引,当你创建多个联合索引的时候,情况就比较明显了