Indexes in MongoDB

From Training Material
Revision as of 10:26, 19 October 2018 by Kbaran (talk | contribs) (→‎Collections Without _id)
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
Jump to navigation Jump to search


Indexes and Query Optimization

  • database index works like index in a book
  • indexes in MongoDB works almost identical to relational databases
  • because of indexes reads are faster but writes slower
  • 64 indexes per collection


Collection Without Indexes

for (var i=1; i<=1000000; i++) {
  db.visitors.insert({
    "i" : i,
    "visitor" : "visitor_"+i,
    "score" : Math.floor(Math.random()*10+1),
    "date" : new Date()
  })
}


One Field Indexes

db.visitors.explain().find({"visitor" : "visitor_330"})
db.visitors.explain().find({"visitor" : "visitor_330"}).limit(1)
db.visitors.explain().find({"visitor" : "visitor_99999"}).limit(1)
db.visitors.createIndex({"visitor" : 1})
db.visitors.explain().find({"visitor" : "visitor_99999"})


Compound Indexes

db.visitors.find().sort({"score" : 1, "visitor" : 1})
db.visitors.createIndex({"visitor" : 1})
db.visitors.createIndex({"score" : 1, "visitor" : 1})
db.visitors.explain(true).find({"score" : 10}).sort({"visitor" : -1})
db.visitors.explain(true).find({"score" : {"$gte" : 10, "$lte" : 20}})
db.visitors.explain(true).find({"score" : {"$gte" : 10, "$lte" : 20}}).sort({"visitor" : 1})
db.visitors.createIndex({"visitor" : 1, "score" : 1})
db.visitors.explain(true).find({"score" : {"$gte" : 10, "$lte" : 20}}).sort({"visitor" : 1}).hint({"visitor" : 1, "score" : 1})
db.visitors.dropIndex("visitor_1")


Compound Indexes (2)

  • pattern {SortKey : 1, CriteriaKey : 1}
  • pattern {ExactMatch : 1, RangeCriteria : 1}
  • sorting directions {"score" : -1, "visitor" : 1}
  • {"score" : -1, "visitor" : 1} = {"score" : 1, "visitor" : -1}
  • covered indexes (indexOnly : true or totalDocsExamined = 0 when totalKeysExamined > 0)
  • {key1 : 1, key2 : 1, key3 : 1} eliminates the need of creating {key1 : 1} and {key1 : 1, key2 : 1}
  • $-operators and indexes ($where, $exists, $nin, $ne, $not)
  • queries with $or can use more than one index


Indexing Arrays and SubDocuments

  • usually it behaves like normal index
  • difference between indexing {"BirthPlace" : 1} and {"BirthPlace.Country" : 1}
  • only one array field per index
  • multikey indexes
db.people.createIndex({"Movie.Title" : 1})
db.people.createIndex({"BirthPlace.Country" : 1})


Indexes in Details

  • low and high cardinality fields
  • understanding .explain()
  • using .hint()
  • query optimizer (100 results, 1000 queries, index creation)
  • fields in index must be smaller than 1kB
  • options: unique, dropDups, sparse
db.visitors.createIndex({"visitor" : 1}, {"unique" : 1, "dropDups" : 1})


Sparse Index

  • only contain entries for documents that have the indexed field (even if the index field contains a null value)
  • useful with unique constraint
db.sparse_index.insert([{y:1, x:1}, {y:1, x:2}, {y:1, x:3}, {y:1}])
db.sparse_index.createIndex({x:1}, {unique:1})
db.sparse_index.insert({y:1})

db.sparse_index.dropIndexes()
db.sparse_index.createIndex({x:1}, {unique:1, sparse:1})
db.sparse_index.insert({y:1})

db.sparse_index.find({"x" : {"$ne" : 2}}).hint({"x" : 1})
db.sparse_index.find({"x" : {"$ne" : 2}}).hint({"$natural" : 1})


Index Administration

  • system.indexes - read only collection that stores info about indexes
  • .createIndex()
  • .dropIndex(), .dropIndexes()
  • .getIndexes()
  • options: background, name


Capped Collections

  • it has to be created before first insert occurs
  • fixed size or size and number of documents (circular queue)
  • forbidden operations on documents: removing, updating (if it will increase the size)
  • can not be sharded or changed
  • sorting: $natural : 1 (or -1)
db.createCollection("capped_collection", {"capped" : true, "size" : 100000})
db.createCollection("capped_collection", {"capped" : true, "size" : 100000, "max" : 20})

db.people.copyTo("capped_collection")
db.runCommand({"convertToCapped" : "capped_collection", "size" : 100000})


Tailable Cursors

  • inspired by the tail -f command
  • not closed when their results are exhausted
  • can be used only on capped collection
  • will die after 10 minutes


TTL Indexes (time-to-live)

  • TTL index allows you to set a timeout for each document
  • removing is performed every 60 secons
  • can be created only on single field (date field)
db.ttl_collection.insert({"User" : "user1", "LastUpdated" : new Date()})
db.ttl_collection.createIndex({"LastUpdated" : 1}, {"expireAfterSeconds" : 30})
db.ttl_collection.find()


Full-Text Indexes

  • quick text search with built-in multi-language support
  • very expensive, especially on busy collections
db.people.createIndex({"Name" : "text"})
db.people.createIndex({"Name" : "text", "Bio" : "text"}, {"weights" : {"Name": 2}})
db.people.createIndex({"$**" : "text"})
db.people.createIndex({"whatever" : "text"}, {"weights" : {"Name" : 5, "Movie.Name" : 2, "$**" : 1}})

db.runCommand({"text" : "people", "search" : "emma thompson"})
db.people.find({$text : {$search : "emma thompson"}})
db.people.find({$text : {$search : "\"emma thompson\""}})
db.people.find({$text : {$search : "-emma thompson"}})
db.people.find({$text : {$search : "emma thompson"}}, {score : {$meta : "textScore"}})
db.people.find({$text : {$search : "emma thompson"}}, {score : {$meta : "textScore"}}).sort({ score: { $meta: "textScore" } })


Geospatial Indexes

2d index

  • for data stored as points on a two-dimensional plane
db.dots.insert([{Name:"A", location:[10, 5]}, {Name:"B", location:[17, -5]}, {Name:"C", location:[0, 2]}, {Name:"D", location:[-3, -3]}])
db.dots.createIndex({location:"2d", type:1})
db.dots.find({location:{$near:[0,0]}})


2dsphere index

db.cities.insert({Name:"Rzeszów", "location": {"type":"Point", "coordinates":[22.008606,50.040264]}})
db.cities.insert({Name:"Warszawa", "location": {"type":"Point", "coordinates":[21.0123237,52.2328474]}})
db.cities.insert({Name:"Wrocław", "location": {"type":"Point", "coordinates":[17.0342894,51.1174725]}})
db.cities.insert({Name:"Kraków", "location": {"type":"Point", "coordinates":[19.9012826,50.0719423]}})
db.cities.insert({Name:"Kielce", "location": {"type":"Point", "coordinates":[20.6156414,50.85404]}})
db.cities.createIndex({location:"2dsphere"})

db.cities.find({location:{$near: 
    {$geometry: {type:"Point", coordinates:[20.6156414, 50.85404]}, $minDistance: 0, $maxDistance:200000}
}})

db.runCommand({geoNear: "cities", near: [20.6156414, 50.85404], spherical: true, distanceMultiplier: 6378.1}) // distance in kilometers

db.cities.find({location: {$geoWithin: 
    {$geometry: {type: "Polygon", coordinates: [[ [22.008606,50.040264], [21.0123237,52.2328474], [19.9012826,50.0719423], [22.008606,50.040264] ]]}}
}})