Skip to content

Small fixes to the Matrix backfilling history script pt. 4 - Don't try to lookup by `virtualUser`/`parentId` at the same time which can be very slow

Eric Eastwood requested to merge backfill-messages-to-matrix-follow-up4 into develop

Small fixes to the Matrix backfilling history script pt. 4 - Don't try to lookup by virtualUser/parentId at the same time which can be very slow

Follow-up to !2313 (merged)

Part of #2609 (closed)

Why and what is it slow?

Our query to iterate over the main messages in the room looks like the following and can be very slow. I'm noticing this particularly in rooms where a single message was sent in the live room and was already bridged to Matrix. My guess is that this is because it can't find a message ID that is less than the live message ID we want to stop at, it keeps paginating through the whole history of messages. This kinda doesn't make sense though as it works fine for rooms with more messages, and from the query plan explanation I don't really see any difference to the stages (sort, input, filter) or in the executionStats (totalKeysExamined/totalDocsExamined). Or it's just that we don't have a compound index for virtualUser/parentId 🤷

db.chatmessages.find({
  "virtualUser" : {
    "$exists" : false
  },
  "parentId" : {
    "$exists" : false
  },
  "toTroupeId" : ObjectId("52a13d41ed5ab0b3bf04f1b8"),
  "_id" : {
    "$lt" : ObjectId("633015b1f4d7a323deecc37b")
  }
}).sort({ "_id": 1 })
.explain() query plan
{
  "queryPlanner" : {
    "plannerVersion" : 1,
    "namespace" : "gitter.chatmessages",
    "indexFilterSet" : false,
    "parsedQuery" : {
      "$and" : [
        {
          "toTroupeId" : {
            "$eq" : ObjectId("52a13d41ed5ab0b3bf04f1b8")
          }
        },
        {
          "_id" : {
            "$lt" : ObjectId("633015b1f4d7a323deecc37b")
          }
        },
        {
          "$not" : {
            "parentId" : {
              "$exists" : true
            }
          }
        },
        {
          "$not" : {
            "virtualUser" : {
              "$exists" : true
            }
          }
        }
      ]
    },
    "winningPlan" : {
      "stage" : "SORT",
      "sortPattern" : {
        "_id" : 1
      },
      "inputStage" : {
        "stage" : "SORT_KEY_GENERATOR",
        "inputStage" : {
          "stage" : "FETCH",
          "filter" : {
            "$and" : [
              {
                "_id" : {
                  "$lt" : ObjectId("633015b1f4d7a323deecc37b")
                }
              },
              {
                "$not" : {
                  "parentId" : {
                    "$exists" : true
                  }
                }
              },
              {
                "$not" : {
                  "virtualUser" : {
                    "$exists" : true
                  }
                }
              }
            ]
          },
          "inputStage" : {
            "stage" : "IXSCAN",
            "keyPattern" : {
              "toTroupeId" : 1,
              "sent" : -1
            },
            "indexName" : "toTroupeId_1_sent_-1",
            "isMultiKey" : false,
            "isUnique" : false,
            "isSparse" : false,
            "isPartial" : false,
            "indexVersion" : 1,
            "direction" : "forward",
            "indexBounds" : {
              "toTroupeId" : [
                "[ObjectId('52a13d41ed5ab0b3bf04f1b8'), ObjectId('52a13d41ed5ab0b3bf04f1b8')]"
              ],
              "sent" : [
                "[MaxKey, MinKey]"
              ]
            }
          }
        }
      }
    },
    "rejectedPlans" : [
      {
        "stage" : "SORT",
        "sortPattern" : {
          "_id" : 1
        },
        "inputStage" : {
          "stage" : "SORT_KEY_GENERATOR",
          "inputStage" : {
            "stage" : "FETCH",
            "filter" : {
              "$and" : [
                {
                  "$not" : {
                    "parentId" : {
                      "$exists" : true
                    }
                  }
                },
                {
                  "toTroupeId" : {
                    "$eq" : ObjectId("52a13d41ed5ab0b3bf04f1b8")
                  }
                },
                {
                  "_id" : {
                    "$lt" : ObjectId("633015b1f4d7a323deecc37b")
                  }
                },
                {
                  "$not" : {
                    "virtualUser" : {
                      "$exists" : true
                    }
                  }
                }
              ]
            },
            "inputStage" : {
              "stage" : "IXSCAN",
              "keyPattern" : {
                "parentId" : 1,
                "sent" : -1
              },
              "indexName" : "parentId_1_sent_-1",
              "isMultiKey" : false,
              "isUnique" : false,
              "isSparse" : false,
              "isPartial" : false,
              "indexVersion" : 1,
              "direction" : "forward",
              "indexBounds" : {
                "parentId" : [
                  "[null, null]"
                ],
                "sent" : [
                  "[MaxKey, MinKey]"
                ]
              }
            }
          }
        }
      },
      {
        "stage" : "FETCH",
        "filter" : {
          "$and" : [
            {
              "toTroupeId" : {
                "$eq" : ObjectId("52a13d41ed5ab0b3bf04f1b8")
              }
            },
            {
              "$not" : {
                "parentId" : {
                  "$exists" : true
                }
              }
            },
            {
              "$not" : {
                "virtualUser" : {
                  "$exists" : true
                }
              }
            }
          ]
        },
        "inputStage" : {
          "stage" : "IXSCAN",
          "keyPattern" : {
            "_id" : 1
          },
          "indexName" : "_id_",
          "isMultiKey" : false,
          "isUnique" : true,
          "isSparse" : false,
          "isPartial" : false,
          "indexVersion" : 1,
          "direction" : "forward",
          "indexBounds" : {
            "_id" : [
              "[ObjectId('000000000000000000000000'), ObjectId('633015b1f4d7a323deecc37b'))"
            ]
          }
        }
      }
    ]
  },
  "serverInfo" : {
    ...
  },
  "ok" : 1
}

If we instead break up the query virtualUser vs parentId filters, it runs fast!

db.chatmessages.find({
  "virtualUser" : {
    "$exists" : false
  },
  "toTroupeId" : ObjectId("52a13d41ed5ab0b3bf04f1b8"),
  "_id" : {
    "$lt" : ObjectId("633015b1f4d7a323deecc37b")
  }
}).sort({ "_id": 1 });
.explain() query plan
{
  "queryPlanner" : {
    "plannerVersion" : 1,
    "namespace" : "gitter.chatmessages",
    "indexFilterSet" : false,
    "parsedQuery" : {
      "$and" : [
        {
          "toTroupeId" : {
            "$eq" : ObjectId("52a13d41ed5ab0b3bf04f1b8")
          }
        },
        {
          "_id" : {
            "$lt" : ObjectId("633015b1f4d7a323deecc37b")
          }
        },
        {
          "$not" : {
            "virtualUser" : {
              "$exists" : true
            }
          }
        }
      ]
    },
    "winningPlan" : {
      "stage" : "SORT",
      "sortPattern" : {
        "_id" : 1
      },
      "inputStage" : {
        "stage" : "SORT_KEY_GENERATOR",
        "inputStage" : {
          "stage" : "FETCH",
          "filter" : {
            "$and" : [
              {
                "_id" : {
                  "$lt" : ObjectId("633015b1f4d7a323deecc37b")
                }
              },
              {
                "$not" : {
                  "virtualUser" : {
                    "$exists" : true
                  }
                }
              }
            ]
          },
          "inputStage" : {
            "stage" : "IXSCAN",
            "keyPattern" : {
              "toTroupeId" : 1,
              "sent" : -1
            },
            "indexName" : "toTroupeId_1_sent_-1",
            "isMultiKey" : false,
            "isUnique" : false,
            "isSparse" : false,
            "isPartial" : false,
            "indexVersion" : 1,
            "direction" : "forward",
            "indexBounds" : {
              "toTroupeId" : [
                "[ObjectId('52a13d41ed5ab0b3bf04f1b8'), ObjectId('52a13d41ed5ab0b3bf04f1b8')]"
              ],
              "sent" : [
                "[MaxKey, MinKey]"
              ]
            }
          }
        }
      }
    },
    "rejectedPlans" : [
      {
        "stage" : "FETCH",
        "filter" : {
          "$and" : [
            {
              "toTroupeId" : {
                "$eq" : ObjectId("52a13d41ed5ab0b3bf04f1b8")
              }
            },
            {
              "$not" : {
                "virtualUser" : {
                  "$exists" : true
                }
              }
            }
          ]
        },
        "inputStage" : {
          "stage" : "IXSCAN",
          "keyPattern" : {
            "_id" : 1
          },
          "indexName" : "_id_",
          "isMultiKey" : false,
          "isUnique" : true,
          "isSparse" : false,
          "isPartial" : false,
          "indexVersion" : 1,
          "direction" : "forward",
          "indexBounds" : {
            "_id" : [
              "[ObjectId('000000000000000000000000'), ObjectId('633015b1f4d7a323deecc37b'))"
            ]
          }
        }
      }
    ]
  },
  "serverInfo" : {
    ...
  },
  "ok" : 1
}
db.chatmessages.find({
  "parentId" : {
    "$exists" : false
  },
  "toTroupeId" : ObjectId("52a13d41ed5ab0b3bf04f1b8"),
  "_id" : {
    "$lt" : ObjectId("633015b1f4d7a323deecc37b")
  }
}).sort({ "_id": 1 })
.explain() query plan
{
  "queryPlanner" : {
    "plannerVersion" : 1,
    "namespace" : "gitter.chatmessages",
    "indexFilterSet" : false,
    "parsedQuery" : {
      "$and" : [
        {
          "toTroupeId" : {
            "$eq" : ObjectId("52a13d41ed5ab0b3bf04f1b8")
          }
        },
        {
          "_id" : {
            "$lt" : ObjectId("633015b1f4d7a323deecc37b")
          }
        },
        {
          "$not" : {
            "parentId" : {
              "$exists" : true
            }
          }
        }
      ]
    },
    "winningPlan" : {
      "stage" : "SORT",
      "sortPattern" : {
        "_id" : 1
      },
      "inputStage" : {
        "stage" : "SORT_KEY_GENERATOR",
        "inputStage" : {
          "stage" : "FETCH",
          "filter" : {
            "$and" : [
              {
                "_id" : {
                  "$lt" : ObjectId("633015b1f4d7a323deecc37b")
                }
              },
              {
                "$not" : {
                  "parentId" : {
                    "$exists" : true
                  }
                }
              }
            ]
          },
          "inputStage" : {
            "stage" : "IXSCAN",
            "keyPattern" : {
              "toTroupeId" : 1,
              "sent" : -1
            },
            "indexName" : "toTroupeId_1_sent_-1",
            "isMultiKey" : false,
            "isUnique" : false,
            "isSparse" : false,
            "isPartial" : false,
            "indexVersion" : 1,
            "direction" : "forward",
            "indexBounds" : {
              "toTroupeId" : [
                "[ObjectId('52a13d41ed5ab0b3bf04f1b8'), ObjectId('52a13d41ed5ab0b3bf04f1b8')]"
              ],
              "sent" : [
                "[MaxKey, MinKey]"
              ]
            }
          }
        }
      }
    },
    "rejectedPlans" : [
      {
        "stage" : "SORT",
        "sortPattern" : {
          "_id" : 1
        },
        "inputStage" : {
          "stage" : "SORT_KEY_GENERATOR",
          "inputStage" : {
            "stage" : "FETCH",
            "filter" : {
              "$and" : [
                {
                  "$not" : {
                    "parentId" : {
                      "$exists" : true
                    }
                  }
                },
                {
                  "toTroupeId" : {
                    "$eq" : ObjectId("52a13d41ed5ab0b3bf04f1b8")
                  }
                },
                {
                  "_id" : {
                    "$lt" : ObjectId("633015b1f4d7a323deecc37b")
                  }
                }
              ]
            },
            "inputStage" : {
              "stage" : "IXSCAN",
              "keyPattern" : {
                "parentId" : 1,
                "sent" : -1
              },
              "indexName" : "parentId_1_sent_-1",
              "isMultiKey" : false,
              "isUnique" : false,
              "isSparse" : false,
              "isPartial" : false,
              "indexVersion" : 1,
              "direction" : "forward",
              "indexBounds" : {
                "parentId" : [
                  "[null, null]"
                ],
                "sent" : [
                  "[MaxKey, MinKey]"
                ]
              }
            }
          }
        }
      },
      {
        "stage" : "FETCH",
        "filter" : {
          "$and" : [
            {
              "toTroupeId" : {
                "$eq" : ObjectId("52a13d41ed5ab0b3bf04f1b8")
              }
            },
            {
              "$not" : {
                "parentId" : {
                  "$exists" : true
                }
              }
            }
          ]
        },
        "inputStage" : {
          "stage" : "IXSCAN",
          "keyPattern" : {
            "_id" : 1
          },
          "indexName" : "_id_",
          "isMultiKey" : false,
          "isUnique" : true,
          "isSparse" : false,
          "isPartial" : false,
          "indexVersion" : 1,
          "direction" : "forward",
          "indexBounds" : {
            "_id" : [
              "[ObjectId('000000000000000000000000'), ObjectId('633015b1f4d7a323deecc37b'))"
            ]
          }
        }
      }
    ]
  },
  "serverInfo" : {
    ...
  },
  "ok" : 1
}
Edited by Eric Eastwood

Merge request reports