Skip to content

mongo

MongoCollection

Class for querying MongoDB collections (implemented by either pymongo or mongomock) containing serialized EntryResource objects.

Source code in optimade/server/entry_collections/mongo.py
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
class MongoCollection(EntryCollection):
    """Class for querying MongoDB collections (implemented by either pymongo or mongomock)
    containing serialized [`EntryResource`][optimade.models.entries.EntryResource] objects.

    """

    def __init__(
        self,
        name: str,
        resource_cls: EntryResource,
        resource_mapper: BaseResourceMapper,
        database: str = CONFIG.mongo_database,
    ):
        """Initialize the MongoCollection for the given parameters.

        Parameters:
            name: The name of the collection.
            resource_cls: The type of entry resource that is stored by the collection.
            resource_mapper: A resource mapper object that handles aliases and
                format changes between deserialization and response.
            database: The name of the underlying MongoDB database to connect to.

        Raises:
            RuntimeError: If any alias or length alias provided by the
                resource mapper starts with a `$`.

        """
        super().__init__(
            resource_cls,
            resource_mapper,
            MongoTransformer(mapper=resource_mapper),
        )

        self.parser = LarkParser(version=(1, 0, 0), variant="default")
        self.collection = CLIENT[database][name]

        # check aliases do not clash with mongo operators
        self._check_aliases(self.resource_mapper.all_aliases())
        self._check_aliases(self.resource_mapper.all_length_aliases())

    def __len__(self) -> int:
        """Returns the total number of entries in the collection,
        as reported by the collection's `estimated_document_count` method.
        """
        return self.collection.estimated_document_count()

    def count(self, **kwargs: Any) -> int:
        """Returns the number of entries matching the query specified
        by the keyword arguments.

        Parameters:
            **kwargs: Query parameters as keyword arguments. The keys
                'filter', 'skip', 'limit', 'hint' and 'maxTimeMS' will be passed
                to the `pymongo.collection.Collection.count_documents` method.
                Any other key is silently dropped.

        """
        for k in list(kwargs.keys()):
            if k not in ("filter", "skip", "limit", "hint", "maxTimeMS"):
                del kwargs[k]
        if "filter" not in kwargs:  # "filter" is needed for count_documents()
            kwargs["filter"] = {}
        return self.collection.count_documents(**kwargs)

    def insert(self, data: List[EntryResource]) -> None:
        """Add the given entries to the underlying database.

        Warning:
            No validation is performed on the incoming data.

        Parameters:
            data: The entry resource objects to add to the database.

        """
        self.collection.insert_many(data)

    def handle_query_params(
        self, params: Union[EntryListingQueryParams, SingleEntryQueryParams]
    ) -> Dict[str, Any]:
        """Parse and interpret the backend-agnostic query parameter models into a dictionary
        that can be used by MongoDB.

        This Mongo-specific method calls the base `EntryCollection.handle_query_params` method
        and adds additional handling of the MongoDB ObjectID type.

        Parameters:
            params: The initialized query parameter model from the server.

        Raises:
            Forbidden: If too large of a page limit is provided.
            BadRequest: If an invalid request is made, e.g., with incorrect fields
                or response format.

        Returns:
            A dictionary representation of the query parameters.

        """
        criteria = super().handle_query_params(params)
        # Make sure a projection mapping exists before writing to it, so that a
        # criteria dictionary without a "projection" key cannot raise a KeyError.
        projection = criteria.setdefault("projection", {})
        # Handle MongoDB ObjectIDs:
        # - If they were not requested, then explicitly remove them
        # - If they were requested, then cast them to strings in the response
        if "_id" not in projection:
            projection["_id"] = False
        elif projection["_id"]:
            projection["_id"] = {"$toString": "$_id"}

        return criteria

    def _run_db_query(
        self, criteria: Dict[str, Any], single_entry: bool = False
    ) -> Tuple[List[Dict[str, Any]], int, bool]:
        """Run the query on the backend and collect the results.

        Parameters:
            criteria: A dictionary representation of the query parameters.
            single_entry: Whether or not the caller is expecting a single entry response.

        Returns:
            The list of entries from the database (without any re-mapping), the total number of
            entries matching the query and a boolean for whether or not there is more data available.

        """
        results = list(self.collection.find(**criteria))

        if CONFIG.database_backend == SupportedBackend.MONGOMOCK and criteria.get(
            "projection", {}
        ).get("_id"):
            # mongomock does not support `$toString` in projection, so we have to do it manually
            for doc in results:
                doc["_id"] = str(doc["_id"])

        nresults_now = len(results)
        if not single_entry:
            # Count all matches for the query, ignoring pagination: `limit` and
            # `skip` are removed from a shallow copy so `criteria` is untouched.
            criteria_nolimit = criteria.copy()
            criteria_nolimit.pop("limit", None)
            skip = criteria_nolimit.pop("skip", 0)
            data_returned = self.count(**criteria_nolimit)
            more_data_available = nresults_now + skip < data_returned
        else:
            # SingleEntryQueryParams, e.g., /structures/{entry_id}
            data_returned = nresults_now
            more_data_available = False

        return results, data_returned, more_data_available

    def _check_aliases(self, aliases):
        """Check that aliases do not clash with mongo keywords.

        Parameters:
            aliases: An iterable of alias pairs; the first two items of each
                pair are checked.

        Raises:
            RuntimeError: If either checked item of any pair starts with a `$`.

        """
        if any(
            alias[0].startswith("$") or alias[1].startswith("$") for alias in aliases
        ):
            raise RuntimeError(f"Cannot define an alias starting with a '$': {aliases}")

__init__(name, resource_cls, resource_mapper, database=CONFIG.mongo_database)

Initialize the MongoCollection for the given parameters.

Parameters:

Name Type Description Default
name str

The name of the collection.

required
resource_cls EntryResource

The type of entry resource that is stored by the collection.

required
resource_mapper BaseResourceMapper

A resource mapper object that handles aliases and format changes between deserialization and response.

required
database str

The name of the underlying MongoDB database to connect to.

CONFIG.mongo_database
Source code in optimade/server/entry_collections/mongo.py
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
def __init__(
    self,
    name: str,
    resource_cls: EntryResource,
    resource_mapper: BaseResourceMapper,
    database: str = CONFIG.mongo_database,
):
    """Set up a MongoCollection backed by the named collection.

    Parameters:
        name: The name of the collection.
        resource_cls: The type of entry resource that is stored by the collection.
        resource_mapper: A resource mapper object that handles aliases and
            format changes between deserialization and response.
        database: The name of the underlying MongoDB database to connect to.

    """
    transformer = MongoTransformer(mapper=resource_mapper)
    super().__init__(resource_cls, resource_mapper, transformer)

    self.parser = LarkParser(version=(1, 0, 0), variant="default")
    self.collection = CLIENT[database][name]

    # Neither the plain aliases nor the length aliases may clash with
    # mongo operators; each set is fetched and checked in turn.
    mapper = self.resource_mapper
    for alias_getter in (mapper.all_aliases, mapper.all_length_aliases):
        self._check_aliases(alias_getter())

__len__()

Returns the total number of entries in the collection.

Source code in optimade/server/entry_collections/mongo.py
69
70
71
def __len__(self) -> int:
    """Return the collection's total entry count, as given by
    its `estimated_document_count` method.
    """
    total_entries = self.collection.estimated_document_count()
    return total_entries

count(**kwargs)

Returns the number of entries matching the query specified by the keyword arguments.

Parameters:

Name Type Description Default
**kwargs Any

Query parameters as keyword arguments. The keys 'filter', 'skip', 'limit', 'hint' and 'maxTimeMS' will be passed to the pymongo.collection.Collection.count_documents method.

{}
Source code in optimade/server/entry_collections/mongo.py
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
def count(self, **kwargs: Any) -> int:
    """Count the entries that match the query given as keyword arguments.

    Parameters:
        **kwargs: Query parameters as keyword arguments. Only the keys
            'filter', 'skip', 'limit', 'hint' and 'maxTimeMS' are forwarded
            to the `pymongo.collection.Collection.count_documents` method;
            every other key is discarded.

    """
    accepted = ("filter", "skip", "limit", "hint", "maxTimeMS")
    query = {key: value for key, value in kwargs.items() if key in accepted}
    # count_documents() requires a "filter"; fall back to matching everything.
    query.setdefault("filter", {})
    return self.collection.count_documents(**query)

handle_query_params(params)

Parse and interpret the backend-agnostic query parameter models into a dictionary that can be used by MongoDB.

This Mongo-specific method calls the base EntryCollection.handle_query_params method and adds additional handling of the MongoDB ObjectID type.

Parameters:

Name Type Description Default
params Union[EntryListingQueryParams, SingleEntryQueryParams]

The initialized query parameter model from the server.

required

Raises:

Type Description
Forbidden

If too large of a page limit is provided.

BadRequest

If an invalid request is made, e.g., with incorrect fields or response format.

Returns:

Type Description
Dict[str, Any]

A dictionary representation of the query parameters.

Source code in optimade/server/entry_collections/mongo.py
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
def handle_query_params(
    self, params: Union[EntryListingQueryParams, SingleEntryQueryParams]
) -> Dict[str, Any]:
    """Parse and interpret the backend-agnostic query parameter models into a dictionary
    that can be used by MongoDB.

    This Mongo-specific method calls the base `EntryCollection.handle_query_params` method
    and adds additional handling of the MongoDB ObjectID type.

    Parameters:
        params: The initialized query parameter model from the server.

    Raises:
        Forbidden: If too large of a page limit is provided.
        BadRequest: If an invalid request is made, e.g., with incorrect fields
            or response format.

    Returns:
        A dictionary representation of the query parameters.

    """
    criteria = super().handle_query_params(params)
    # Make sure a projection mapping exists before writing to it, so that a
    # criteria dictionary without a "projection" key cannot raise a KeyError.
    projection = criteria.setdefault("projection", {})
    # Handle MongoDB ObjectIDs:
    # - If they were not requested, then explicitly remove them
    # - If they were requested, then cast them to strings in the response
    if "_id" not in projection:
        projection["_id"] = False
    elif projection["_id"]:
        projection["_id"] = {"$toString": "$_id"}

    return criteria

insert(data)

Add the given entries to the underlying database.

Warning

No validation is performed on the incoming data.

Parameters:

Name Type Description Default
data List[EntryResource]

The entry resource objects to add to the database.

required
Source code in optimade/server/entry_collections/mongo.py
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
def insert(self, data: List[EntryResource]) -> None:
    """Insert the given entries into the underlying database collection.

    Warning:
        The incoming data is handed over as-is; no validation is performed.

    Parameters:
        data: The entry resource objects to add to the database.

    """
    target = self.collection
    target.insert_many(data)