Skip to content

Commit

Permalink
DB: detach getter of latest snapshots set data from `get_latest_snaps…
Browse files Browse the repository at this point in the history
…hots`

This simplifies the `get_latest_snapshots` implementation and makes
room for a new `get_distinct_val_count` method (WIP).
  • Loading branch information
DavidB137 committed Dec 20, 2023
1 parent 075f4af commit e4ed694
Showing 1 changed file with 30 additions and 12 deletions.
42 changes: 30 additions & 12 deletions dp3/database/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -508,6 +508,31 @@ def get_latest_snapshot(self, etype: str, eid: str) -> dict:
snapshot_col = self._snapshots_col_name(etype)
return self._db[snapshot_col].find_one({"eid": eid}, sort=[("_id", -1)]) or {}

def _get_latest_snapshots_date(self) -> Optional[datetime]:
    """Return the creation time of the newest fully completed snapshot set.

    Looks up the most recent "SnapShooter" record in the `#metadata`
    collection for which `workers_finished` equals `self._num_processes`
    and `linked_finished` is `True`.

    Returns:
        The `#time_created` value of that record, or `None` when no such
        record exists.

    Note: The lookup is not restricted to any entity type, so the result
    may be inaccurate for newly added entity types.
    """
    # A snapshot set counts as complete only once every worker process has
    # finished and the linking phase is done.
    completed_set_filter = {
        "#module": "SnapShooter",
        "workers_finished": self._num_processes,
        "linked_finished": True,
    }
    # Descending by creation time, so the first match is the newest one.
    newest_first = [("#time_created", -1)]

    metadata = self._db["#metadata"].find_one(completed_set_filter, sort=newest_first)
    return metadata["#time_created"] if metadata is not None else None

def get_latest_snapshots(
self, etype: str, fulltext_filters: Optional[dict[str, str]] = None
) -> tuple[pymongo.cursor.Cursor, int]:
Expand All @@ -534,23 +559,16 @@ def get_latest_snapshots(

snapshot_col = self._snapshots_col_name(etype)

# Find newest fully completed snapshot set
latest_fully_completed_snapshot_metadata = self._db["#metadata"].find_one(
{
"#module": "SnapShooter",
"workers_finished": self._num_processes,
"linked_finished": True,
},
sort=[("#time_created", -1)],
)
# Find newest fully completed snapshot date
latest_snapshot_date = self._get_latest_snapshots_date()

if latest_fully_completed_snapshot_metadata is None:
# There are no fully completed snapshots sets - return all currently existing snapshots
if latest_snapshot_date is None:
return self._db[snapshot_col].find().sort([("eid", pymongo.ASCENDING)]), self._db[
snapshot_col
].count_documents({})

# Extract date and query using it
latest_snapshot_date = latest_fully_completed_snapshot_metadata["#time_created"]
# Create base of query
query = {"_time_created": latest_snapshot_date}

if not fulltext_filters:
Expand Down

0 comments on commit e4ed694

Please sign in to comment.