From d14adf4eeba5292cdeb1d686004fd4779b606560 Mon Sep 17 00:00:00 2001 From: Nicholas FitzRoy-Dale Date: Wed, 18 Oct 2023 13:05:10 +0100 Subject: [PATCH] Rewrite database subsetting Previously, subsetting involved creating entities called RowSubsets which were then passed to a function which created a database using them. Now, you create the new database first as a context manager, and use it to produce RowSubset objects. The new approach means the context manager can track its own subsets, so you don't need to explicitly list them. --- rime/graphql.py | 24 ++++-- rime/provider.py | 14 ++++ rime/providers/androidcontacts.py | 36 ++++----- rime/providers/androidgenericmedia.py | 4 + rime/providers/androidtelephony.py | 29 ++++--- rime/providers/androidwhatsapp.py | 110 ++++++++++++-------------- rime/providers/imessage.py | 53 +++++++------ rime/providers/ioscontacts.py | 18 ++--- rime/providers/ioswhatsapp.py | 50 ++++++------ rime/providers/providernames.py | 3 + rime/subset.py | 77 +++++++++++++----- 11 files changed, 240 insertions(+), 178 deletions(-) diff --git a/rime/graphql.py b/rime/graphql.py index 69be8f5..d8b247a 100644 --- a/rime/graphql.py +++ b/rime/graphql.py @@ -21,7 +21,7 @@ from .event import MessageEvent, MediaEvent from .mergedcontact import merge_contacts from .anonymise import Anonymiser -from .subset import Subsetter +from .subset import DeviceSubsetter, ProviderSubsetter, SubsetOptions, SubsetFillOption from .device import Device from .provider import Provider from .event import Event, MessageSession @@ -495,7 +495,8 @@ def _create_subset_prepare_device(rime, target): return device, new_device -def _create_subset_populate_device(rime, device, new_device, events_filter_obj, contacts_filter_obj): +def _create_subset_populate_device(rime, opts: SubsetOptions, device, new_device, events_filter_obj, + contacts_filter_obj): """ Create a subset of 'targets' with events and contacts matching the supplied filters. @@ -504,7 +505,7 @@ def _create_subset_populate_device(rime, device, new_device, events_filter_obj, Raises CreateSubsetError for any error we might expect callers to reasonably deal with. May raise anything else if something goes wrong (e.g. while a particular provider is perfoming a subset). """ - subsetter = Subsetter(new_device.fs) + device_subsetter = DeviceSubsetter(new_device.fs, opts) # Find and remember the contacts subset. contacts_by_provider = { @@ -522,12 +523,16 @@ def _create_subset_populate_device(rime, device, new_device, events_filter_obj, else: contacts_for_provider = [] - ebp.provider.subset(subsetter, ebp.events, contacts_for_provider) + provider_subsetter = ProviderSubsetter(device_subsetter, opts) + + ebp.provider.subset(provider_subsetter, ebp.events, contacts_for_provider) # Also subset contacts-only providers with no subsetted events. for provider_name in unsubsetted_contact_providers: provider = device.providers[provider_name] - provider.subset(subsetter, [], contacts_by_provider[provider_name]) + + provider_subsetter = ProviderSubsetter(device_subsetter, opts) + provider.subset(provider_subsetter, [], contacts_by_provider[provider_name]) new_device.reload_providers() @@ -543,6 +548,12 @@ def resolve_create_subset(rime, info, targets, eventsFilter, contactsFilter, ano devices = [] # list of (old device, new device) + # Create default subset options. TODO: Expose these to GraphQL. + opts = SubsetOptions( + fill=SubsetFillOption.UNUSED_DBS_AND_TABLES, + anonymise=anonymise, + ) + async def _create_subset_impl(bg_rime): # TODO: Error reporting, status updates errorMessage = None @@ -557,7 +568,8 @@ async def _create_subset_impl(bg_rime): try: for old_device, new_device in devices: - _create_subset_populate_device(bg_rime, old_device, new_device, events_filter_obj, contacts_filter_obj) + _create_subset_populate_device(bg_rime, opts, old_device, new_device, events_filter_obj, + contacts_filter_obj) if anonymiser: for provider in new_device.providers.values(): anonymiser.anonymise_device_provider(new_device, provider) diff --git a/rime/provider.py b/rime/provider.py index 438aa86..2b5f1e8 100644 --- a/rime/provider.py +++ b/rime/provider.py @@ -5,6 +5,7 @@ from abc import ABC, abstractmethod from .media import MediaData +from .filesystem.base import File class Provider(ABC): @@ -28,6 +29,13 @@ def subset(self, subsetter, events, contacts): """ return None + @abstractmethod + def all_files(self) -> list[File]: + """ + Return a list of all files associated with the app. + """ + return [] + @abstractmethod def search_events(self, device, filter_): """ @@ -57,6 +65,12 @@ def find_providers(fs) -> dict[str, Provider]: providers_dict = {} for provider in Provider.__subclasses__(): instance = provider.from_filesystem(fs) + + # Sanity check the providers... + if not provider.NAME or not provider.FRIENDLY_NAME: + raise ValueError(f'Provider {provider.__name__} has no NAME or FRIENDLY_NAME') + + # ... and store them. if instance: providers_dict[provider.NAME] = instance diff --git a/rime/providers/androidcontacts.py b/rime/providers/androidcontacts.py index b7daf29..ebf187f 100644 --- a/rime/providers/androidcontacts.py +++ b/rime/providers/androidcontacts.py @@ -138,25 +138,23 @@ def search_contacts(self, contacts_filter): } def subset(self, subsetter, events: Iterable[Event], contacts: Iterable[Contact]): - rows_contacts = subsetter.row_subset('contacts', '_id') - rows_raw_contacts = subsetter.row_subset('raw_contacts', '_id') - rows_data = subsetter.row_subset('data', 'raw_contact_id') - mimetypes = subsetter.complete_table('mimetypes') - - for contact in contacts: - if contact.providerName != self.NAME: - continue - - rows_contacts.add(contact.local_id) - rows_raw_contacts.update(contact.provider_data.raw_contact_row_ids) - rows_data.update(contact.provider_data.raw_contact_row_ids) - - subsetter.create_db_and_copy_rows(self.conn, self.DB_PATH, [ - rows_contacts, - rows_raw_contacts, - rows_data, - mimetypes - ]) + with subsetter.db_subset(src_conn=self.conn, new_db_pathname=self.DB_PATH) as db_subset: + rows_contacts = db_subset.row_subset('contacts', '_id') + rows_raw_contacts = db_subset.row_subset('raw_contacts', '_id') + rows_data = db_subset.row_subset('data', 'raw_contact_id') + db_subset.complete_table('mimetypes') + + for contact in contacts: + if contact.providerName != self.NAME: + continue + + rows_contacts.add(contact.local_id) + rows_raw_contacts.update(contact.provider_data.raw_contact_row_ids) + rows_data.update(contact.provider_data.raw_contact_row_ids) + + def all_files(self): + # TODO + return [] def _get_mime_types(self): """ diff --git a/rime/providers/androidgenericmedia.py b/rime/providers/androidgenericmedia.py index e118d95..64c20cc 100644 --- a/rime/providers/androidgenericmedia.py +++ b/rime/providers/androidgenericmedia.py @@ -59,6 +59,10 @@ def subset(self, subsetter, events, contacts): """ return None + def all_files(self): + # TODO + return [] + def search_events(self, device, filter_): """ Search for events matching ``filter_``, which is an EventFilter. diff --git a/rime/providers/androidtelephony.py b/rime/providers/androidtelephony.py index fe37468..6afd8c7 100644 --- a/rime/providers/androidtelephony.py +++ b/rime/providers/androidtelephony.py @@ -120,23 +120,22 @@ def subset(self, subsetter, events, contacts): """ Create a subset using the given events and contacts. """ - rows_sms = subsetter.row_subset("sms", "_id") - rows_threads = subsetter.row_subset('threads', '_id') - rows_address = subsetter.row_subset('canonical_addresses', '_id') + with subsetter.db_subset(src_conn=self.db, new_db_pathname=self.MMSSMS_DB) as subset_db: + rows_sms = subset_db.row_subset("sms", "_id") + rows_threads = subset_db.row_subset('threads', '_id') + rows_address = subset_db.row_subset('canonical_addresses', '_id') - rows_address.update( - contact.local_id for contact in contacts if contact.providerName == self.NAME - ) - rows_threads.update( - event.provider_data.threads_table_id for event in events if event.provider.NAME == self.NAME - ) - rows_sms.update(event.id_ for event in events if event.provider.NAME == self.NAME) + rows_address.update( + contact.local_id for contact in contacts if contact.providerName == self.NAME + ) + rows_threads.update( + event.provider_data.threads_table_id for event in events if event.provider.NAME == self.NAME + ) + rows_sms.update(event.id_ for event in events if event.provider.NAME == self.NAME) - subsetter.create_db_and_copy_rows(self.db, self.MMSSMS_DB, [ - rows_sms, - rows_threads, - rows_address, - ]) + def all_files(self): + # TODO + return [] @classmethod def from_filesystem(cls, fs): diff --git a/rime/providers/androidwhatsapp.py b/rime/providers/androidwhatsapp.py index 72c0b19..a3c18da 100644 --- a/rime/providers/androidwhatsapp.py +++ b/rime/providers/androidwhatsapp.py @@ -450,64 +450,58 @@ def subset(self, subsetter, events: Iterable[Event], contacts: Iterable[Contact] Create a WhatsApp subset using the provided events and contacts. """ # Copy the contacts - rows_wa_contacts = subsetter.row_subset("wa_contacts", "_id") - rows_wa_contacts.update(contact.provider_data.id_ for contact in contacts) - - # Copy session participants - rows_group_participant_user = subsetter.row_subset("group_participant_user", "_id") - - # Copy events - rows_message = subsetter.row_subset("message", "_id") - rows_message_media = subsetter.row_subset("message_media", "message_row_id") - rows_message_details = subsetter.row_subset("message_details", "message_row_id") - rows_jid = subsetter.row_subset("jid", "_id") - rows_chat = subsetter.row_subset("chat", "_id") - - for event in events: - # Reject if it's not one of ours. - if not isinstance(event, MessageEvent) or event.provider.NAME != self.NAME: - continue - - wa_message = event.provider_data - - rows_message.add(wa_message.message_row_id) - if event.sender and event.sender.provider_data: - rows_jid.update(jid_contact.id_ for jid_contact in event.sender.provider_data.jid_contacts) - - if event.session: - wa_session = event.session.provider_data - rows_group_participant_user.update(wa_session.group_participant_user_ids) - if wa_session.group_user_id: - rows_wa_contacts.add(wa_session.group_user_id) - if wa_session.group_jid_row_id: - rows_jid.add(wa_session.group_jid_row_id) - - rows_message_details.add(wa_message.message_row_id) - rows_chat.add(wa_message.chat_row_id) - rows_message_media.add(wa_message.message_row_id) - - # Write the message db. - subsetter.create_db_and_copy_rows(self.msgdb, self.MESSAGE_DB, [ - rows_message, - rows_message_details, - rows_message_media, - rows_jid, - rows_chat, - rows_group_participant_user, - ]) - - # Write the contacts DB. - subsetter.create_db_and_copy_rows(self.wadb, self.WA_DB, [rows_wa_contacts]) - - # copy media by copying each named file. - media_table = Table('message_media') - query = Query.from_(media_table) \ - .select('file_path') \ - .where(media_table.message_row_id.isin(rows_message_media.rows)) - - for row in self.msgdb.execute(query.get_sql()): - pathname = self._media_path(row[0]) - subsetter.copy_file(self.fs.open(pathname), pathname) + with subsetter.db_subset(src_conn=self.wadb, new_db_pathname=self.WA_DB) as subset_wadb, \ + subsetter.db_subset(src_conn=self.msgdb, new_db_pathname=self.MESSAGE_DB) as subset_msgdb: + + rows_wa_contacts = subset_wadb.row_subset("wa_contacts", "_id") + rows_wa_contacts.update(contact.provider_data.id_ for contact in contacts) + + # Copy session participants + rows_group_participant_user = subset_msgdb.row_subset("group_participant_user", "_id") + + # Copy events + rows_message = subset_msgdb.row_subset("message", "_id") + rows_message_media = subset_msgdb.row_subset("message_media", "message_row_id") + rows_message_details = subset_msgdb.row_subset("message_details", "message_row_id") + rows_jid = subset_msgdb.row_subset("jid", "_id") + rows_chat = subset_msgdb.row_subset("chat", "_id") + + for event in events: + # Reject if it's not one of ours. + if not isinstance(event, MessageEvent) or event.provider.NAME != self.NAME: + continue + + wa_message = event.provider_data + + rows_message.add(wa_message.message_row_id) + if event.sender and event.sender.provider_data: + rows_jid.update(jid_contact.id_ for jid_contact in event.sender.provider_data.jid_contacts) + + if event.session: + wa_session = event.session.provider_data + rows_group_participant_user.update(wa_session.group_participant_user_ids) + if wa_session.group_user_id: + rows_wa_contacts.add(wa_session.group_user_id) + if wa_session.group_jid_row_id: + rows_jid.add(wa_session.group_jid_row_id) + + rows_message_details.add(wa_message.message_row_id) + rows_chat.add(wa_message.chat_row_id) + rows_message_media.add(wa_message.message_row_id) + + # copy media by copying each named file. + media_table = Table('message_media') + query = Query.from_(media_table) \ + .select('file_path') \ + .where(media_table.message_row_id.isin(rows_message_media.rows)) + + for row in self.msgdb.execute(query.get_sql()): + pathname = self._media_path(row[0]) + subsetter.copy_file(self.fs.open(pathname), pathname) + + def all_files(self): + # TODO + return [] @classmethod def from_filesystem(cls, fs): diff --git a/rime/providers/imessage.py b/rime/providers/imessage.py index 00a961f..927ac24 100644 --- a/rime/providers/imessage.py +++ b/rime/providers/imessage.py @@ -152,32 +152,33 @@ def search_contacts(self, filter_): def subset(self, subsetter, events: Iterable[Event], contacts: Iterable[Contact]): """ """ - handle_rows = subsetter.row_subset('handle', 'ROWID') - handle_rows.update(contact.provider_data.row_id for contact in contacts if contact.providerName == self.NAME) - - message_rows = subsetter.row_subset('message', 'ROWID') - chat_rows = subsetter.row_subset('chat', 'ROWID') - chat_message_join_rows = subsetter.row_subset('chat_message_join', 'chat_id') - chat_handle_join_rows = subsetter.row_subset('chat_handle_join', 'chat_id') - - for event in events: - if not isinstance(event, MessageEvent) or event.provider != self: - continue - - message_rows.add(event.provider_data.message_row_id) - chat_rows.add(event.provider_data.chat_row_id) - chat_message_join_rows.add(event.provider_data.chat_row_id) - chat_handle_join_rows.add(event.provider_data.chat_row_id) - if event.session: - handle_rows.update(contact.provider_data.row_id for contact in event.session.participants) - - subsetter.create_db_and_copy_rows(self.conn, self.MESSAGE_DB, [ - handle_rows, - message_rows, - chat_rows, - chat_message_join_rows, - chat_handle_join_rows, - ]) + with subsetter.db_subset(src_conn=self.conn, new_db_pathname=self.MESSAGE_DB) as subset_db: + handle_rows = subset_db.row_subset('handle', 'ROWID') + handle_rows.update( + contact.provider_data.row_id + for contact in contacts + if contact.providerName == self.NAME + ) + + message_rows = subset_db.row_subset('message', 'ROWID') + chat_rows = subset_db.row_subset('chat', 'ROWID') + chat_message_join_rows = subset_db.row_subset('chat_message_join', 'chat_id') + chat_handle_join_rows = subset_db.row_subset('chat_handle_join', 'chat_id') + + for event in events: + if not isinstance(event, MessageEvent) or event.provider != self: + continue + + message_rows.add(event.provider_data.message_row_id) + chat_rows.add(event.provider_data.chat_row_id) + chat_message_join_rows.add(event.provider_data.chat_row_id) + chat_handle_join_rows.add(event.provider_data.chat_row_id) + if event.session: + handle_rows.update(contact.provider_data.row_id for contact in event.session.participants) + + def all_files(self): + # TODO + return [] @classmethod def from_filesystem(cls, fs): diff --git a/rime/providers/ioscontacts.py b/rime/providers/ioscontacts.py index b526f45..af1959e 100644 --- a/rime/providers/ioscontacts.py +++ b/rime/providers/ioscontacts.py @@ -96,18 +96,18 @@ def search_contacts(self, filter_): } def subset(self, subsetter, events: Iterable[Event], contacts: Iterable[Contact]): - abperson_rows = subsetter.row_subset('ABPerson', 'ROWID') - abmv_rows = subsetter.row_subset('ABMultiValue', 'record_id') + with subsetter.db_subset(src_conn=self.conn, new_db_pathname=self.DB_PATH) as subset_db: + abperson_rows = subset_db.row_subset('ABPerson', 'ROWID') + abmv_rows = subset_db.row_subset('ABMultiValue', 'record_id') - my_contact_ids = [contact.local_id for contact in contacts if contact.providerName == self.NAME] + my_contact_ids = [contact.local_id for contact in contacts if contact.providerName == self.NAME] - abperson_rows.update(my_contact_ids) - abmv_rows.update(my_contact_ids) + abperson_rows.update(my_contact_ids) + abmv_rows.update(my_contact_ids) - subsetter.create_db_and_copy_rows(self.conn, self.DB_PATH, [ - abperson_rows, - abmv_rows - ]) + def all_files(self): + # TODO + return [] @classmethod def from_filesystem(cls, fs): diff --git a/rime/providers/ioswhatsapp.py b/rime/providers/ioswhatsapp.py index 697a4ff..4a11b3a 100644 --- a/rime/providers/ioswhatsapp.py +++ b/rime/providers/ioswhatsapp.py @@ -153,40 +153,38 @@ def subset(self, subsetter, events, contacts): """ Create a subset using the given events and contacts. """ - rows_zwaprofilepushname = subsetter.row_subset('ZWAPROFILEPUSHNAME', 'Z_PK') - rows_zwagroupmember = subsetter.row_subset('ZWAGROUPMEMBER', 'Z_PK') - rows_zwachatsession = subsetter.row_subset('ZWACHATSESSION', 'Z_PK') - rows_zwamessage = subsetter.row_subset('ZWAMESSAGE', 'Z_PK') + with subsetter.db_subset(src_conn=self.msgdb, new_db_pathname=self.CHATSTORAGE_DB) as subset_db: + rows_zwaprofilepushname = subset_db.row_subset('ZWAPROFILEPUSHNAME', 'Z_PK') + rows_zwagroupmember = subset_db.row_subset('ZWAGROUPMEMBER', 'Z_PK') + rows_zwachatsession = subset_db.row_subset('ZWACHATSESSION', 'Z_PK') + rows_zwamessage = subset_db.row_subset('ZWAMESSAGE', 'Z_PK') - # Copy the "contacts" into ZWACHATSESSION, ZWAPROFILEPUSHNAME, and ZWAGROUPMEMBER. - for contact in contacts: - wa_contact = contact.provider_data + # Copy the "contacts" into ZWACHATSESSION, ZWAPROFILEPUSHNAME, and ZWAGROUPMEMBER. + for contact in contacts: + wa_contact = contact.provider_data - if wa_contact.profile_push_name_id: - rows_zwaprofilepushname.add(wa_contact.profile_push_name_id) + if wa_contact.profile_push_name_id: + rows_zwaprofilepushname.add(wa_contact.profile_push_name_id) - if wa_contact.group_member_pks: - rows_zwagroupmember.update(set(wa_contact.group_member_pks)) + if wa_contact.group_member_pks: + rows_zwagroupmember.update(set(wa_contact.group_member_pks)) - if wa_contact.chat_session_ids: - rows_zwachatsession.update(set(wa_contact.chat_session_ids)) + if wa_contact.chat_session_ids: + rows_zwachatsession.update(set(wa_contact.chat_session_ids)) - # Copy the events. - for event in events: - wa_event = event.provider_data + # Copy the events. + for event in events: + wa_event = event.provider_data - rows_zwamessage.add(event.id_) - rows_zwachatsession.add(wa_event.chat_session_id) + rows_zwamessage.add(event.id_) + rows_zwachatsession.add(wa_event.chat_session_id) - if wa_event.group_member: - rows_zwagroupmember.add(wa_event.group_member) + if wa_event.group_member: + rows_zwagroupmember.add(wa_event.group_member) - subsetter.create_db_and_copy_rows(self.msgdb, self.CHATSTORAGE_DB, [ - rows_zwaprofilepushname, - rows_zwagroupmember, - rows_zwachatsession, - rows_zwamessage, - ]) + def all_files(self): + # TODO + return [] def _create_session(self, session_id): chat_table = Table('ZWACHATSESSION') diff --git a/rime/providers/providernames.py b/rime/providers/providernames.py index d496550..2ac2486 100644 --- a/rime/providers/providernames.py +++ b/rime/providers/providernames.py @@ -4,6 +4,8 @@ ANDROID_CONTACTS = 'android-com.android.providers.contacts' ANDROID_CONTACTS_FRIENDLY = 'Android Contacts' +ANDROID_GENERIC = 'android-generic' +ANDROID_GENERIC_FRIENDLY = 'Android Generic' ANDROID_GENERIC_MEDIA = 'android-generic-media' ANDROID_GENERIC_MEDIA_FRIENDLY = 'Android Generic Media' ANDROID_TELEPHONY = "android-com.android.providers.telephony" @@ -24,6 +26,7 @@ FRIENDLY_NAMES = { ANDROID_CONTACTS: ANDROID_CONTACTS_FRIENDLY, ANDROID_GENERIC_MEDIA: ANDROID_GENERIC_MEDIA_FRIENDLY, + ANDROID_GENERIC: ANDROID_GENERIC_FRIENDLY, ANDROID_TELEPHONY: ANDROID_TELEPHONY_FRIENDLY, ANDROID_WHATSAPP: ANDROID_WHATSAPP_FRIENDLY, ANDROID_CAMERA: ANDROID_CAMERA_FRIENDLY, diff --git a/rime/subset.py b/rime/subset.py index f58bf49..487a9d7 100644 --- a/rime/subset.py +++ b/rime/subset.py @@ -5,8 +5,11 @@ """ Trace provider data access for subsetting. """ +from dataclasses import dataclass +from enum import Enum import re import shutil +from contextlib import contextmanager from .sql import Table, Query, Parameter @@ -40,7 +43,36 @@ def _copy_table(src_conn, dst_conn, table_name, add_where_clause_fn): dst_conn.execute(insert_query.get_sql(), row) -class RowSubset: +SubsetFillOption = Enum('SubsetFillOption', ('MINIMAL', 'UNUSED_TABLES', 'UNUSED_DBS_AND_TABLES')) + + +@dataclass(frozen=True) +class SubsetOptions: + """ + Should we copy additional unused tables and databases? + MINIMAL: Don't copy anything that isn't referenced by the subset. + UNUSED_TABLES: Create unused tables (but don't copy any data). + UNUSED_DBS_AND_TABLES: Create unused tables and databases (but don't copy any data). + """ + fill: SubsetFillOption = SubsetFillOption.MINIMAL + + anonymise: bool = False + + +class DeviceSubsetter: + def __init__(self, fs_dest, subset_options: SubsetOptions): + self._fs_dest = fs_dest + + self.options = subset_options + + def sqlite3_create(self, path): + return self._fs_dest.sqlite3_create(path) + + def create_file(self, path): + return self._fs_dest.create_file(path) + + +class _RowSubset: def __init__(self, table_name, primary_key): self.table_name = table_name self.primary_key = primary_key @@ -60,7 +92,7 @@ def copy(self, src_conn, dst_conn): lambda query, table: query.where(table[self.primary_key].isin(self.rows))) -class CompleteTable: +class _CompleteTable: def __init__(self, table_name): self.table_name = table_name @@ -68,27 +100,34 @@ def copy(self, src_conn, dst_conn): _copy_table(src_conn, dst_conn, self.table_name, None) -class Subsetter: - def __init__(self, fs_dest): - self._fs_dest = fs_dest +class _DbSubset: + def __init__(self): + self.subsets = [] def row_subset(self, table_name, primary_key): - return RowSubset(table_name, primary_key) + row_subset = _RowSubset(table_name, primary_key) + self.subsets.append(row_subset) + return row_subset def complete_table(self, table_name): - return CompleteTable(table_name) - - def sqlite3_create(self, path): - return self._fs_dest.sqlite3_create(path) + complete_table = _CompleteTable(table_name) + self.subsets.append(complete_table) + return complete_table + + +class ProviderSubsetter: + def __init__(self, device_subsetter: DeviceSubsetter, subset_options: SubsetOptions): + self._device_subsetter = device_subsetter + self.options = subset_options + + @contextmanager + def db_subset(self, *, src_conn, new_db_pathname): + db_subset = _DbSubset() + yield db_subset + with self._device_subsetter.sqlite3_create(new_db_pathname) as dst_conn: + for row_subset in db_subset.subsets: + row_subset.copy(src_conn, dst_conn) def copy_file(self, handle, dst_path): - with self._fs_dest.create_file(dst_path) as dest_handle: + with self._device_subsetter.create_file(dst_path) as dest_handle: shutil.copyfileobj(handle, dest_handle) - - def create_db_and_copy_rows(self, src_conn, new_db_pathname, row_subsets: list[RowSubset]): - """ - Convenience method to create a new database and copy row subsets from the source database. - """ - with self._fs_dest.sqlite3_create(new_db_pathname) as dst_conn: - for row_subset in row_subsets: - row_subset.copy(src_conn, dst_conn)