Skip to content

Commit

Permalink
file-transfer(cdk): rename transfer class
Browse files Browse the repository at this point in the history
  • Loading branch information
aldogonzalez8 committed Nov 6, 2024
1 parent 0e6732d commit ff2accf
Show file tree
Hide file tree
Showing 4 changed files with 9 additions and 13 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from .jsonl_parser import JsonlParser
from .parquet_parser import ParquetParser
from .unstructured_parser import UnstructuredParser
from .blob_transfer import BlobTransfer
from .file_transfer import FileTransfer

default_parsers: Mapping[Type[Any], FileTypeParser] = {
AvroFormat: AvroParser(),
Expand All @@ -25,4 +25,4 @@
UnstructuredFormat: UnstructuredParser(),
}

__all__ = ["AvroParser", "CsvParser", "ExcelParser", "JsonlParser", "ParquetParser", "UnstructuredParser", "BlobTransfer", "default_parsers"]
__all__ = ["AvroParser", "CsvParser", "ExcelParser", "JsonlParser", "ParquetParser", "UnstructuredParser", "FileTransfer", "default_parsers"]
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,11 @@
DEFAULT_LOCAL_DIRECTORY = "/tmp/airbyte-file-transfer"


class BlobTransfer:
class FileTransfer:
def __init__(self) -> None:
self._local_directory = AIRBYTE_STAGING_DIRECTORY if os.path.exists(AIRBYTE_STAGING_DIRECTORY) else DEFAULT_LOCAL_DIRECTORY

def write_streams(
def get_file(
self,
config: FileBasedStreamConfig,
file: RemoteFile,
Expand All @@ -29,7 +29,3 @@ def write_streams(
except Exception as ex:
logger.error("An error has occurred while getting file: %s", str(ex))
raise ex

@property
def file_read_mode(self) -> FileReadMode:
return FileReadMode.READ_BINARY
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
SchemaInferenceError,
StopSyncPerValidationPolicy,
)
from airbyte_cdk.sources.file_based.file_types import BlobTransfer
from airbyte_cdk.sources.file_based.file_types import FileTransfer
from airbyte_cdk.sources.file_based.remote_file import RemoteFile
from airbyte_cdk.sources.file_based.schema_helpers import SchemaType, file_transfer_schema, merge_schemas, schemaless_schema
from airbyte_cdk.sources.file_based.stream import AbstractFileBasedStream
Expand Down Expand Up @@ -126,8 +126,8 @@ def read_records_from_slice(self, stream_slice: StreamSlice) -> Iterable[Airbyte
if self.use_file_transfer:
self.logger.info(f"{self.name}: {file} file-based syncing")
# todo: complete here the code to not rely on local parser
blob_transfer = BlobTransfer()
for record in blob_transfer.write_streams(self.config, file, self.stream_reader, self.logger):
file_transfer = FileTransfer()
for record in file_transfer.get_file(self.config, file, self.stream_reader, self.logger):
line_no += 1
if not self.record_passes_validation_policy(record):
n_skipped += 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from airbyte_cdk.sources.file_based.discovery_policy import AbstractDiscoveryPolicy
from airbyte_cdk.sources.file_based.exceptions import FileBasedErrorsCollector, FileBasedSourceError
from airbyte_cdk.sources.file_based.file_based_stream_reader import AbstractFileBasedStreamReader
from airbyte_cdk.sources.file_based.file_types import BlobTransfer
from airbyte_cdk.sources.file_based.file_types import FileTransfer
from airbyte_cdk.sources.file_based.file_types.file_type_parser import FileTypeParser
from airbyte_cdk.sources.file_based.remote_file import RemoteFile
from airbyte_cdk.sources.file_based.schema_validation_policies import AbstractSchemaValidationPolicy
Expand Down Expand Up @@ -286,7 +286,7 @@ def setUp(self) -> None:

def test_when_read_records_from_slice_then_return_records(self) -> None:
"""Verify that we have the new file method and data is empty"""
with mock.patch.object(BlobTransfer, "write_streams", return_value=[self._A_RECORD]):
with mock.patch.object(FileTransfer, "get_file", return_value=[self._A_RECORD]):
messages = list(self._stream.read_records_from_slice({"files": [RemoteFile(uri="uri", last_modified=self._NOW)]}))
assert list(map(lambda message: message.record.file, messages)) == [self._A_RECORD]
assert list(map(lambda message: message.record.data, messages)) == [{}]
Expand Down

0 comments on commit ff2accf

Please sign in to comment.