Skip to content
This repository has been archived by the owner on Nov 6, 2023. It is now read-only.

Commit

Permalink
chore(s3): add loggers
Browse files: browse the repository at this point in the history
  • Loading branch information
Vixtir committed Sep 26, 2022
1 parent 5a37122 commit 91d3bfb
Show file tree
Hide file tree
Showing 5 changed files with 16 additions and 6 deletions.
1 change: 1 addition & 0 deletions odd_collector_aws/adapters/s3/adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ def _get_entities(self):
try:
logger.debug(f"Getting info for: {dataset}")
yield from self.s3_use_case.get_data_entities(dataset)
logger.debug(f"Getting info for: {dataset}")
except Exception:
logger.error(
f"Got unexpected error for {dataset.path}, SKIP object.",
Expand Down
10 changes: 9 additions & 1 deletion odd_collector_aws/adapters/s3/mapper/metadata_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import pyarrow.dataset as ds

from odd_collector_aws.adapters.s3.file_system import FileSystem
from odd_collector_aws.adapters.s3.logger import logger
from odd_collector_aws.utils import parse_s3_url


Expand Down Expand Up @@ -63,10 +64,17 @@ def average_size(avg_file_size, n_files):

class FileMetadataExtractor(MetadataExtractor):
    """Extract metadata for the dataset located at ``self._original_path``."""

    def extract(self) -> dict:
        """Collect the format, row count and S3 location of the dataset.

        The path is split with ``parse_s3_url`` and the rows are counted via
        ``self._dataset.count_rows()``; each step is logged at INFO level
        before and after it runs.

        Returns:
            A dict with the keys ``Format``, ``Rows``, ``Bucket`` and ``Key``.
        """
        logger.info(f"Parse {self._original_path}")
        bucket, key = parse_s3_url(self._original_path)
        logger.info(f"Parsed {bucket}/{key}")

        # Count exactly once and reuse the value below. A second
        # count_rows() call inside the returned dict would repeat the work
        # (presumably a scan of the underlying data -- confirm against the
        # dataset implementation) only to be overwritten by ``rows``.
        logger.info("Count rows")
        rows = self._dataset.count_rows()
        logger.info(f"Counted rows {rows}")

        return {
            "Format": self._dataset.format.default_extname,
            "Rows": rows,
            "Bucket": bucket,
            "Key": key,
        }
Expand Down
5 changes: 3 additions & 2 deletions odd_collector_aws/adapters/s3/strategies/fetch_strategies.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,12 +61,13 @@ def get_datasets(self, dataset_config: DatasetConfig) -> List[Any]:

def create_s3_dataset_for_file(file_path: str, fs: FileSystem):
file_ext = get_file_extension(file_path)

logger.debug("validate")
validate_file_extension(file_ext)

dataset = fs.get_dataset(file_path, DATASETS_FN[file_ext].format)
logger.debug("extract metadata")
metadata = FileMetadataExtractor(file_path, dataset, fs).extract()

logger.debug("Metadata extracted")
return DATASETS_FN[file_ext](
dataset,
file_path,
Expand Down
4 changes: 2 additions & 2 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ repository = "https://github.com/opendatadiscovery/odd-collector-aws"

[tool.poetry.dependencies]
python = "^3.9"
oddrn-generator = "0.1.45"
oddrn-generator = "0.1.50"
python-dotenv = "0.19.0"
boto3 = "1.18.44"
lark-parser = "0.12.0"
Expand Down

0 comments on commit 91d3bfb

Please sign in to comment.