Skip to content
This repository has been archived by the owner on Nov 6, 2023. It is now read-only.

Commit

Permalink
chore: use escape fn from oddrn library (#17)
Browse files Browse the repository at this point in the history
  • Loading branch information
Vixtir authored Sep 14, 2022
1 parent 2e7c7f3 commit 5a37122
Show file tree
Hide file tree
Showing 4 changed files with 9 additions and 9 deletions.
8 changes: 5 additions & 3 deletions odd_collector_aws/adapters/s3/mapper/dataset.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import re
from typing import List, Dict, Any

from lark import Lark
Expand All @@ -10,6 +11,7 @@
DataEntityType,
)
from oddrn_generator.generators import S3Generator
from oddrn_generator.utils import escape
from pyarrow import Schema

from .s3_field_type_transformer import S3FieldTypeTransformer
Expand Down Expand Up @@ -69,12 +71,12 @@ def __parse(field_type: str) -> Dict[str, Any]:
return field_type_transformer.transform(column_tree)


def s3_path_to_name(path: str) -> str:
    """
    Build a dataset name from an S3 path by dropping the leading bucket segment.

    Leading and trailing slashes are stripped, the first ``<bucket>/`` segment
    is removed, and the remaining key is passed through
    ``oddrn_generator.utils.escape``. A path with no "/" left after stripping
    has no segment to remove and is escaped unchanged.

    :param path: S3 path of the form ``bucket/key``; a trailing "/" is allowed.
    :return: the escaped key without the bucket name.
    """
    # Raw string: "\d" inside a plain literal is an invalid escape sequence.
    # The class lists the characters that occur in bucket names (letters,
    # digits, ".", "-", and "_" for legacy buckets) instead of the accidental
    # "A-z" range, which also matched "[", "\", "]", "^" and "`".
    key = re.sub(r"^[a-zA-Z\d._-]*/", "", path.strip("/"), count=1)
    return escape(key)


def map_dataset(
Expand Down
4 changes: 2 additions & 2 deletions odd_collector_aws/adapters/sagemaker/domain/artifact.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@
DataInput,
)
from oddrn_generator import Generator
from oddrn_generator.utils import escape

from odd_collector_aws.const import S3_PATH_REPLACER
from odd_collector_aws.domain.to_data_entity import ToDataEntity
from odd_collector_aws.utils import parse_s3_url
from .base_sagemaker_entity import BaseSagemakerEntity
Expand Down Expand Up @@ -124,7 +124,7 @@ def create_model(uri: str, arn: str):

def create_dummy_dataset_artifact(uri: str, arn: str):
    """
    Build a ``DummyDatasetArtifact`` named after the key part of an S3 URL.

    :param uri: S3 URL of the dataset; it is split by ``parse_s3_url``.
    :param arn: ARN stored on the artifact unchanged.
    :return: a ``DummyDatasetArtifact`` with ``ArtifactType="Dataset"``.
    """
    # Only the key contributes to the name; the bucket part is discarded.
    _, key = parse_s3_url(uri)
    return DummyDatasetArtifact(
        Name=escape(key), Uri=uri, Arn=arn, ArtifactType="Dataset"
    )


Expand Down
2 changes: 0 additions & 2 deletions odd_collector_aws/const.py
Original file line number Diff line number Diff line change
@@ -1,3 +1 @@
# Base URL of the "extensions" directory in the OpenDataDiscovery
# specification repository (raw GitHub content, main branch).
METADATA_PREFIX = "https://raw.githubusercontent.com/opendatadiscovery/opendatadiscovery-specification/main/specification/extensions"

# Character substituted for "/" when an S3 key is flattened into a name.
S3_PATH_REPLACER = ":"
4 changes: 2 additions & 2 deletions tests/adapters/s3/test_s3_path_to_name.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@


def test_s3_path_to_name():
    """The bucket segment is dropped and every "/" in the key is escaped."""
    # "\\\\" in source is two literal backslashes per path separator.
    assert s3_path_to_name("bucket/path/to/file.csv") == "path\\\\to\\\\file.csv"
    # A trailing slash is stripped before the key is escaped.
    assert s3_path_to_name("bucket/path/to/") == "path\\\\to"

0 comments on commit 5a37122

Please sign in to comment.