From d25fb44890c6eec031893754c2b956eca120ae76 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Bierlein?= Date: Wed, 20 Mar 2024 23:22:58 +0100 Subject: [PATCH] Truncate token uri in logging (#99) Some URLs are very long (e.g. data urls), so we truncate them to make them easier to read in logs and use less space. This PR also fixes the foundation parser, which was broken, because the API it relied on doesn't exist anymore. It's now falling back to contract calls. --- docs/changelog.md | 5 ++ docs/index.md | 2 +- .../metadata/parsers/collection/foundation.py | 75 +++++++++++++++---- .../metadata/pipelines/metadata_pipeline.py | 19 +++-- pyproject.toml | 2 +- .../fetchers/test_metadata_fetcher.py | 2 +- .../parsers/test_foundation_parser.py | 19 ++--- 7 files changed, 87 insertions(+), 37 deletions(-) diff --git a/docs/changelog.md b/docs/changelog.md index 7e2687c..f373ddb 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -1,5 +1,10 @@ # Changelog +## v0.3.1 + +- Trim token_uri in some log outputs, this is mainly useful for data uris that are too long and make logs unreadable +- Fix `FoundationParser`, the API it relied on doesn't exist anymore, so we are falling back to contract calls to get the metadata + ## v0.3.0 - Upgrade web3 to 6.11.3 diff --git a/docs/index.md b/docs/index.md index f4c0eb0..f1c3783 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,6 +1,6 @@ # Getting Started -Documentation for version: **v0.3.0** +Documentation for version: **v0.3.1** ## Overview diff --git a/offchain/metadata/parsers/collection/foundation.py b/offchain/metadata/parsers/collection/foundation.py index acc5db3..901c48c 100644 --- a/offchain/metadata/parsers/collection/foundation.py +++ b/offchain/metadata/parsers/collection/foundation.py @@ -12,24 +12,69 @@ class FoundationParser(CollectionParser): _COLLECTION_ADDRESSES: list[str] = [CollectionAddress.FOUNDATION] - def parse_metadata(self, token: Token, raw_data: Optional[dict], *args, **kwargs) -> Optional[Metadata]: # type: ignore[no-untyped-def, type-arg] # noqa: E501 - if token.uri is None or raw_data is None: - token.uri = f"https://api.foundation.app/opensea/{token.token_id}" - raw_data = self.fetcher.fetch_content(token.uri) # type: ignore[assignment] - metadata = DefaultCatchallParser(self.fetcher).parse_metadata(token=token, raw_data=raw_data) # type: ignore[arg-type] # noqa: E501 + def _normalize_metadata(self, metadata: Optional[Metadata]) -> Optional[Metadata]: + if metadata is None: + return None + metadata.standard = None # type: ignore[union-attr] - if metadata.content.uri.endswith("glb"): # type: ignore[union-attr] - metadata.content.mime_type = "model/gltf-binary" # type: ignore[union-attr] + if ( + metadata + and metadata.image + and metadata.image.uri + and metadata.image.uri.endswith("glb") + ): + metadata.image.mime_type = "model/gltf-binary" return metadata - async def _gen_parse_metadata_impl(self, token: Token, raw_data: Optional[dict], *args, **kwargs) -> Optional[Metadata]: # type: ignore[no-untyped-def, type-arg] # noqa: E501 + def parse_metadata( + self, token: Token, raw_data: Optional[dict], *args, **kwargs + ) -> Optional[Metadata]: if token.uri is None or raw_data is None: - token.uri = f"https://api.foundation.app/opensea/{token.token_id}" - raw_data = await self.fetcher.gen_fetch_content(token.uri) # type: ignore[assignment] - metadata = await DefaultCatchallParser(self.fetcher).gen_parse_metadata(token=token, raw_data=raw_data) # type: ignore[arg-type] # noqa: E501 - metadata.standard = None # type: ignore[union-attr] - if metadata.content.uri.endswith("glb"): # type: ignore[union-attr] - metadata.content.mime_type = "model/gltf-binary" # type: ignore[union-attr] + token.uri = self.contract_caller.single_address_single_fn_many_args( + token.collection_address, + "tokenURI(uint256)", + ["string"], + [[token.token_id]], + )[0] + if token.uri is None: + return None - return metadata + content = self.fetcher.fetch_content(token.uri) + if content and isinstance(content, dict): + raw_data = content + + if raw_data is None: + return None + + metadata = DefaultCatchallParser(self.fetcher).parse_metadata( + token=token, raw_data=raw_data + ) + + return self._normalize_metadata(metadata) + + async def _gen_parse_metadata_impl( + self, token: Token, raw_data: Optional[dict], *args, **kwargs + ) -> Optional[Metadata]: + if token.uri is None or raw_data is None: + token.uri = await self.contract_caller.rpc.async_reader.call_function( + token.collection_address, + "tokenURI(uint256)", + ["string"], + [token.token_id], + ) + if token.uri is None: + return None + + content = await self.fetcher.gen_fetch_content(token.uri) + if content and isinstance(content, dict): + raw_data = content + + if raw_data is None: + return None + + metadata = await DefaultCatchallParser(self.fetcher).gen_parse_metadata( + token=token, raw_data=raw_data + ) + + return self._normalize_metadata(metadata) diff --git a/offchain/metadata/pipelines/metadata_pipeline.py b/offchain/metadata/pipelines/metadata_pipeline.py index eb3c7b9..8fac2ec 100644 --- a/offchain/metadata/pipelines/metadata_pipeline.py +++ b/offchain/metadata/pipelines/metadata_pipeline.py @@ -25,6 +25,15 @@ ) +def _truncate_uri(uri: str, max_length: int = 100) -> str: + if len(uri) <= max_length: + return uri + + keep_length = (max_length - 3) // 2 # 3 is for the '...' + + return uri[:keep_length] + "..." + uri[-keep_length:] + + class MetadataPipeline(BasePipeline): """Pipeline for processing NFT metadata. @@ -161,7 +170,7 @@ def fetch_token_metadata( try: raw_data = self.fetcher.fetch_content(token.uri) except Exception as e: - error_message = f"({token.chain_identifier}-{token.collection_address}-{token.token_id}) Failed to parse token uri: {token.uri}. {str(e)}" # noqa: E501 + error_message = f"({token.chain_identifier}-{token.collection_address}-{token.token_id}) Failed to parse token uri: {_truncate_uri(token.uri)}. {str(e)}" # noqa: E501 logger.error(error_message) possible_metadatas_or_errors.append( MetadataProcessingError.from_token_and_error( @@ -216,9 +225,9 @@ async def gen_fetch_token_metadata( Union[Metadata, MetadataProcessingError]: returns either a Metadata or a MetadataProcessingError if unable to parse. """ - possible_metadatas_or_errors: list[ - Union[Metadata, MetadataProcessingError] - ] = [] + possible_metadatas_or_errors: list[Union[Metadata, MetadataProcessingError]] = ( + [] + ) if not token.uri: return MetadataProcessingError.from_token_and_error( @@ -230,7 +239,7 @@ async def gen_fetch_token_metadata( try: raw_data = await self.fetcher.gen_fetch_content(token.uri) except Exception as e: - error_message = f"({token.chain_identifier}-{token.collection_address}-{token.token_id}) Failed to parse token uri: {token.uri}. {str(e)}" # noqa: E501 + error_message = f"({token.chain_identifier}-{token.collection_address}-{token.token_id}) Failed to parse token uri: {_truncate_uri(token.uri)}. {str(e)}" # noqa: E501 logger.error(error_message) possible_metadatas_or_errors.append( MetadataProcessingError.from_token_and_error( diff --git a/pyproject.toml b/pyproject.toml index 769eb7b..b602961 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "offchain" -version = "0.3.0" +version = "0.3.1" description = "Open source metadata processing framework" authors = ["Zora eng "] readme = "README.md" diff --git a/tests/metadata/fetchers/test_metadata_fetcher.py b/tests/metadata/fetchers/test_metadata_fetcher.py index a24bec8..67240d7 100644 --- a/tests/metadata/fetchers/test_metadata_fetcher.py +++ b/tests/metadata/fetchers/test_metadata_fetcher.py @@ -54,7 +54,7 @@ async def test_gen_fetch_base_adapter(self): # type: ignore[no-untyped-def] async def test_gen_fetch_mime_type_and_size(self): # type: ignore[no-untyped-def] fetcher = MetadataFetcher() result = await fetcher.gen_fetch_mime_type_and_size( - "https://ipfs.io/ipfs/QmQaYaf3Q2oCBaUfUvV6mBP58EjbUTbMk6dC1o4YGjeWCo" + "https://ipfs.decentralized-content.com/ipfs/QmQaYaf3Q2oCBaUfUvV6mBP58EjbUTbMk6dC1o4YGjeWCo" ) assert result == ("image/png", "2887641") # type: ignore[comparison-overlap] print(result) diff --git a/tests/metadata/parsers/test_foundation_parser.py b/tests/metadata/parsers/test_foundation_parser.py index b6fdf38..7763074 100644 --- a/tests/metadata/parsers/test_foundation_parser.py +++ b/tests/metadata/parsers/test_foundation_parser.py @@ -22,9 +22,7 @@ class TestFoundationParser: raw_data = { "name": "Experiment #0004", "description": "They rise again!", - "image": "https://d1hiserqh6k9o1.cloudfront.net/Ax/kk/QmWwB2LXk7VKu5KtDrtUYdwpHK1NvJ49XrQvFRJxqiAxkk/nft.png", - "animation_url": "ipfs://QmWwB2LXk7VKu5KtDrtUYdwpHK1NvJ49XrQvFRJxqiAxkk/nft.glb", - "external_url": "https://foundation.app/@pw_3Dlab/foundation/113384", + "image": "ipfs://QmWwB2LXk7VKu5KtDrtUYdwpHK1NvJ49XrQvFRJxqiAxkk/nft.glb", } def test_foundation_parser_should_parse_token(self): # type: ignore[no-untyped-def] @@ -37,7 +35,7 @@ def test_foundation_parser_parses_metadata(self): # type: ignore[no-untyped-def fetcher = MetadataFetcher() contract_caller = ContractCaller() fetcher.fetch_mime_type_and_size = MagicMock(return_value=("application/json", 0)) # type: ignore[assignment] - fetcher.fetch_content = MagicMock(return_value=self.raw_data) # type: ignore[assignment] + fetcher.fetch_content = MagicMock(return_value=None) # type: ignore[assignment] parser = FoundationParser(fetcher=fetcher, contract_caller=contract_caller) # type: ignore[abstract] metadata = parser.parse_metadata(token=self.token, raw_data=self.raw_data) assert metadata == Metadata( @@ -45,14 +43,12 @@ def test_foundation_parser_parses_metadata(self): # type: ignore[no-untyped-def chain_identifier="ETHEREUM-MAINNET", collection_address="0x3b3ee1931dc30c1957379fac9aba94d1c48a5405", token_id=113384, - uri="https://api.foundation.app/opensea/113384", + uri="ipfs://QmRxAiR7FsT78mLcyMiE1p86a77gBQVJGWfGRADtMwqyEe/metadata.json", ), raw_data={ "name": "Experiment #0004", "description": "They rise again!", - "image": "https://d1hiserqh6k9o1.cloudfront.net/Ax/kk/QmWwB2LXk7VKu5KtDrtUYdwpHK1NvJ49XrQvFRJxqiAxkk/nft.png", - "animation_url": "ipfs://QmWwB2LXk7VKu5KtDrtUYdwpHK1NvJ49XrQvFRJxqiAxkk/nft.glb", - "external_url": "https://foundation.app/@pw_3Dlab/foundation/113384", + "image": "ipfs://QmWwB2LXk7VKu5KtDrtUYdwpHK1NvJ49XrQvFRJxqiAxkk/nft.glb", }, attributes=[], standard=None, @@ -60,17 +56,12 @@ def test_foundation_parser_parses_metadata(self): # type: ignore[no-untyped-def description="They rise again!", mime_type="application/json", image=MediaDetails( - size=0, - sha256=None, - uri="https://d1hiserqh6k9o1.cloudfront.net/Ax/kk/QmWwB2LXk7VKu5KtDrtUYdwpHK1NvJ49XrQvFRJxqiAxkk/nft.png", - mime_type="application/json", - ), - content=MediaDetails( size=0, sha256=None, uri="ipfs://QmWwB2LXk7VKu5KtDrtUYdwpHK1NvJ49XrQvFRJxqiAxkk/nft.glb", mime_type="model/gltf-binary", ), + content=None, additional_fields=[], )