From 09a120dd6fa913ac17426ab2afd9492c8174dcd2 Mon Sep 17 00:00:00 2001 From: Luke Yang Date: Thu, 9 Nov 2023 13:41:43 -0500 Subject: [PATCH] BACK-1791: ensure prop IPFS gateway gets used instead of pinata --- offchain/metadata/fetchers/base_fetcher.py | 3 +- .../metadata/fetchers/metadata_fetcher.py | 62 +++-- .../metadata/pipelines/metadata_pipeline.py | 12 +- offchain/utils/utils.py | 2 +- .../fetchers/test_metadata_fetcher.py | 237 ++++++++++-------- .../pipelines/test_metadata_pipeline.py | 58 +++++ 6 files changed, 243 insertions(+), 131 deletions(-) diff --git a/offchain/metadata/fetchers/base_fetcher.py b/offchain/metadata/fetchers/base_fetcher.py index c54c075..df3df3f 100644 --- a/offchain/metadata/fetchers/base_fetcher.py +++ b/offchain/metadata/fetchers/base_fetcher.py @@ -40,7 +40,8 @@ def set_max_retries(self, new_max_retries: int): # type: ignore[no-untyped-def] pass def register_adapter(self, adapter: Adapter, url_prefix: str): # type: ignore[no-untyped-def] # noqa: E501 - """Register an adapter to a url prefix. + """Register an adapter to a url prefix. Note this only affects synchronous http + requests (via the requests library). Args: adapter (Adapter): an Adapter instance to register. diff --git a/offchain/metadata/fetchers/metadata_fetcher.py b/offchain/metadata/fetchers/metadata_fetcher.py index 4ee38de..8118775 100644 --- a/offchain/metadata/fetchers/metadata_fetcher.py +++ b/offchain/metadata/fetchers/metadata_fetcher.py @@ -26,14 +26,23 @@ def __init__( max_retries: int = 0, async_adapter_configs: Optional[list[AdapterConfig]] = None, ) -> None: + from offchain.metadata.pipelines.metadata_pipeline import ( + DEFAULT_ADAPTER_CONFIGS, + ) + self.timeout = timeout self.max_retries = max_retries self.sess = requests.Session() self.async_sess = httpx.AsyncClient() - self.async_adapter_configs = async_adapter_configs + self.async_adapter_configs = ( + DEFAULT_ADAPTER_CONFIGS + if async_adapter_configs is None + else async_adapter_configs + ) def register_adapter(self, adapter: Adapter, url_prefix: str): # type: ignore[no-untyped-def] # noqa: E501 - """Register an adapter to a url prefix. + """Register an adapter to a url prefix. Note this only affects synchronous http + requests (via the requests library). Args: adapter (Adapter): an Adapter instance to register. @@ -57,6 +66,25 @@ def set_timeout(self, timeout: int): # type: ignore[no-untyped-def] """ self.timeout = timeout + def _get_async_adapter_for_uri(self, uri: str) -> Optional[Adapter]: + if self.async_adapter_configs is not None: + for async_adapter_config in self.async_adapter_configs: + if any( + uri.startswith(prefix) + for prefix in async_adapter_config.mount_prefixes + ): + logger.debug( + f"Selected {async_adapter_config.adapter_cls.__name__} for making async http requests for uri={uri}" # noqa: E501 + ) + return async_adapter_config.adapter_cls( + host_prefixes=async_adapter_config.host_prefixes, + **async_adapter_config.kwargs, + ) + logger.warning( + f"Unable to selected an adapter for async http requests for uri={uri}" + ) + return None + def _head(self, uri: str): # type: ignore[no-untyped-def] return self.sess.head(uri, timeout=self.timeout, allow_redirects=True) @@ -64,28 +92,16 @@ def _get(self, uri: str): # type: ignore[no-untyped-def] return self.sess.get(uri, timeout=self.timeout, allow_redirects=True) async def _gen(self, uri: str, method: Optional[str] = "GET") -> httpx.Response: - from offchain.metadata.pipelines.metadata_pipeline import ( - DEFAULT_ADAPTER_CONFIGS, - ) - - configs = DEFAULT_ADAPTER_CONFIGS - - if self.async_adapter_configs: - configs = self.async_adapter_configs - - for adapter_config in configs: - if any(uri.startswith(prefix) for prefix in adapter_config.mount_prefixes): - adapter = adapter_config.adapter_cls( - host_prefixes=adapter_config.host_prefixes, **adapter_config.kwargs + async_adapter = self._get_async_adapter_for_uri(uri) + if async_adapter is not None: + if method == "HEAD": + return await async_adapter.gen_head( + url=uri, timeout=self.timeout, sess=self.async_sess + ) + else: + return await async_adapter.gen_send( + url=uri, timeout=self.timeout, sess=self.async_sess ) - if method == "HEAD": - return await adapter.gen_head( - url=uri, timeout=self.timeout, sess=self.async_sess - ) - else: - return await adapter.gen_send( - url=uri, timeout=self.timeout, sess=self.async_sess - ) return await self.async_sess.get( uri, timeout=self.timeout, follow_redirects=True ) diff --git a/offchain/metadata/pipelines/metadata_pipeline.py b/offchain/metadata/pipelines/metadata_pipeline.py index 746095e..8746964 100644 --- a/offchain/metadata/pipelines/metadata_pipeline.py +++ b/offchain/metadata/pipelines/metadata_pipeline.py @@ -31,11 +31,6 @@ kwargs={"pool_connections": 100, "pool_maxsize": 1000, "max_retries": 0}, ), AdapterConfig(adapter_cls=DataURIAdapter, mount_prefixes=["data:"]), - AdapterConfig( - adapter_cls=HTTPAdapter, - mount_prefixes=["https://", "http://"], - kwargs={"pool_connections": 100, "pool_maxsize": 1000, "max_retries": 0}, - ), AdapterConfig( adapter_cls=IPFSAdapter, mount_prefixes=[ @@ -46,6 +41,11 @@ host_prefixes=["https://gateway.pinata.cloud/ipfs/"], kwargs={"pool_connections": 100, "pool_maxsize": 1000, "max_retries": 0}, ), + AdapterConfig( + adapter_cls=HTTPAdapter, + mount_prefixes=["https://", "http://"], + kwargs={"pool_connections": 100, "pool_maxsize": 1000, "max_retries": 0}, + ), ] DEFAULT_PARSERS = ( @@ -66,7 +66,7 @@ class MetadataPipeline(BasePipeline): mime type, and size by making network requests. parsers (list[BaseParser], optional): a list of parser instances for parsing token metadata. adapter_configs: (list[AdapterConfig], optional): a list of adapter configs used to register adapters - to specified url prefixes. + to specified url prefixes. This configuration affects both sync and async requests. """ # noqa: E501 def __init__( diff --git a/offchain/utils/utils.py b/offchain/utils/utils.py index 57cbbc4..7c9cf91 100644 --- a/offchain/utils/utils.py +++ b/offchain/utils/utils.py @@ -33,7 +33,7 @@ async def wrapped(*args, **kwargs): # type: ignore[no-untyped-def] logger.error(msg) if not silent: raise - logger.warn(msg) + logger.warning(msg) await asyncio.sleep(retry_delay) return None diff --git a/tests/metadata/fetchers/test_metadata_fetcher.py b/tests/metadata/fetchers/test_metadata_fetcher.py index 09f405b..a24bec8 100644 --- a/tests/metadata/fetchers/test_metadata_fetcher.py +++ b/tests/metadata/fetchers/test_metadata_fetcher.py @@ -2,7 +2,12 @@ from pytest_httpx import HTTPXMock -from offchain.metadata.adapters.ipfs import IPFSAdapter +from offchain.metadata.adapters.base_adapter import AdapterConfig +from offchain.metadata.adapters import ( + ARWeaveAdapter, + HTTPAdapter, + IPFSAdapter, +) from offchain.metadata.fetchers.metadata_fetcher import MetadataFetcher @@ -13,103 +18,135 @@ def test_metadata_fetcher_register_adapter(self): # type: ignore[no-untyped-def fetcher.register_adapter(adapter, "ipfs://") assert fetcher.sess.adapters.get("ipfs://") == adapter + @pytest.mark.asyncio + async def test_gen_fetch_data_adapter(self): # type: ignore[no-untyped-def] + fetcher = MetadataFetcher() + content = await fetcher.gen_fetch_content( + "" # noqa + ) + assert content is not None + + @pytest.mark.asyncio + async def test_gen_fetch_ipfs_adapter(self): # type: ignore[no-untyped-def] + fetcher = MetadataFetcher() + content = await fetcher.gen_fetch_content( + "ipfs://bafkreiboyxwytfyufln3uzyzaixslzvmrqs5ezjo2cio2fymfqf6u57u6u" # noqa + ) + assert content is not None + + @pytest.mark.asyncio + async def test_gen_fetch_arweave_adapter(self): # type: ignore[no-untyped-def] + fetcher = MetadataFetcher() + content = await fetcher.gen_fetch_content( + "ar://-G92LjB-wFj-FCGx040NgniW_Ypy_Cbh3Jq1HUD6l7A" # noqa + ) + assert content is not None + + @pytest.mark.asyncio + async def test_gen_fetch_base_adapter(self): # type: ignore[no-untyped-def] + fetcher = MetadataFetcher() + content = await fetcher.gen_fetch_content( + "https://meta.sadgirlsbar.io/8403.json" # noqa + ) + assert content is not None -@pytest.mark.asyncio -async def test_gen_fetch_data_adapter(): # type: ignore[no-untyped-def] - fetcher = MetadataFetcher() - content = await fetcher.gen_fetch_content( - "" # noqa - ) - assert content is not None - - -@pytest.mark.asyncio -async def test_gen_fetch_ipfs_adapter(): # type: ignore[no-untyped-def] - fetcher = MetadataFetcher() - content = await fetcher.gen_fetch_content( - "ipfs://bafkreiboyxwytfyufln3uzyzaixslzvmrqs5ezjo2cio2fymfqf6u57u6u" # noqa - ) - assert content is not None - - -@pytest.mark.asyncio -async def test_gen_fetch_arweave_adapter(): # type: ignore[no-untyped-def] - fetcher = MetadataFetcher() - content = await fetcher.gen_fetch_content( - "ar://-G92LjB-wFj-FCGx040NgniW_Ypy_Cbh3Jq1HUD6l7A" # noqa - ) - assert content is not None - - -@pytest.mark.asyncio -async def test_gen_fetch_base_adapter(): # type: ignore[no-untyped-def] - fetcher = MetadataFetcher() - content = await fetcher.gen_fetch_content( - "https://meta.sadgirlsbar.io/8403.json" # noqa - ) - assert content is not None - - -@pytest.mark.asyncio -async def test_gen_fetch_mime_type_and_size(): # type: ignore[no-untyped-def] - fetcher = MetadataFetcher() - result = await fetcher.gen_fetch_mime_type_and_size( - "https://ipfs.io/ipfs/QmQaYaf3Q2oCBaUfUvV6mBP58EjbUTbMk6dC1o4YGjeWCo" - ) - assert result == ("image/png", "2887641") # type: ignore[comparison-overlap] - print(result) - - -@pytest.mark.asyncio -async def test_gen_fetch_mime_type_and_size_http(httpx_mock: HTTPXMock): # type: ignore[no-untyped-def] - expected_headers = {"content-type": "image/png", "content-length": "99639"} - httpx_mock.add_response(method="HEAD", headers=expected_headers) - fetcher = MetadataFetcher() - result = await fetcher.gen_fetch_mime_type_and_size( - "https://d4ldbtmwfs9ii.cloudfront.net/7273.png" # noqa - ) - assert result == ( - expected_headers["content-type"], - expected_headers["content-length"], - ) - - -@pytest.mark.asyncio -async def test_gen_fetch_mime_type_and_size_ipfs(httpx_mock: HTTPXMock): # type: ignore[no-untyped-def] - expected_headers = {"content-type": "image/png", "content-length": "1251767"} - httpx_mock.add_response(method="HEAD", headers=expected_headers) - fetcher = MetadataFetcher() - result = await fetcher.gen_fetch_mime_type_and_size( - "ipfs://QmV4MseQF2QDDYbmxtg7eEQ9vMuYNntPQrR3arXHnK4yGX/150.png" - ) - assert result == ( - expected_headers["content-type"], - expected_headers["content-length"], - ) - - -@pytest.mark.asyncio -async def test_gen_fetch_mime_type_and_size_arweave(httpx_mock: HTTPXMock): # type: ignore[no-untyped-def] - expected_headers = {"content-type": "image/png", "content-length": "235779"} - httpx_mock.add_response(method="HEAD", headers=expected_headers) - fetcher = MetadataFetcher() - result = await fetcher.gen_fetch_mime_type_and_size( - "ar://veLMprs2c--Rl6nXCeakR5FG9K8y4WXt62iLxayrflo/1032.png" - ) - assert result == ( - expected_headers["content-type"], - expected_headers["content-length"], - ) - - -@pytest.mark.asyncio -async def test_gen_fetch_mime_type_and_size_data(httpx_mock: HTTPXMock): # type: ignore[no-untyped-def] - expected_headers = {"content-type": "image/svg+xml", "content-length": "1853"} - fetcher = MetadataFetcher() - result = await fetcher.gen_fetch_mime_type_and_size( - "" # noqa - ) - assert result == ( - expected_headers["content-type"], - expected_headers["content-length"], - ) + @pytest.mark.asyncio + async def test_gen_fetch_mime_type_and_size(self): # type: ignore[no-untyped-def] + fetcher = MetadataFetcher() + result = await fetcher.gen_fetch_mime_type_and_size( + "https://ipfs.io/ipfs/QmQaYaf3Q2oCBaUfUvV6mBP58EjbUTbMk6dC1o4YGjeWCo" + ) + assert result == ("image/png", "2887641") # type: ignore[comparison-overlap] + print(result) + + @pytest.mark.asyncio + async def test_gen_fetch_mime_type_and_size_http(self, httpx_mock: HTTPXMock): # type: ignore[no-untyped-def] + expected_headers = {"content-type": "image/png", "content-length": "99639"} + httpx_mock.add_response(method="HEAD", headers=expected_headers) + fetcher = MetadataFetcher() + result = await fetcher.gen_fetch_mime_type_and_size( + "https://d4ldbtmwfs9ii.cloudfront.net/7273.png" # noqa + ) + assert result == ( + expected_headers["content-type"], + expected_headers["content-length"], + ) + + @pytest.mark.asyncio + async def test_gen_fetch_mime_type_and_size_ipfs(self, httpx_mock: HTTPXMock): # type: ignore[no-untyped-def] + expected_headers = {"content-type": "image/png", "content-length": "1251767"} + httpx_mock.add_response(method="HEAD", headers=expected_headers) + fetcher = MetadataFetcher() + result = await fetcher.gen_fetch_mime_type_and_size( + "ipfs://QmV4MseQF2QDDYbmxtg7eEQ9vMuYNntPQrR3arXHnK4yGX/150.png" + ) + assert result == ( + expected_headers["content-type"], + expected_headers["content-length"], + ) + + @pytest.mark.asyncio + async def test_gen_fetch_mime_type_and_size_arweave(self, httpx_mock: HTTPXMock): # type: ignore[no-untyped-def] + expected_headers = {"content-type": "image/png", "content-length": "235779"} + httpx_mock.add_response(method="HEAD", headers=expected_headers) + fetcher = MetadataFetcher() + result = await fetcher.gen_fetch_mime_type_and_size( + "ar://veLMprs2c--Rl6nXCeakR5FG9K8y4WXt62iLxayrflo/1032.png" + ) + assert result == ( + expected_headers["content-type"], + expected_headers["content-length"], + ) + + @pytest.mark.asyncio + async def test_gen_fetch_mime_type_and_size_data(self): # type: ignore[no-untyped-def] + expected_headers = {"content-type": "image/svg+xml", "content-length": "1853"} + fetcher = MetadataFetcher() + result = await fetcher.gen_fetch_mime_type_and_size( + "" # noqa + ) + assert result == ( + expected_headers["content-type"], + expected_headers["content-length"], + ) + + @pytest.mark.asyncio + async def test_gen_async_adapter(self, httpx_mock: HTTPXMock): + ADAPTER_CONFIGS: list[AdapterConfig] = [ + AdapterConfig( + adapter_cls=ARWeaveAdapter, + mount_prefixes=["ar://"], + host_prefixes=["https://arweave.net/"], + ), + AdapterConfig( + adapter_cls=IPFSAdapter, + mount_prefixes=[ + "ipfs://", + "https://gateway.pinata.cloud/", + "https://ipfs.io/", + ], + host_prefixes=["https://gateway.pinata.cloud/ipfs/"], + ), + AdapterConfig( + adapter_cls=HTTPAdapter, + mount_prefixes=["https://", "http://"], + ), + ] + + ARWEAVE_URI = "ar://-G92LjB-wFj-FCGx040NgniW_Ypy_Cbh3Jq1HUD6l7A" + IPFS_URIs = [ + "ipfs://bafkreiboyxwytfyufln3uzyzaixslzvmrqs5ezjo2cio2fymfqf6u57u6u", + "https://gateway.pinata.cloud/ipfs/QmY3Lz7DfQPtPkK4n5StZcqc2zA6cmJC7wcAgzYXvGQLGm/485", + "https://ipfs.io/ipfs/QmQaYaf3Q2oCBaUfUvV6mBP58EjbUTbMk6dC1o4YGjeWCo", + ] + HTTPS_URI = "https://ipfs.decentralized-content.com/ipfs/QmY3Lz7DfQPtPkK4n5StZcqc2zA6cmJC7wcAgzYXvGQLGm/485" + fetcher = MetadataFetcher(async_adapter_configs=ADAPTER_CONFIGS) + + assert isinstance( + fetcher._get_async_adapter_for_uri(ARWEAVE_URI), ARWeaveAdapter + ) + for IPFS_URI in IPFS_URIs: + assert isinstance(fetcher._get_async_adapter_for_uri(IPFS_URI), IPFSAdapter) + assert isinstance(fetcher._get_async_adapter_for_uri(IPFS_URI), IPFSAdapter) + assert isinstance(fetcher._get_async_adapter_for_uri(IPFS_URI), IPFSAdapter) + assert isinstance(fetcher._get_async_adapter_for_uri(HTTPS_URI), HTTPAdapter) diff --git a/tests/metadata/pipelines/test_metadata_pipeline.py b/tests/metadata/pipelines/test_metadata_pipeline.py index e007f0f..6796782 100644 --- a/tests/metadata/pipelines/test_metadata_pipeline.py +++ b/tests/metadata/pipelines/test_metadata_pipeline.py @@ -1,5 +1,6 @@ # flake8: noqa: E501 +from pytest_httpx import HTTPXMock from typing import Tuple from unittest.mock import AsyncMock, MagicMock @@ -19,6 +20,7 @@ from offchain.metadata.models.metadata_processing_error import MetadataProcessingError from offchain.metadata.models.token import Token from offchain.metadata.pipelines.metadata_pipeline import ( # type: ignore[attr-defined] + DEFAULT_ADAPTER_CONFIGS, AdapterConfig, MetadataPipeline, ) @@ -59,6 +61,62 @@ def test_metadata_pipeline_mounts_adapters(self): # type: ignore[no-untyped-def == ipfs_adapter ) + @pytest.mark.asyncio + async def test_ipfs_adapter_uses_specified_ipfs_provider( + self, httpx_mock: HTTPXMock + ): + # integration test, the following setup reflects usage in prod + IPFS_PROVIDER = "https://ipfs.decentralized-content.com/ipfs/" + + def set_async_adapters() -> list[AdapterConfig]: + async_adapters = [] + for adapter in DEFAULT_ADAPTER_CONFIGS: + if adapter.adapter_cls is IPFSAdapter: + ipfs_adapter = AdapterConfig( + adapter_cls=IPFSAdapter, + mount_prefixes=[ + "ipfs://", + "https://gateway.pinata.cloud/", + "https://ipfs.io/", + "https://ipfs.decentralized-content.com/", + ], + host_prefixes=[IPFS_PROVIDER], + ) + async_adapters.append(ipfs_adapter) + + else: + async_adapters.append(adapter) + + return async_adapters + + adapters = set_async_adapters() + pipeline = MetadataPipeline(adapter_configs=adapters) + + httpx_mock.add_response( + json=[ + { + "name": "Beast #485", + "image": "https://gateway.pinata.cloud/ipfs/QmcimtwbWGKXLJ3pTMRu2ncEeeuK9DUwYye6uhJhZC9C6A/beast485.png", + "external_url": "https://tierzeronft.com/", + "attributes": [ + {"trait_type": "Background", "value": "Blue"}, + {"trait_type": "Fur", "value": "Dark Grey"}, + {"trait_type": "Shoes", "value": "Feet"}, + {"trait_type": "Eyes", "value": "Green"}, + {"trait_type": "Hat", "value": "Headset"}, + {"trait_type": "Unit", "value": "Unit I"}, + ], + } + ], + url=f"{IPFS_PROVIDER}QmY3Lz7DfQPtPkK4n5StZcqc2zA6cmJC7wcAgzYXvGQLGm/485", + ) + content = await pipeline.fetcher.gen_fetch_content( + "https://gateway.pinata.cloud/ipfs/QmY3Lz7DfQPtPkK4n5StZcqc2zA6cmJC7wcAgzYXvGQLGm/485" + ) + assert ( + content is not None + ), "Call to gateway.pinata.cloud did not get redirected to ipfs.decentralized-content.com" + def test_metadata_pipeline_fetch_token_uri(self, raw_crypto_coven_metadata): # type: ignore[no-untyped-def] token = Token( chain_identifier="ETHEREUM-MAINNET",