From 087b57e15762346bc542a693afb17daf616faa22 Mon Sep 17 00:00:00 2001 From: Luke Yang Date: Tue, 9 Apr 2024 20:09:23 -0400 Subject: [PATCH] BACK-2650: fix DataURIAdapter re plain text json data uri --- docs/changelog.md | 4 +++ docs/index.md | 2 +- offchain/metadata/adapters/data_uri.py | 5 +++ pyproject.toml | 2 +- tests/metadata/adapters/test_data_adapter.py | 35 ++++++++++++++++++++ tests/metadata/parsers/test_zora_parser.py | 8 ++--- 6 files changed, 50 insertions(+), 6 deletions(-) diff --git a/docs/changelog.md b/docs/changelog.md index f373ddb..d1776e1 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -1,5 +1,9 @@ # Changelog +## v0.3.2 + +- Fix an issue in `DataURIAdapter` where plain-text json data uri would get ignored + ## v0.3.1 - Trim token_uri in some log outputs, this is mainly useful for data uris that are too long and make logs unreadable diff --git a/docs/index.md b/docs/index.md index f1c3783..8630f6b 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,6 +1,6 @@ # Getting Started -Documentation for version: **v0.3.1** +Documentation for version: **v0.3.2** ## Overview diff --git a/offchain/metadata/adapters/data_uri.py b/offchain/metadata/adapters/data_uri.py index bcc9a48..094f928 100644 --- a/offchain/metadata/adapters/data_uri.py +++ b/offchain/metadata/adapters/data_uri.py @@ -3,6 +3,7 @@ from urllib.request import urlopen import httpx +import json from requests import PreparedRequest, Response from offchain.metadata.adapters.base_adapter import BaseAdapter @@ -17,6 +18,10 @@ def decode_data_url(data_url): # type: ignore[no-untyped-def] decoded_data = base64.b64decode(data) decoded_text = decoded_data.decode("utf-8") return decoded_text + elif "json;utf8" in data_parts[0]: + decoded_data = urlopen(data_url).read() + decoded_text = json.dumps(json.loads(decoded_data)) + return decoded_text return None diff --git a/pyproject.toml b/pyproject.toml index b602961..2e585c0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "offchain" -version = "0.3.1" +version = "0.3.2" description = "Open source metadata processing framework" authors = ["Zora eng "] readme = "README.md" diff --git a/tests/metadata/adapters/test_data_adapter.py b/tests/metadata/adapters/test_data_adapter.py index d21d409..25cacdb 100644 --- a/tests/metadata/adapters/test_data_adapter.py +++ b/tests/metadata/adapters/test_data_adapter.py @@ -1,4 +1,5 @@ import httpx +import json import pytest from pytest_httpx import HTTPXMock @@ -25,6 +26,25 @@ async def test_gen_head(self, httpx_mock: HTTPXMock): outgoing_request = httpx_mock.get_requests() assert not outgoing_request + @pytest.mark.asyncio + async def test_gen_head_not_base64(self, httpx_mock: HTTPXMock): + adapter = DataURIAdapter() + data_url = "data:application/json;utf8,{\"name\":\"here for now\",\"description\":\"sometimes i don't know how to feel when i'm away.\", \"image\": \"data:image/svg+xml;base64,PHN2ZyBpZD0iaDNpNXR6IiB2aWV3Qm94PSIwIDAgNDggNDgiIHdpZHRoPSIxMDAlIiBoZWlnaHQ9IjEwMCUiIHhtbG5zPSJodHRwOi8vd3d3LnczLm9yZy8yMDAwL3N2ZyIgeG1sbnM6eGxpbms9Imh0dHA6Ly93d3cudzMub3JnLzE5OTkveGxpbmsiIHZlcnNpb249IjEuMSIgPgoKPGltYWdlIHg9IjAiIHk9IjAiIHdpZHRoPSI0OCIgaGVpZ2h0PSI0OCIgaW1hZ2UtcmVuZGVyaW5nPSJwaXhlbGF0ZWQiIHByZXNlcnZlQXNwZWN0UmF0aW89InhNaWRZTWlkIiB4bGluazpocmVmPSJkYXRhOmltYWdlL3BuZztiYXNlNjQsaVZCT1J3MEtHZ29BQUFBTlNVaEVVZ0FBQUZBQUFBQlFDQU1BQUFDNXp3S2ZBQUFBWUZCTVZFVlFZWHIveUFFK1QxNy9rZ0I0YVdJUkVST3RwcHNhSGl3QUFBQVhtc3pKeGJ4MFhCUndxY2IvN2dKbGJIOWZiNGE1NWQ3L3RBRFBsdzFyWEZmOTVpMFBEQXVBa3F5bGhqTFQ4KzdLeU1jL1BUamh3ai8yeHk0VmFvcU1yc0t4MTlDSW1CMllBQUFENUVsRVFWUjRBZTJVaVpLak9CQkVzUllKdWZFMUFsbzJxSGYrL3k4M2xTQUt4Z1NodnZaKzl1QjB0WGxSTlFVVXh4dzBPZWJ3VndqZnRQYXRjNjJuOHROQ3JmczJ1RGJpUXFYMXA0VlYxWWVCd3VDSHZxbytLOVFoQk9kYmp1emNFSUwrckhCd2JoYUNyeEhDQ0w1VzZCeU1meFBoMjRwbjRmcnZHVUt0dXlYWXNnanhwVnVpZFphd1hLSjFQL2cyNG9laDBsMjVKRmRvU29NM1hnME9UU08zWHNNcUQvekZ4NFR4UkVLWENKdHM0V0NDTVVOOEJZUE1aUHhjR0ppWU00V3RPWnVoTlhpZERUTFRNSnhUb1dYQnhFS204R0VleHNURFN6dzhYaGI1bDBKMmg2M0JtNzFJUTVLSHVmRFJwVWpoZzF2ZW90bW81QXFiYkRLRm9OTnJ1bzBLeUJJU3JWNWVTdklTa2ZoSXNWTkhJVk40dTkvdm8xRGlnL0VEd2g1YnZOOXV0OUtBcmZpUkRwdXlhV0piaHBFalB3eHFqQjhRR2hLRkVwa1l1MG9wdFNWVUU4ZlQ4U1FaUWdGM1dZb1BrMkxYRjBYQnM4YVhPaDFWcEpoUXNhb3VCSm4vaDhTQXJZZ09TVExDemRObllheE9RdVNNTGZmcFBPbVF3bGVDRG9HNi9IN0JHNWxDN3BOQ2lZOFVPeVhuUlJSUGx3N1hRcFV4OGxxb2tFUjRuWVZFcVNJS2Nha0FXaHJDeUNLRWFlVGtROXdhbWVDdjNMS1p0N3dSSUdRanlRZHcrdWJJcXNnU3lzajAwWTdUeDdRV29tdU0vSXhlOElzUUNXOEl0MGFtVUsyYll0VGxqT2JJWUMza3lQWUpyZTBtdXJ6ZDd2RXl2RVdoQnJOd1JJRTlZZmNrL0hrREN5R2FJbHBYVlRVSlgrMTV4dHJ4T0grZUZ5UWhMMnh0dXFydm81Q2QxYlgzM3RweEtmWXdBd09QbHJYeFk5M2g2R09IVVFnWHNiWHRPdS8xaFVJNVAwTkkzOCtGOEFRZ3JQRjNYMWZUbHUwUE1BcW53QUkvVmtMNjBLV01EQ0ZNdnF2eHo5WTloSkYwdnFLUWlQQjVLVEx5cTZvNnVHcThmVHpPUWs1STRVSGd5SW5uTFhOa0gyMzFBVG9MMUNXTi9Cc1loUWdhVEFVZUU3TGxKTFNqejBaaHo1dDVzOE9kcFNRbW9jZWM0T0FHNSsxOFlZTjFoeFpJaDV1WERaZGlhL2JYTzNEQXpKT1FJK2NMNmJ1elE0ditJaitDRFhYd2ZubzQ3SGU0WUxwc1pDbnNUOGVKd3dGeEpSenBsTjFsTGJSUmlONEc2MXpnYmdyWjhqc3BTN3ZBKytDQ0NLWERmTFJlQ2V2Z1hBMm1od01XdmZ1UTRRMXVsMVg1RXJIOVdRY0lBWVNLdTk2OS9KTHdJSnlYMzZ3N09EY0tMVnp3dldVTDdlSTNFZ2JzcEZvSXdlNURCc0tOb2xMekl3cjlVamNKSXhTU2ZTRklGbmxFMWRRUlcwenNQMlE0OHJwS0lkTzJjUGNoQStIcTYvZ2J1YjNxbWI1SVpDeEZPSUJWaHdrdnd0MW5Bb1NwdU5laGw0RnpoWUpkQ24yMDRTQSt0U0VVWnFGVXJRaVpjRWltQ2Z0dVlKQzBnZnhxZzJMMUlSYUpHK1FJcDArUmZLOVFGZC9GRlUvWjYrWDZQaTQ0YTVtRmExRjlNZjhML3hmK0hZUi9BRVJPMzlYOE5Fb1VBQUFBQUVsRlRrU3VRbUNDIi8+Cgo8L3N2Zz4=\"}" # noqa + async with httpx.AsyncClient() as client: + result = await adapter.gen_head(url=data_url, sess=client) + + expected = httpx.Response( + status_code=200, + headers={"content-type": "application/json;utf8", "content-length": "2600"}, + request=httpx.Request(method="HEAD", url=data_url), + ) + assert result.status_code == 200 + assert result.request.method == "HEAD" + assert result.headers == expected.headers + # no real request was made + outgoing_request = httpx_mock.get_requests() + assert not outgoing_request + @pytest.mark.asyncio async def test_gen_send(self, httpx_mock: HTTPXMock): adapter = DataURIAdapter() @@ -37,3 +57,18 @@ async def test_gen_send(self, httpx_mock: HTTPXMock): # no real request was made outgoing_request = httpx_mock.get_requests() assert not outgoing_request + + @pytest.mark.asyncio + async def test_gen_send_not_base64(self, httpx_mock: HTTPXMock): + adapter = DataURIAdapter() + json_str = "{\"name\":\"here for now\",\"description\":\"sometimes i don't know how to feel when i'm away.\", \"image\": \"data:image/svg+xml;base64,PHN2ZyBpZD0iaDNpNXR6IiB2aWV3Qm94PSIwIDAgNDggNDgiIHdpZHRoPSIxMDAlIiBoZWlnaHQ9IjEwMCUiIHhtbG5zPSJodHRwOi8vd3d3LnczLm9yZy8yMDAwL3N2ZyIgeG1sbnM6eGxpbms9Imh0dHA6Ly93d3cudzMub3JnLzE5OTkveGxpbmsiIHZlcnNpb249IjEuMSIgPgoKPGltYWdlIHg9IjAiIHk9IjAiIHdpZHRoPSI0OCIgaGVpZ2h0PSI0OCIgaW1hZ2UtcmVuZGVyaW5nPSJwaXhlbGF0ZWQiIHByZXNlcnZlQXNwZWN0UmF0aW89InhNaWRZTWlkIiB4bGluazpocmVmPSJkYXRhOmltYWdlL3BuZztiYXNlNjQsaVZCT1J3MEtHZ29BQUFBTlNVaEVVZ0FBQUZBQUFBQlFDQU1BQUFDNXp3S2ZBQUFBWUZCTVZFVlFZWHIveUFFK1QxNy9rZ0I0YVdJUkVST3RwcHNhSGl3QUFBQVhtc3pKeGJ4MFhCUndxY2IvN2dKbGJIOWZiNGE1NWQ3L3RBRFBsdzFyWEZmOTVpMFBEQXVBa3F5bGhqTFQ4KzdLeU1jL1BUamh3ai8yeHk0VmFvcU1yc0t4MTlDSW1CMllBQUFENUVsRVFWUjRBZTJVaVpLak9CQkVzUllKdWZFMUFsbzJxSGYrL3k4M2xTQUt4Z1NodnZaKzl1QjB0WGxSTlFVVXh4dzBPZWJ3VndqZnRQYXRjNjJuOHROQ3JmczJ1RGJpUXFYMXA0VlYxWWVCd3VDSHZxbytLOVFoQk9kYmp1emNFSUwrckhCd2JoYUNyeEhDQ0w1VzZCeU1meFBoMjRwbjRmcnZHVUt0dXlYWXNnanhwVnVpZFphd1hLSjFQL2cyNG9laDBsMjVKRmRvU29NM1hnME9UU08zWHNNcUQvekZ4NFR4UkVLWENKdHM0V0NDTVVOOEJZUE1aUHhjR0ppWU00V3RPWnVoTlhpZERUTFRNSnhUb1dYQnhFS204R0VleHNURFN6dzhYaGI1bDBKMmg2M0JtNzFJUTVLSHVmRFJwVWpoZzF2ZW90bW81QXFiYkRLRm9OTnJ1bzBLeUJJU3JWNWVTdklTa2ZoSXNWTkhJVk40dTkvdm8xRGlnL0VEd2g1YnZOOXV0OUtBcmZpUkRwdXlhV0piaHBFalB3eHFqQjhRR2hLRkVwa1l1MG9wdFNWVUU4ZlQ4U1FaUWdGM1dZb1BrMkxYRjBYQnM4YVhPaDFWcEpoUXNhb3VCSm4vaDhTQXJZZ09TVExDemRObllheE9RdVNNTGZmcFBPbVF3bGVDRG9HNi9IN0JHNWxDN3BOQ2lZOFVPeVhuUlJSUGx3N1hRcFV4OGxxb2tFUjRuWVZFcVNJS2Nha0FXaHJDeUNLRWFlVGtROXdhbWVDdjNMS1p0N3dSSUdRanlRZHcrdWJJcXNnU3lzajAwWTdUeDdRV29tdU0vSXhlOElzUUNXOEl0MGFtVUsyYll0VGxqT2JJWUMza3lQWUpyZTBtdXJ6ZDd2RXl2RVdoQnJOd1JJRTlZZmNrL0hrREN5R2FJbHBYVlRVSlgrMTV4dHJ4T0grZUZ5UWhMMnh0dXFydm81Q2QxYlgzM3RweEtmWXdBd09QbHJYeFk5M2g2R09IVVFnWHNiWHRPdS8xaFVJNVAwTkkzOCtGOEFRZ3JQRjNYMWZUbHUwUE1BcW53QUkvVmtMNjBLV01EQ0ZNdnF2eHo5WTloSkYwdnFLUWlQQjVLVEx5cTZvNnVHcThmVHpPUWs1STRVSGd5SW5uTFhOa0gyMzFBVG9MMUNXTi9Cc1loUWdhVEFVZUU3TGxKTFNqejBaaHo1dDVzOE9kcFNRbW9jZWM0T0FHNSsxOFlZTjFoeFpJaDV1WERaZGlhL2JYTzNEQXpKT1FJK2NMNmJ1elE0ditJaitDRFhYd2ZubzQ3SGU0WUxwc1pDbnNUOGVKd3dGeEpSenBsTjFsTGJSUmlONEc2MXpnYmdyWjhqc3BTN3ZBKytDQ0NLWERmTFJlQ2V2Z1hBMm1od01XdmZ1UTRRMXVsMVg1RXJIOVdRY0lBWVNLdTk2OS9KTHdJSnlYMzZ3N09EY0tMVnp3dldVTDdlSTNFZ2JzcEZvSXdlNURCc0tOb2xMekl3cjlVamNKSXhTU2ZTRklGbmxFMWRRUlcwenNQMlE0OHJwS0lkTzJjUGNoQStIcTYvZ2J1YjNxbWI1SVpDeEZPSUJWaHdrdnd0MW5Bb1NwdU5laGw0RnpoWUpkQ24yMDRTQSt0U0VVWnFGVXJRaVpjRWltQ2Z0dVlKQzBnZnhxZzJMMUlSYUpHK1FJcDArUmZLOVFGZC9GRlUvWjYrWDZQaTQ0YTVtRmExRjlNZjhML3hmK0hZUi9BRVJPMzlYOE5Fb1VBQUFBQUVsRlRrU3VRbUNDIi8+Cgo8L3N2Zz4=\"}" # noqa + data_url = f"data:application/json;utf8,{json_str}" + async with httpx.AsyncClient() as client: + result = await adapter.gen_send(url=data_url, sess=client) + + assert result.status_code == 200 + assert result.request.method == "GET" + assert json.loads(result.text) == json.loads(json_str) + # no real request was made + outgoing_request = httpx_mock.get_requests() + assert not outgoing_request diff --git a/tests/metadata/parsers/test_zora_parser.py b/tests/metadata/parsers/test_zora_parser.py index 3dd7ae8..ecc5110 100644 --- a/tests/metadata/parsers/test_zora_parser.py +++ b/tests/metadata/parsers/test_zora_parser.py @@ -19,7 +19,7 @@ class TestZoraParser: token = Token( chain_identifier="ETHEREUM-MAINNET", collection_address="0xabefbc9fd2f806065b4f3c237d4b59d9a97bcac7", - token_id=5769, + token_id=31861, ) raw_data = { @@ -46,8 +46,8 @@ def test_zora_parser_parses_metadata(self): # type: ignore[no-untyped-def] token=Token( chain_identifier="ETHEREUM-MAINNET", collection_address="0xabefbc9fd2f806065b4f3c237d4b59d9a97bcac7", - token_id=5769, - uri="https://zora-dev.mypinata.cloud/ipfs/bafkreigux6jujn5hvlmptgzgok4reaie2gkuvsk2kynnalsyfgr4g35dkm", + token_id=31861, + uri="https://gateway.pinata.cloud/ipfs/bafkreid3jq3mlqz4d3w7emkxpftmfjbbxtkwe7kf25lzp2krwcxfd57m6q", ), raw_data={ "description": "A Lonely Soul,\n\nI've felt lonely lately. Somewhere deep inside, detached. \n\nThere must be plenty of lost souls wandering the globe. Looking to belong; to understand their purpose.\n\nI know my purpose, but I fear I've burned up surviving to the moment.\n\nDoes this count?\nAm I still pushing forward?\n\nI hope so...\n\nDo I still have time?\nAm I just floating?\n\nPlease, don't give up.\n\nAge 23 (2021)\n4096x4096px", @@ -63,7 +63,7 @@ def test_zora_parser_parses_metadata(self): # type: ignore[no-untyped-def] image=MediaDetails( size=13548199, sha256=None, - uri="https://zora-dev.mypinata.cloud/ipfs/bafybeiffwxjez2axebcprj2h7wkohr2pdbvuv37f7uxyptuw7o6t5fvppu", + uri="https://gateway.pinata.cloud/ipfs/bafybeifavbhn6ys3k4tvngt4rxkoo7vabiv4lnlszwkvdncjg245qz5chq", mime_type="image/jpeg", ), content=None,