From 3e0cf2c2e0ee5fd0953bb605bdd37293886d4da5 Mon Sep 17 00:00:00 2001 From: Kevin Lloyd Bernal Date: Tue, 23 Jul 2024 11:26:03 +1000 Subject: [PATCH] use new duplicate-url-discarder==0.2.0 item pipeline --- requirements.txt | 2 +- zyte_spider_templates_project/settings.py | 16 ++++++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index d83099b..fc7b705 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ Scrapy==2.11.2 scrapy-zyte-api[provider]>=0.17.3 zyte-spider-templates==0.7.2 -duplicate-url-discarder[rules]>=0.1.0 +duplicate-url-discarder[rules]>=0.2.0 diff --git a/zyte_spider_templates_project/settings.py b/zyte_spider_templates_project/settings.py index a92bb07..0b92d9a 100644 --- a/zyte_spider_templates_project/settings.py +++ b/zyte_spider_templates_project/settings.py @@ -30,3 +30,19 @@ "zyte_spider_templates.pages", "zyte_spider_templates_project.pages", ] + +# duplicate-url-discarder +DUD_ATTRIBUTES_PER_ITEM = { + "zyte_common_items.Product": [ + "canonicalUrl", + "brand", + "name", + "gtin", + "mpn", + "productId", + "sku", + "color", + "size", + "style" + ], +}