Skip to content

Commit

Permalink
feat: simple script for top product stats
Browse files Browse the repository at this point in the history
  • Loading branch information
raphodn committed Aug 24, 2024
1 parent 612c939 commit 82e1431
Show file tree
Hide file tree
Showing 2 changed files with 56 additions and 2 deletions.
4 changes: 2 additions & 2 deletions scripts/gdpr/create_prices_from_gdpr_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

OPEN_PRICES_CREATE_PRICE_ENDPOINT = f'{os.environ.get("API_ENDPOINT")}/prices'
OPEN_PRICES_TOKEN = os.environ.get("API_TOKEN")
GDPR_FIELD_MAPPING_FILEPATH = "data/gdpr/gdpr_field_mapping.csv"
GDPR_FIELD_MAPPING_FILEPATH = "scripts/gdpr/gdpr_field_mapping.csv"

DEFAULT_PRICE_CURRENCY = "EUR"
PRICE_FIELDS = [
Expand Down Expand Up @@ -219,7 +219,7 @@ def create_price(price):
if __name__ == "__main__":
"""
How-to run:
> FILEPATH= poetry run python data/gdpr/create_prices_from_gdpr_csv.py
> FILEPATH= poetry run python scripts/gdpr/create_prices_from_gdpr_csv.py
Required params: see REQUIRED_ENV_PARAMS
"""
# Step 1: read input file
Expand Down
54 changes: 54 additions & 0 deletions scripts/stats/top_products.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import time

import requests

# Base URL of the Open Prices products listing endpoint (queried page by page below).
# Example query string: page=1&size=10&order_by=-unique_scans_n&source=off
OPEN_PRICES_PRODUCT_ENDPOINT = "https://prices.openfoodfacts.org/api/v1/products"


def get_product_list(count=100, source="off"):
    """
    Get the list of products from Open Prices

    Pages through the products endpoint (100 items per page, requested with
    ``order_by=-unique_scans_n``) until ``count`` products are collected.

    Args:
        count: maximum number of products to return.
        source: value passed as the ``source`` query parameter.

    Returns:
        A list of at most ``count`` product dicts, in the order the API
        returned them. The list may be shorter if the API runs out of
        products or answers with a non-200 status.

    Raises:
        requests.exceptions.RequestException: on network failure or timeout.
    """
    product_list = []

    # loop the API
    page = 1
    size = 100
    while len(product_list) < count:
        url = f"{OPEN_PRICES_PRODUCT_ENDPOINT}?page={page}&size={size}&order_by=-unique_scans_n&source={source}"
        print(url, len(product_list))
        # A timeout prevents the script from hanging forever on a stalled connection
        response = requests.get(url, timeout=30)
        if response.status_code != 200:
            break

        items = response.json()["items"]
        if not items:
            # No more products available: stop instead of looping forever
            break

        product_list += items
        page += 1
        time.sleep(1)  # be gentle with the API between pages

    # The last page may overshoot the requested count
    return product_list[:count]


def aggregate_product_list(product_list, field="price_count"):
    """
    Count how many products have a truthy vs. falsy value for ``field``

    Returns a dict with the string keys "True" and "False" mapping to the
    number of products in each group. A product missing ``field`` raises
    KeyError.
    """
    tally = {"True": 0, "False": 0}

    for item in product_list:
        # str(bool(...)) yields exactly "True" or "False", matching the keys
        bucket = str(bool(item[field]))
        tally[bucket] += 1

    return tally


if __name__ == "__main__":
    # How-to run:
    # > poetry run python scripts/stats/top_products.py

    # Step 1: fetch up to 1000 products from Open Prices and report how many came back
    top_products = get_product_list(count=1000)
    print(len(top_products))

    # Step 2: split them by whether their "price_count" is truthy, and print the tally
    print(aggregate_product_list(top_products, field="price_count"))

0 comments on commit 82e1431

Please sign in to comment.