diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..c659246 --- /dev/null +++ b/Makefile @@ -0,0 +1,37 @@ +# Simple makefile to simplify repetitive build env management tasks under posix + +CODESPELL_DIRS ?= ./ +CODESPELL_SKIP ?= "*.pyc,*.txt,*.gif,*.png,*.jpg,*.ply,*.vtk,*.vti,*.js,*.html,*.doctree,*.ttf,*.woff,*.woff2,*.eot,*.mp4,*.inv,*.pickle,*.ipynb,flycheck*,./.git/*,./.hypothesis/*,*.yml,./docs/_build/*,./docs/images/*,./dist/*,*~,.hypothesis*,./docs/examples/*,*.mypy_cache/*,*cover,./tests/htmlcov/*,*.css,*.svg" +CODESPELL_IGNORE ?= "ignore_words.txt" + +all: doctest + +doctest: codespell pydocstyle + +codespell: + @echo "Running codespell" + @codespell $(CODESPELL_DIRS) -S $(CODESPELL_SKIP) -I $(CODESPELL_IGNORE) + +pydocstyle: + @echo "Running pydocstyle" + @pydocstyle keepa + +doctest-modules: + @echo "Running module doctesting" + pytest -v --doctest-modules keepa + +coverage: + @echo "Running coverage" + @pytest -v --cov keepa + +coverage-xml: + @echo "Reporting XML coverage" + @pytest -v --cov keepa --cov-report xml + +coverage-html: + @echo "Reporting HTML coverage" + @pytest -v --cov keepa --cov-report html + +mypy: + @echo "Running mypy static type checking" + mypy keepa/ --no-incremental diff --git a/docs/source/product_query.rst b/docs/source/product_query.rst index 883270f..368c674 100644 --- a/docs/source/product_query.rst +++ b/docs/source/product_query.rst @@ -55,7 +55,10 @@ The ``products`` variable is a list of product data with one entry per successfu print('ASIN is ' + products[0]['asin']) print('Title is ' + products[0]['title']) -When the parameter ``history`` is True (enabled by default, the each product contains a The raw data is contained within each product result. Raw data is stored as a dictonary with each key paired with its associated time history. +When the parameter ``history`` is True (enabled by default), the +raw data is contained within each product +result. 
Raw data is stored as a dictionary with each key paired with +its associated time history. .. code:: python diff --git a/ignore_words.txt b/ignore_words.txt new file mode 100644 index 0000000..0870381 --- /dev/null +++ b/ignore_words.txt @@ -0,0 +1,3 @@ +lod +byteorder +flem diff --git a/keepa/_version.py b/keepa/_version.py index 91d25ef..528151d 100644 --- a/keepa/_version.py +++ b/keepa/_version.py @@ -1,6 +1,6 @@ """Version number for keepa """ # major, minor, patch, -extra -version_info = 1, 2, 1 +version_info = 1, 3, 0 __version__ = '.'.join(map(str, version_info)) diff --git a/keepa/interface.py b/keepa/interface.py index a9f4a56..89b2680 100644 --- a/keepa/interface.py +++ b/keepa/interface.py @@ -2,15 +2,16 @@ keepa.com """ -from tqdm import tqdm -import aiohttp import asyncio import datetime import json import logging +import time + +import aiohttp import numpy as np import pandas as pd -import time +from tqdm import tqdm from keepa.query_keys import DEAL_REQUEST_KEYS, PRODUCT_REQUEST_KEYS @@ -357,16 +358,15 @@ async def create(cls, accesskey): self.tokens_left = 0 # Store user's available tokens - log.info('Connecting to keepa using key ending in %s' % accesskey[-6:]) + log.info('Connecting to keepa using key ending in %s', accesskey[-6:]) await self.update_status() - log.info('%d tokens remain' % self.tokens_left) - + log.info('%d tokens remain', self.tokens_left) return self @property def time_to_refill(self): """ Returns the time to refill in seconds """ - # Get current timestamp in miliseconds from unix epoch + # Get current timestamp in milliseconds from UNIX epoch now = int(time.time() * 1000) timeatrefile = self.status['timestamp'] + self.status['refillIn'] @@ -401,7 +401,7 @@ async def query(self, items, stats=None, domain='US', history=True, offers=None, update=None, to_datetime=True, rating=False, out_of_stock_as_nan=True, stock=False, product_code_is_asin=True, progress_bar=True, buybox=False, - wait=True): + wait=True, days=None, 
only_live_offers=None): """ Performs a product query of a list, array, or single ASIN. Returns a list of product data with one entry for each product. @@ -496,7 +496,31 @@ async def query(self, items, stats=None, domain='US', history=True, parameter is required. wait : bool, optional - Wait available token before doing effective query, Defaults to ``True``. + Wait available token before doing effective query, + Defaults to ``True``. + + only_live_offers : bool, optional + If set to True, the product object will only include live + marketplace offers (when used in combination with the + offers parameter). If you do not need historical offers + use this to have them removed from the response. This can + improve processing time and considerably decrease the size + of the response. Default None + + days : int, optional + Any positive integer value. If specified and has positive + value X the product object will limit all historical data + to the recent X days. This includes the csv, + buyBoxSellerIdHistory, salesRanks, offers and + offers.offerCSV fields. If you do not need old historical + data use this to have it removed from the response. This + can improve processing time and considerably decrease the + size of the response. The parameter does not use calendar + days - so 1 day equals the last 24 hours. The oldest data + point of each field may have a date value which is out of + the specified range. This means the value of the field has + not changed since that date and is still active. 
Default + ``None`` Returns ------- @@ -641,7 +665,8 @@ async def query(self, items, stats=None, domain='US', history=True, # check offer input if offers: - assert isinstance(offers, int), 'Parameter "offers" must be an interger' + if not isinstance(offers, int): + raise TypeError('Parameter "offers" must be an interger') if offers > 100 or offers < 20: raise ValueError('Parameter "offers" must be between 20 and 100') @@ -651,9 +676,9 @@ async def query(self, items, stats=None, domain='US', history=True, 60000 - self.status['refillIn']) / 60000.0 if tcomplete < 0.0: tcomplete = 0.5 - log.debug('Estimated time to complete %d request(s) is %.2f minutes' % - (nitems, tcomplete)) - log.debug('\twith a refill rate of %d token(s) per minute' % + log.debug('Estimated time to complete %d request(s) is %.2f minutes', + nitems, tcomplete) + log.debug('\twith a refill rate of %d token(s) per minute', self.status['refillRate']) # product list @@ -685,7 +710,10 @@ async def query(self, items, stats=None, domain='US', history=True, to_datetime=to_datetime, out_of_stock_as_nan=out_of_stock_as_nan, buybox=buybox, - wait=wait) + wait=wait, + days=days, + only_live_offers=only_live_offers, + ) idx += nrequest products.extend(response['products']) @@ -738,7 +766,7 @@ async def _product_query(self, items, product_code_is_asin=True, **kwargs): ASINs. refillIn : float - Time in miliseconds to the next refill of tokens. + Time in milliseconds to the next refill of tokens. refilRate : float Number of tokens refilled per minute @@ -754,6 +782,7 @@ async def _product_query(self, items, product_code_is_asin=True, **kwargs): """ # ASINs convert to comma joined string assert len(items) <= 100 + if product_code_is_asin: kwargs['asin'] = ','.join(items) else: @@ -761,6 +790,8 @@ async def _product_query(self, items, product_code_is_asin=True, **kwargs): kwargs['key'] = self.accesskey kwargs['domain'] = DCODES.index(kwargs['domain']) + + # Convert bool values to 0 and 1. 
kwargs['stock'] = int(kwargs['stock']) kwargs['history'] = int(kwargs['history']) kwargs['rating'] = int(kwargs['rating']) @@ -776,10 +807,21 @@ async def _product_query(self, items, product_code_is_asin=True, **kwargs): else: kwargs['offers'] = int(kwargs['offers']) + if kwargs['only_live_offers'] is None: + del kwargs['only_live_offers'] + else: + kwargs['only-live-offers'] = int(kwargs.pop('only_live_offers')) + # Keepa's param actually doesn't use snake_case. + # I believe using snake case throughout the Keepa interface is better. + + if kwargs['days'] is None: + del kwargs['days'] + else: + assert kwargs['days'] > 0 + if kwargs['stats'] is None: del kwargs['stats'] - kwargs['rating'] = int(kwargs['rating']) out_of_stock_as_nan = kwargs.pop('out_of_stock_as_nan', True) to_datetime = kwargs.pop('to_datetime', True) @@ -842,7 +884,8 @@ async def best_sellers_query(self, category, rank_avg_range=0, domain='US', wait Default US wait : bool, optional - Wait available token before doing effective query, Defaults to ``True``. + Wait available token before doing effective query. + Defaults to ``True``. Returns ------- @@ -872,7 +915,8 @@ async def search_for_categories(self, searchterm, domain='US', wait=True): Input search term. wait : bool, optional - Wait available token before doing effective query, Defaults to ``True``. + Wait available token before doing effective query. + Defaults to ``True``. Returns ------- @@ -903,7 +947,8 @@ async def search_for_categories(self, searchterm, domain='US', wait=True): else: return response['categories'] - async def category_lookup(self, category_id, domain='US', include_parents=0, wait=True): + async def category_lookup(self, category_id, domain='US', + include_parents=0, wait=True): """ Return root categories given a categoryId. @@ -922,7 +967,8 @@ async def category_lookup(self, category_id, domain='US', include_parents=0, wai Include parents. 
wait : bool, optional - Wait available token before doing effective query, Defaults to ``True``. + Wait available token before doing effective query. + Defaults to ``True``. Returns ------- @@ -996,21 +1042,23 @@ async def seller_query(self, seller_id, domain='US', to_datetime=True, Using this parameter you can achieve the following: - - Retrieve data from Amazon: a storefront ASIN list containing - up to 2,400 ASINs, in addition to all ASINs already collected - through our database. - - Force a refresh: Always retrieve live data with the value 0. - - Retrieve the total number of listings of this seller: the - totalStorefrontAsinsCSV field of the seller object will be - updated. + - Retrieve data from Amazon: a storefront ASIN list + containing up to 2,400 ASINs, in addition to all ASINs + already collected through our database. + - Force a refresh: Always retrieve live data with the + value 0. + - Retrieve the total number of listings of this seller: + the totalStorefrontAsinsCSV field of the seller object + will be updated. wait : bool, optional - Wait available token before doing effective query, Defaults to ``True``. + Wait available token before doing effective query. + Defaults to ``True``. Returns ------- seller_info : dict - Dictionary containing one entry per input seller_id. + Dictionary containing one entry per input ``seller_id``. Examples -------- @@ -2092,7 +2140,7 @@ async def deals(self, deal_parms, domain='US', wait=True): You can find products that recently changed and match your search criteria. A single request will return a maximum of - 150 deals. Try ou the deals page to frist get accustomed to + 150 deals. Try out the deals page to first get accustomed to the options: https://keepa.com/#!deals @@ -2164,7 +2212,7 @@ async def deals(self, deal_parms, domain='US', wait=True): async def _request(self, request_type, payload, wait=True): """Queries keepa api server. Parses raw response from keepa - into a json format. 
Handles errors and waits for avaialbe + into a json format. Handles errors and waits for available tokens if allowed. """ @@ -2202,11 +2250,11 @@ def convert_offer_history(csv, to_datetime=True): Parameters ---------- csv : list - Offer list csv obtained from ['offerCSV'] + Offer list csv obtained from ``['offerCSV']`` to_datetime : bool, optional - Modifies numpy minutes to datetime.datetime values. - Default True. + Modifies ``numpy`` minutes to ``datetime.datetime`` values. + Default ``True``. Returns ------- diff --git a/tests/test_interface.py b/tests/test_interface.py index b8a9852..9cbf721 100644 --- a/tests/test_interface.py +++ b/tests/test_interface.py @@ -1,13 +1,15 @@ +import datetime import os +from itertools import chain import numpy as np -import pytest import pandas as pd +import pytest import keepa -import datetime - # reduce the request limit for testing +from keepa import keepa_minutes_to_time + keepa.interface.REQLIM = 2 try: @@ -190,6 +192,47 @@ def test_productquery_offers(api): assert len(prices) +def test_productquery_only_live_offers(api): + """Tests that no historical offer data was returned from response if only_live_offers param was specified.""" + max_offers = 20 + request = api.query(PRODUCT_ASIN, offers=max_offers, only_live_offers=True) + product_offers = request[0]['offers'] + + # Check there are no additional historical offers in the response. + assert len(product_offers) <= max_offers + + # All offers are live and have the same last_seen keepa minutes date. + last_seen_values = {offer['lastSeen'] for offer in product_offers} + assert len(last_seen_values) == 1 + + +def test_productquery_days(api, max_days: int = 5): + """Tests that 'days' param limits historical data to X days. + This includes the csv, buyBoxSellerIdHistory, salesRanks, offers and offers.offerCSV fields. + Each field may contain one day which seems out of specified range. 
This means the value of the field has been + unchanged since that date, and was still active at least until the max_days cutoff.""" + + request = api.query(PRODUCT_ASIN, days=max_days, history=True, offers=20) + product = request[0] + convert = lambda minutes: list(set(keepa_minutes_to_time(keepa_minute).date() for keepa_minute in minutes)) + + # Converting each field's list of keepa minutes into flat list of unique days. + sales_ranks = convert(chain.from_iterable(product['salesRanks'].values()))[0::2] + offers = convert(offer['lastSeen'] for offer in product['offers']) + buy_box_seller_id_history = convert(product['buyBoxSellerIdHistory'][0::2]) + offers_csv = list(convert(offer['offerCSV'][0::3]) for offer in product['offers']) + df_dates = list(list(df.axes[0]) for df_name, df in product['data'].items() if 'df_' in df_name and any(df)) + df_dates = list(list(datetime.date(year=ts.year, month=ts.month, day=ts.day) for ts in stamps) for stamps in df_dates) + + # Check for out of range days. + today = datetime.date.today() + is_out_of_range = lambda d: (today - d).days > max_days + for field_days in [sales_ranks, offers, buy_box_seller_id_history, *df_dates, *offers_csv]: + field_days.sort() + field_days = field_days[1:] if is_out_of_range(field_days[0]) else field_days # let oldest day be out of range + for day in field_days: + assert not is_out_of_range(day), day + def test_productquery_offers_invalid(api): with pytest.raises(ValueError): api.query(PRODUCT_ASIN, offers=2000)