diff --git a/docs/data.md b/docs/data.md index c3ac4d418d..0ae8a45866 100644 --- a/docs/data.md +++ b/docs/data.md @@ -178,73 +178,7 @@ Once we have the data we can also store it, in principle, anywhere but I will be By the way I can't just pull down this data myself and put it on github to save you time. Storing large amounts of data in github isn't a good idea regardless of whether it is in .csv or Mongo files, and there would also be licensing issues with me basically just copying and pasting raw data that belongs to someone else. You have to get, and then store, this stuff yourself. And of course at some point in a live system you would be updating this yourself. -An easy way to bulk download data from [Barchart](https://www.barchart.com) is to create a Premier account, which allows for up to 100 data downloads per day, and to use [bc-utils](https://github.com/bug-or-feature/bc-utils) by [Andy Geach](https://github.com/bug-or-feature). -We explain how to use it with pysystemtrade at the time of writing below, but we recommend that you read the bc-utils documentation in case these instructions become stale with updated versions of the tool. - -To set up bc-utils for use with pysystemtrade, you can use the following steps: -1. Clone the bc-utils repo to some directory of your choice. For concreteness, we will be using `~/bc-utils` here. - -2. Edit `~/bc-utils/bcutils/config.py` to contain the list of contracts you want to download data for. -For example, -```python -CONTRACT_MAP = { - "RICE": {"code": "ZR", "cycle": "FHKNUX", "tick_date": "2009-01-01"}, - "SOYOIL": {"code": "ZL", "cycle": "FHKNQUVZ", "tick_date": "2008-05-04"}, -} -``` -indicates that we are downloading data for the contracts ZR and ZL on Barchart and are matching them to the symbols RICE and SOYOIL, respectively, in pysystemtrade. -Further, we are downloading the months FHKNUX and FHKNQUVZ, respectively, with hourly data starting from 2009-01-01 and 2008-05-04, respectively, and daily data before those dates. - -3. Replace the last code block in `~/bc-utils/bcutils/bc_utils.py` (starting from line 420, at [the time of writing](https://github.com/bug-or-feature/bc-utils/commit/3b95acaa2bbae87af3aaef65dd4f50839986a7d4)) with - -```python -get_barchart_downloads( - create_bc_session(config=config), - contract_map=CONTRACT_MAP, - save_directory="BARCHART_DATA_DOWNLOAD_DIRECTORY", - start_year=1975, - end_year=2026, - dry_run=False) -``` -(Here, you can set `dry_run` to `True` if you would like to try this script without using any of your 100 daily downloads.) - -4. In `~/bc-utils/bcutils/bc_utils.py`, set your Barchart username (BARCHART_USERNAME), password (BARCHART_PASSWORD), and the desired data path (BARCHART_DATA_DOWNLOAD_DIRECTORY) for the Barchart data here: -```python -'barchart_username': 'BARCHART_USERNAME', -'barchart_password': 'BARCHART_PASSWORD' -``` - -5. If desired, add bc-utils to your crontab by adding a line like -``` -00 08 * * 1-7 . $HOME/.profile; cd ~/bc-utils ; python3 bcutils/bc_utils.py >> $ECHO_PATH/barchart_download.txt 2>&1 -``` -This can be helpful given the daily limit of 100 downloads. - -6. Once you have downloaded the data you want, you can add them to the mongo database by running the following python snippet (with your chosen BARCHART_DATA_DOWNLOAD_DIRECTORY) from the pysystemtrade directory: -```python -from sysdata.csv.csv_futures_contract_prices import ConfigCsvFuturesPrices -from sysinit.futures.contract_prices_from_csv_to_arctic import ( - init_arctic_with_csv_futures_contract_prices, -) - - -barchart_csv_config = ConfigCsvFuturesPrices(input_date_index_name="Time", - input_skiprows=0, - input_skipfooter=1, - input_date_format="%Y-%m-%d", - input_column_mapping=dict(OPEN="Open", HIGH="High", LOW="Low", FINAL="Close", VOLUME="Volume" - ), -) - - -def transfer_barchart_prices_to_arctic(datapath): - init_arctic_with_csv_futures_contract_prices( - datapath, csv_config=barchart_csv_config - ) - - -transfer_barchart_prices_to_arctic(BARCHART_DATA_DOWNLOAD_DIRECTORY) -``` +An easy way to bulk download data from [Barchart](https://www.barchart.com) is to create a Premier account, which allows for up to 250 data downloads per day, and to use [bc-utils](https://github.com/bug-or-feature/bc-utils). That project has a [guide for pysystemtrade users](https://github.com/bug-or-feature/bc-utils?tab=readme-ov-file#for-pysystemtrade-users). Alternatively, if you are very patient, you can manually download the data from the Barchart historical data pages, such as [this one for Cotton #2](https://www.barchart.com/futures/quotes/KG*0/historical-download). diff --git a/sysdata/csv/csv_futures_contract_prices.py b/sysdata/csv/csv_futures_contract_prices.py index e1b9f8fcba..872b0db99f 100644 --- a/sysdata/csv/csv_futures_contract_prices.py +++ b/sysdata/csv/csv_futures_contract_prices.py @@ -223,7 +223,7 @@ def _keyname_given_contract_object_and_freq( if frequency is MIXED_FREQ: frequency_str = "" else: - frequency_str = frequency.name + "/" + frequency_str = frequency.name + "_" instrument_str = str(futures_contract_object.instrument) date_str = str(futures_contract_object.date_str) @@ -239,11 +239,11 @@ def _contract_tuple_and_freq_given_keyname(self, keyname: str) -> tuple: :param keyname: str :return: tuple instrument_code, contract_date """ - first_split_keyname_as_list = keyname.split("/") - if len(first_split_keyname_as_list) == 2: + if keyname.startswith("Day") or keyname.startswith("Hour"): ## has frequency - frequency = Frequency[first_split_keyname_as_list[0]] - residual_keyname = first_split_keyname_as_list[1] + index = keyname.find("_") + frequency = Frequency[keyname[:index]] + residual_keyname = keyname[index + 1 :] else: ## no frequency, mixed data frequency = MIXED_FREQ diff --git a/sysinit/futures/contract_prices_from_csv_to_arctic.py b/sysinit/futures/contract_prices_from_csv_to_arctic.py index 4d489e8bae..3d46d695f1 100644 --- a/sysinit/futures/contract_prices_from_csv_to_arctic.py +++ b/sysinit/futures/contract_prices_from_csv_to_arctic.py @@ -1,5 +1,6 @@ from syscore.constants import arg_not_supplied - +from syscore.dateutils import MIXED_FREQ, HOURLY_FREQ, DAILY_PRICE_FREQ +from syscore.pandas.frequency import merge_data_with_different_freq from sysdata.csv.csv_futures_contract_prices import csvFuturesContractPriceData from sysproduction.data.prices import diagPrices from sysobjects.contracts import futuresContract @@ -8,7 +9,9 @@ def init_db_with_csv_futures_contract_prices( - datapath: str, csv_config=arg_not_supplied + datapath: str, + csv_config=arg_not_supplied, + frequency=MIXED_FREQ, ): csv_prices = csvFuturesContractPriceData(datapath) input( @@ -16,25 +19,32 @@ def init_db_with_csv_futures_contract_prices( % csv_prices.datapath ) - instrument_codes = csv_prices.get_list_of_instrument_codes_with_merged_price_data() + instrument_codes = ( + csv_prices.get_list_of_instrument_codes_with_price_data_at_frequency(frequency) + ) instrument_codes.sort() for instrument_code in instrument_codes: init_db_with_csv_futures_contract_prices_for_code( - instrument_code, datapath, csv_config=csv_config + instrument_code, datapath, csv_config=csv_config, frequency=frequency ) def init_db_with_csv_futures_contract_prices_for_code( - instrument_code: str, datapath: str, csv_config=arg_not_supplied + instrument_code: str, + datapath: str, + csv_config=arg_not_supplied, + frequency=MIXED_FREQ, ): print(instrument_code) csv_prices = csvFuturesContractPriceData(datapath, config=csv_config) db_prices = diag_prices.db_futures_contract_price_data - print("Getting .csv prices may take some time") - csv_price_dict = csv_prices.get_merged_prices_for_instrument(instrument_code) + print(f"Getting {frequency} .csv prices may take some time") + csv_price_dict = csv_prices.get_prices_at_frequency_for_instrument( + instrument_code, frequency + ) - print("Have .csv prices for the following contracts:") + print(f"Have {frequency} .csv prices for the following contracts:") print(str(csv_price_dict.keys())) for contract_date_str, prices_for_contract in csv_price_dict.items(): @@ -43,16 +53,53 @@ def init_db_with_csv_futures_contract_prices_for_code( contract = futuresContract(instrument_code, contract_date_str) print("Contract object is %s" % str(contract)) print("Writing to db") - db_prices.write_merged_prices_for_contract_object( - contract, prices_for_contract, ignore_duplication=True + db_prices.write_prices_at_frequency_for_contract_object( + contract, + prices_for_contract, + ignore_duplication=True, + frequency=frequency, + ) + print(f"Reading back {frequency} prices from db to check") + written_prices = db_prices.get_prices_at_frequency_for_contract_object( + contract, frequency=frequency ) - print("Reading back prices from db to check") - written_prices = db_prices.get_merged_prices_for_contract_object(contract) print("Read back prices are \n %s" % str(written_prices)) + # if we're importing hourly or daily, we need to also generate MIXED + if frequency != MIXED_FREQ: + create_merged_prices(contract) + + +def create_merged_prices(contract): + db_prices = diag_prices.db_futures_contract_price_data + if db_prices.has_price_data_for_contract_at_frequency( + contract, DAILY_PRICE_FREQ + ) and db_prices.has_price_data_for_contract_at_frequency(contract, HOURLY_FREQ): + print(f"DB has hourly and daily prices for {contract}, creating merged prices") + list_of_data = [ + diag_prices.get_prices_at_frequency_for_contract_object( + contract, + frequency=frequency, + ) + for frequency in [HOURLY_FREQ, DAILY_PRICE_FREQ] + ] + merged_prices = merge_data_with_different_freq(list_of_data) + print("Writing to db") + db_prices.write_prices_at_frequency_for_contract_object( + contract, merged_prices, frequency=MIXED_FREQ, ignore_duplication=True + ) + print("Reading back prices from db to check") + written_merged_prices = db_prices.get_prices_at_frequency_for_contract_object( + contract, frequency=MIXED_FREQ + ) + + print(f"Read back prices (MIXED) are \n{str(written_merged_prices)}") + if __name__ == "__main__": input("Will overwrite existing prices are you sure?! CTL-C to abort") # modify flags as required datapath = "*** NEED TO DEFINE A DATAPATH***" init_db_with_csv_futures_contract_prices(datapath) + # init_db_with_csv_futures_contract_prices(datapath, frequency=HOURLY_FREQ) + # init_db_with_csv_futures_contract_prices(datapath, frequency=DAILY_PRICE_FREQ)