From afc9fe77d259f6da018179596755e2b578924195 Mon Sep 17 00:00:00 2001 From: Jitendra Varma Date: Sat, 4 May 2024 17:55:31 +0530 Subject: [PATCH] Reports: Added logic to download bhav copy and market activity reports. (#50) Co-authored-by: Witchking --- README.md | 37 ++++++++-- nsedt/derivatives/__init__.py | 5 +- nsedt/derivatives/options.py | 35 +++++---- nsedt/equity.py | 1 + nsedt/reports.py | 58 +++++++++++++++ nsedt/resources/constants.py | 5 ++ nsedt/utils/__init__.py | 118 ++++++++++++++++++++++++++---- requirements.txt | 1 + setup.py | 1 + tests/test_derivatives_options.py | 4 +- tests/test_reports.py | 25 +++++++ 11 files changed, 248 insertions(+), 42 deletions(-) create mode 100644 nsedt/reports.py create mode 100644 tests/test_reports.py diff --git a/README.md b/README.md index 39b8124..5a36c4b 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,7 @@ Thank you for using Nsedt. Please feel free to send pull requests, comments, and `pip install -r requirements.txt` 3. Install locally `pip install . 
--upgrade` + --- ## Equity @@ -84,13 +85,14 @@ data["Date"] = pd.to_datetime(data["Date"],format='%d-%b-%Y') ### Details -| Name | Module name | Description | Argument | Response | -| ------------------------ | ------------------------ | ------------------------ | ------------------------------------------------------------------------- | -------- | -| vix | get_vix | price | start_date, end_date,columns_drop_list | panda df | -| option chain | get_option_chain | get option price | symbol,strikePrice,expiryDate | panda df | -| option chain expiry date | get_option_chain_expdate | option chain expiry date | symbol | json | -| future price | get_future_price | get future price | symbol, start_date, end_date, expiryDate,response_type, columns_drop_list | panda df | -| future expiry date | get_future_expdate | future expiry date | symbol | json | +| Name | Module name | Description | Argument | Response | +| ------------------------ | -------------------------- | ---------------------------------------------------- | ------------------------------------------------------------------------- | --------------- | +| vix | get_vix | price | start_date, end_date,columns_drop_list | panda df | +| option chain | get_option_chain | get option price | symbol,strikePrice,expiryDate | panda df | +| option chain expiry date | get_option_chain_expdate | option chain expiry date | symbol | json | +| future price | get_future_price | get future price | symbol, start_date, end_date, expiryDate,response_type, columns_drop_list | panda df | +| future expiry date | get_future_expdate | future expiry date | symbol | json | +| historical option chain | get_historical_option_data | get historical option value for a given strike price | symbol, start_date,end_date,option_type,strike_price,year,expiry_date | json, pandas df | ### step to run @@ -99,6 +101,7 @@ from nsedt import derivatives as de start_date = "24-04-2024" end_date = "25-04-2024" # date format "%d-%m-%Y" + 
print(de.get_vix(start_date, end_date)) print(de.get_option_chain_expdate(symbol="TCS")) print(de.get_option_chain(symbol="TCS", strike_price=3300, expiry_date="30-05-2024")) @@ -106,3 +109,23 @@ print(de.get_future_price(symbol="TCS", start_date=start_date, end_date=end_date print(de.get_future_expdate(symbol="TCS")) print(de.get_historical_option_data(symbol="TATAMOTORS", start_date=start_date, end_date=end_date, option_type="CE", strike_price="1020", year="2024", expiry_date="30-May-2024")) ``` + +## Reports + +### Details + +| Name | Module name | Description | Argument | Response | +| --------------- | -------------------------- | -------------------------------------- | ----------------------- | -------- | +| market activity | get_market_activity_report | get raw content of market activity report | date | bytes | +| bhav copy | get_bhav_copy_zip | download bhav copy zip for a given day and read its csv | date, response_type | json, pandas df | + +### step to run + +```py +from nsedt import reports as rep +# date format "%d-%m-%Y" + +print(rep.get_market_activity_report(date="30-04-2024")) # requested from NSE as %d%m%y, i.e. 300424 +print(rep.get_bhav_copy_zip(date="30-04-2024", response_type="panda_df")) # requested from NSE as %d%b%Y, i.e. 30APR2024 + +``` diff --git a/nsedt/derivatives/__init__.py b/nsedt/derivatives/__init__.py index 33f98fe..197b60b 100644 --- a/nsedt/derivatives/__init__.py +++ b/nsedt/derivatives/__init__.py @@ -12,10 +12,7 @@ from nsedt.resources import constants as cns from nsedt.utils import data_format from nsedt.derivatives.options import ( - get_option_chain, - get_option_chain_expdate, - get_historical_option_data - ) + get_option_chain, get_option_chain_expdate, get_historical_option_data) from nsedt.derivatives.futures import get_future_price, get_future_expdate log = logging.getLogger("root") diff --git a/nsedt/derivatives/options.py b/nsedt/derivatives/options.py index f09b357..8b7fe7f 100644 --- a/nsedt/derivatives/options.py +++ b/nsedt/derivatives/options.py @@ -129,31 +129,39 @@ def 
get_historical_option_data( """ Get historical data for option chain for a given expiry Args: - symbol (str): _description_ - start_date (str): _description_ - end_date (str): _description_ - option_type (str): _description_. - option_type (str): _description_. - strike_price (str): _description_. - year (str): _description_. - expiry_date (str): _description_. - response_type (str, optional): _description_. Defaults to "panda_df". - columns_drop_list (list, optional): _description_. Defaults to None. + symbol (str): valid scrip name + start_date (str): in %d-%m-%Y format + end_date (str): in %d-%m-%Y format + option_type (str): CE or PE. + strike_price (str): valid integer. + year (str): in %Y format eg 2024. + expiry_date (str): in %d-%m-%Y format + response_type (str, optional): either json or panda_df. Defaults to "panda_df". + columns_drop_list (list, optional): list of columns to skip. Defaults to None. Returns: - _type_: _description_ + _type_: either json or pandas df. Defaults to pandas df """ cookies = utils.get_cookies() base_url = cns.BASE_URL event_api = cns.FNO_HISTORY symbol = utils.get_symbol(symbol=symbol, get_key="derivatives") + + if option_type not in ["CE", "PE"]: + raise ValueError("Option type must be either CE or PE") + + try: + expiry_date = datetime.strptime(expiry_date, "%d-%m-%Y").strftime("%d-%b-%Y") + except Exception as e: + raise ValueError("Please give expiry date in %d-%b-%Y format") from e + params = { "symbol": symbol, "from": start_date, "to": end_date, "instrumentType": "OPTSTK", "optionType": option_type, - "expiryDate": datetime.strptime(expiry_date, "%d-%m-%Y").strftime("%d-%b-%Y"), + "expiryDate": expiry_date, "strikePrice": strike_price, "year": year, } @@ -165,6 +173,5 @@ def get_historical_option_data( return data_format.derivaties_options( data, - response_type=response_type, - columns_drop_list=columns_drop_list, + response_type=response_type, columns_drop_list=columns_drop_list, ) diff --git a/nsedt/equity.py 
b/nsedt/equity.py index 31d6d13..937b1e5 100644 --- a/nsedt/equity.py +++ b/nsedt/equity.py @@ -11,6 +11,7 @@ from nsedt import utils from nsedt.resources import constants as cns + from nsedt.utils import data_format log = logging.getLogger("root") diff --git a/nsedt/reports.py b/nsedt/reports.py new file mode 100644 index 0000000..c0c6c22 --- /dev/null +++ b/nsedt/reports.py @@ -0,0 +1,58 @@ +""" +functions to download reports +""" + +import logging + +from nsedt.utils import get_cookies, fetch_csv, format_date, fetch_zip +from nsedt.resources.constants import ( + REPORT_URL, MARKET_ACTIVITY_REPORT, BHAV_COPY_REPORT) + +log = logging.getLogger("root") + + +def get_market_activity_report(date: str): + """ + get_market_activity_report + + Args:\n + date (str): date for which to download market activity report,\n + in "%d-%m-%Y" format eg: 30-04-2024\n + Returns: + bytes: raw content of the file as right now it's not possible + to format the content to json or pandas df + Expects: + date to be in "%d-%m-%Y" format; it is converted to "ddmmyy" for the url + eg: 30-04-2024 => 300424, all other cases will be invalidated + """ + date = format_date(date, date_format='%d%m%y') + if not date: + raise ValueError("Please provide date format in '%d-%m-%Y' format") + + cookies = get_cookies() + url = f"{REPORT_URL}{MARKET_ACTIVITY_REPORT}{date}.csv" + return fetch_csv(url, cookies, response_type="raw") + + +def get_bhav_copy_zip(date: str, response_type: str="panda_df"): + """ + get_bhav_copy_zip + + Args:\n + date (str): date for which to download the bhav copy, in "%d-%m-%Y" format\n + response_type (str, Optional): define the response type panda_df | json . Default panda_df + Returns: + json or pandas df: content of the csv file inside the bhav copy zip + Expects: + date to be in "%d-%m-%Y" format; it is converted for the url eg: 30-04-2024 => 30APR2024 + all other cases will be invalidated + """ + + date = format_date(date, date_format='%d%b%Y') + if not date: + raise ValueError("Please provide date format in '%d-%m-%Y' format") + date = date.upper() + 
cookies = get_cookies() + url = f"{REPORT_URL}{BHAV_COPY_REPORT}{date[2:5]}/cm{date}bhav.csv.zip" + file_name = url.split("/")[-1].replace(".zip", "") + return fetch_zip(url, cookies, file_name=file_name, response_type=response_type) diff --git a/nsedt/resources/constants.py b/nsedt/resources/constants.py index 5163822..b4c5259 100644 --- a/nsedt/resources/constants.py +++ b/nsedt/resources/constants.py @@ -18,6 +18,7 @@ }""" BASE_URL = "https://www.nseindia.com/" +REPORT_URL = "https://nsearchives.nseindia.com/" ### EQUITY EQUITY_PRICE_HISTORY = "api/historical/securityArchives?" @@ -38,3 +39,7 @@ INDICES = ["NIFTY", "FINNIFTY", "BANKNIFTY"] VIX_HISTORY = "api/historical/vixhistory?" FNO_HISTORY = "api/historical/foCPV?" + +# Reports +MARKET_ACTIVITY_REPORT = "archives/equities/mkt/MA" +BHAV_COPY_REPORT = "content/historical/EQUITIES/2024/" diff --git a/nsedt/utils/__init__.py b/nsedt/utils/__init__.py index 5d39cc6..9bed2c6 100644 --- a/nsedt/utils/__init__.py +++ b/nsedt/utils/__init__.py @@ -2,15 +2,42 @@ utils for nsedt """ +import io import json import datetime +import zipfile + +from io import BytesIO + +from datetime import datetime, date from warnings import warn -import pandas as pd + import requests + +import pandas as pd + +from fake_http_header import FakeHttpHeader from nsedt.resources import constants as cns + + +def format_date(input_string: str, date_format: str): + """ + Args:\n + - input_string : str date format for a format to check + - format : type of string to format + Returns:\n + - str: date format in input string + """ + try: + return datetime.strptime(input_string, "%d-%m-%Y").strftime(date_format) + except ValueError: + return None + + + def get_headers(): """ Args: @@ -19,16 +46,9 @@ def get_headers(): Json: json containing nse header """ - return { - "Host": "www.nseindia.com", - "User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:85.0) Gecko/20100101 Firefox/85.0", - "Accept": "*/*", - "Accept-Language": "en-US,en;q=0.5", - 
"Accept-Encoding": "gzip, deflate, br", - "X-Requested-With": "XMLHttpRequest", - "DNT": "1", - "Connection": "keep-alive", - } + return FakeHttpHeader().as_header_dict() + + def get_cookies(): @@ -99,7 +119,7 @@ def get_symbol(symbol: str, get_key: str) -> str: if symbol in key_list: val = item[get_key] - return val if val else symbol + return val or symbol def check_nd_convert(start_date: str, end_date: str) -> datetime: @@ -116,7 +136,7 @@ def check_nd_convert(start_date: str, end_date: str) -> datetime: :return: the start_date and end_date as datetime objects. """ - if isinstance(start_date, datetime.date) and isinstance(end_date, datetime.date): + if isinstance(start_date, date) and isinstance(end_date, date): warn( """Passing start_date, end_date in date is deprecated now pass in str '%d-%m-%Y' format""", @@ -125,10 +145,78 @@ def check_nd_convert(start_date: str, end_date: str) -> datetime: ) elif isinstance(start_date, str) and isinstance(end_date, str): - start_date = datetime.datetime.strptime(start_date, "%d-%m-%Y") - end_date = datetime.datetime.strptime(end_date, "%d-%m-%Y") + start_date = datetime.strptime(start_date, "%d-%m-%Y") + end_date = datetime.strptime(end_date, "%d-%m-%Y") else: raise ValueError("Input is of an unknown type") return start_date, end_date + + + +def fetch_csv(url, cookies, response_type="panda_df"): + """ + Args: + + url (str): URL to fetch + cookies (str): NSE cookies + key (str, Optional): + + Returns: + + Pandas DataFrame: df generated from csv + OR + Json: json output of the csv + OR + String: raw content for files where it cannot be processed into Json or + Pandas df + + """ + + response = requests.get( + url=url, timeout=30, headers=get_headers(), cookies=cookies ) + if response.status_code == 200: + if response_type == "raw": + return response.content + csv_content = response.content.decode('utf-8') + df = pd.read_csv(io.StringIO(csv_content), error_bad_lines=False) + return df.to_json(orient='records') if response_type 
== "json" else df + raise ValueError("Please try again in a minute.") + + +def fetch_zip(url, cookies, file_name, response_type="panda_df"): + """ + Args: + + url (str): URL to fetch + cookies (str): NSE cookies + key (str, Optional): + + Returns: + + Pandas DataFrame: df generated from csv + OR + Json: json output of the csv + OR + Pandas DF: Pandas df of the csv file + """ + + if not file_name: + raise ValueError("Please give file name to return") + + response = requests.get( + url=url, timeout=30, headers=get_headers(), cookies=cookies ) + if response.status_code == 200: + zip_content = BytesIO(response.content) + # Open the zip file in memory + with zipfile.ZipFile(zip_content, 'r') as zip_ref: + # Retrieve the list of file names in the zip file + try: + csv_content = zip_ref.read(file_name) + except Exception as e: + raise ValueError("File not found in the zip folder.") from e + + df = pd.read_csv(BytesIO(csv_content)) + return df.to_json(orient='records') if response_type == "json" else df + raise ValueError("File might not be available this time or check your params") diff --git a/requirements.txt b/requirements.txt index d57ab90..01e0694 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,3 +2,4 @@ requests numpy pandas Deprecated +fake-http-header diff --git a/setup.py b/setup.py index 2ca9d3a..ce26835 100644 --- a/setup.py +++ b/setup.py @@ -21,6 +21,7 @@ "requests", "numpy", "pandas", + "fake-http-header" ], classifiers=[ "Development Status :: 3 - Alpha", diff --git a/tests/test_derivatives_options.py b/tests/test_derivatives_options.py index 45c7801..4b73e60 100644 --- a/tests/test_derivatives_options.py +++ b/tests/test_derivatives_options.py @@ -3,8 +3,9 @@ """ from typing import List + import pandas as pd -import pytest + from nsedt.derivatives import options as opt START_DATE = "01-01-2024" @@ -70,7 +71,6 @@ def test_get_option_chain_expdate(): assert len(data) > 1 -@pytest.mark.skip("Need to Fix code") def test_get_historical_option_data(): 
""" Test the get_historical_option_data function from nsedt.derivatives.options module. diff --git a/tests/test_reports.py b/tests/test_reports.py new file mode 100644 index 0000000..4122bd4 --- /dev/null +++ b/tests/test_reports.py @@ -0,0 +1,25 @@ +""" + Test case for nsedt.reports +""" + +import pandas as pd +from nsedt.reports import get_bhav_copy_zip, get_market_activity_report + +# modify date to the latest date +REPORT_DATE = "02-05-2024" + + +def test_get_market_activity_report(): + """ + Test get market activity report for a given date + """ + data = get_market_activity_report(date=REPORT_DATE) + assert isinstance(data, bytes) + + +def test_get_bhav_copy_zip(): + """ + Test get bhav copy report for a given date + """ + data = get_bhav_copy_zip(date=REPORT_DATE) + assert isinstance(data, pd.DataFrame)