Reports: Added logic to download bhav copy and market activity report…

…s. (#50) Co-authored-by: Witchking <[email protected]>
pratik141 · May 4, 2024 · afc9fe7 · afc9fe7
1 parent 5bec16d
commit afc9fe7
Show file tree

Hide file tree

Showing 11 changed files with 248 additions and 42 deletions.
diff --git a/README.md b/README.md
@@ -15,6 +15,7 @@ Thank you for using Nsedt. Please feel free to send pull requests, comments, and
    `pip install -r requirements.txt`
 3. Install locally
    `pip install . --upgrade`
+
 ---
 
 ## Equity
@@ -84,13 +85,14 @@ data["Date"] = pd.to_datetime(data["Date"],format='%d-%b-%Y')
 
 ### Details
 
-| Name                     | Module name              | Description              | Argument                                                                  | Response |
-| ------------------------ | ------------------------ | ------------------------ | ------------------------------------------------------------------------- | -------- |
-| vix                      | get_vix                  | price                    | start_date, end_date,columns_drop_list                                    | panda df |
-| option chain             | get_option_chain         | get option price         | symbol,strikePrice,expiryDate                                             | panda df |
-| option chain expiry date | get_option_chain_expdate | option chain expiry date | symbol                                                                    | json     |
-| future price             | get_future_price         | get future price         | symbol, start_date, end_date, expiryDate,response_type, columns_drop_list | panda df |
-| future expiry date       | get_future_expdate       | future expiry date       | symbol                                                                    | json     |
+| Name                     | Module name                | Description                                          | Argument                                                                  | Response        |
+| ------------------------ | -------------------------- | ---------------------------------------------------- | ------------------------------------------------------------------------- | --------------- |
+| vix                      | get_vix                    | price                                                | start_date, end_date,columns_drop_list                                    | panda df        |
+| option chain             | get_option_chain           | get option price                                     | symbol,strikePrice,expiryDate                                             | panda df        |
+| option chain expiry date | get_option_chain_expdate   | option chain expiry date                             | symbol                                                                    | json            |
+| future price             | get_future_price           | get future price                                     | symbol, start_date, end_date, expiryDate,response_type, columns_drop_list | panda df        |
+| future expiry date       | get_future_expdate         | future expiry date                                   | symbol                                                                    | json            |
+| historical option chain  | get_historical_option_data | get historical option value for a given strike price | symbol, start_date,end_date,option_type,strike_price,year,expiry_date     | json, pandas df |
 
 ### step to run
 
@@ -99,10 +101,31 @@ from nsedt import derivatives as de
 start_date = "24-04-2024"
 end_date = "25-04-2024"
 # date format "%d-%m-%Y"
+
 print(de.get_vix(start_date, end_date))
 print(de.get_option_chain_expdate(symbol="TCS"))
 print(de.get_option_chain(symbol="TCS", strike_price=3300, expiry_date="30-05-2024"))
 print(de.get_future_price(symbol="TCS", start_date=start_date, end_date=end_date))
 print(de.get_future_expdate(symbol="TCS"))
 print(de.get_historical_option_data(symbol="TATAMOTORS", start_date=start_date, end_date=end_date, option_type="CE", strike_price="1020", year="2024", expiry_date="30-05-2024"))
 ```
+
+# Reports
+
+### Details
+
+| Name            | Module name                | Description                                   | Argument            | Response         |
+| --------------- | -------------------------- | --------------------------------------------- | ------------------- | ---------------- |
+| market activity | get_market_activity_report | get raw content of the market activity report | date                | raw bytes        |
+| bhav copy       | get_bhav_copy_zip          | download the bhav copy zip and parse its csv  | date, response_type | panda df or json |
+
+### step to run
+
+```py
+from nsedt import reports as rep
+# date format "%d-%m-%Y"
+
+print(rep.get_market_activity_report(date="30-04-2024"))
+print(rep.get_bhav_copy_zip(date="30-04-2024", response_type="panda_df"))
+
+```
diff --git a/nsedt/derivatives/__init__.py b/nsedt/derivatives/__init__.py
@@ -12,10 +12,7 @@
 from nsedt.resources import constants as cns
 from nsedt.utils import data_format
 from nsedt.derivatives.options import (
-                                        get_option_chain,
-                                        get_option_chain_expdate,
-                                        get_historical_option_data
-                                    )
+    get_option_chain, get_option_chain_expdate, get_historical_option_data)
 from nsedt.derivatives.futures import get_future_price, get_future_expdate
 
 log = logging.getLogger("root")

diff --git a/nsedt/derivatives/options.py b/nsedt/derivatives/options.py
@@ -129,31 +129,39 @@ def get_historical_option_data(
     """
     Get historical data for option chain for a given expiry
     Args:
-        symbol (str): _description_
-        start_date (str): _description_
-        end_date (str): _description_
-        option_type (str): _description_.
-        option_type (str): _description_.
-        strike_price (str): _description_.
-        year (str): _description_.
-        expiry_date (str): _description_.
-        response_type (str, optional): _description_. Defaults to "panda_df".
-        columns_drop_list (list, optional): _description_. Defaults to None.
+        symbol (str): valid scrip name
+        start_date (str): in %d-%m-%Y format
+        end_date (str): in %d-%m-%Y format
+        option_type (str): CE or PE.
+        strike_price (str): valid integer.
+        year (str): in %Y format eg 2024.
+        expiry_date (str): in %d-%m-%Y format
+        response_type (str, optional): either json or pand_df. Defaults to "panda_df".
+        columns_drop_list (list, optional): list of columns to skip. Defaults to None.
 
     Returns:
-        _type_: _description_
+        _type_: either json or pandas df. Defaults to pandas_df
     """
     cookies = utils.get_cookies()
     base_url = cns.BASE_URL
     event_api = cns.FNO_HISTORY
     symbol = utils.get_symbol(symbol=symbol, get_key="derivatives")
+
+    if option_type not in ["CE", "PE"]:
+        raise ValueError("Option type must be either CE or PE")
+
+    try:
+        expiry_date = datetime.strptime(expiry_date, "%d-%m-%Y").strftime("%d-%b-%Y")
+    except Exception as e:
+        raise ValueError("Please give expiry date in %d-%b-%Y format") from e
+
     params = {
         "symbol": symbol,
         "from": start_date,
         "to": end_date,
         "instrumentType": "OPTSTK",
         "optionType": option_type,
-        "expiryDate": datetime.strptime(expiry_date, "%d-%m-%Y").strftime("%d-%b-%Y"),
+        "expiryDate": expiry_date,
         "strikePrice": strike_price,
         "year": year,
     }
@@ -165,6 +173,5 @@ def get_historical_option_data(
 
     return data_format.derivaties_options(
         data,
-        response_type=response_type,
-        columns_drop_list=columns_drop_list,
+        response_type=response_type, columns_drop_list=columns_drop_list,
     )
diff --git a/nsedt/equity.py b/nsedt/equity.py
@@ -11,6 +11,7 @@
 
 from nsedt import utils
 from nsedt.resources import constants as cns
+
 from nsedt.utils import data_format
 
 log = logging.getLogger("root")

diff --git a/nsedt/reports.py b/nsedt/reports.py
@@ -0,0 +1,58 @@
+"""
+function to download reports
+"""
+
+import logging
+
+from nsedt.utils import get_cookies, fetch_csv, format_date, fetch_zip
+from nsedt.resources.constants import (
+    REPORT_URL, MARKET_ACTIVITY_REPORT, BHAV_COPY_REPORT)
+
+log = logging.getLogger("root")
+
+
+def get_market_activity_report(date: str):
+    """
+    Fetch the market activity report of a given day as raw content.
+
+    Args:
+        date (str): day of the report in '%d-%m-%Y' format, eg: "30-04-2024";
+            it is converted to ddmmyy (300424) to build the report url
+    Returns:
+        the unparsed content of the report csv, as fetch_csv hands it back
+        for response_type="raw"; the report is not converted to json or
+        pandas df
+    Raises:
+        ValueError: when date is not in '%d-%m-%Y' format
+    """
+    report_date = format_date(date, date_format='%d%m%y')
+    if report_date:
+        cookies = get_cookies()
+        report_url = REPORT_URL + MARKET_ACTIVITY_REPORT + report_date + ".csv"
+        return fetch_csv(report_url, cookies, response_type="raw")
+
+    raise ValueError("Please provide date format in '%d-%m-%Y' format")
+
+
+def get_bhav_copy_zip(date: str, response_type: str="panda_df"):
+    """
+    Download the bhav copy zip of a given day and return the csv inside it.
+
+    Args:
+        date (str): day of the bhav copy in '%d-%m-%Y' format, eg: "30-04-2024"
+        response_type (str, Optional): define the response type panda_df | json.
+            Default panda_df
+    Returns:
+        Pandas DataFrame or json string built from the csv in the zip,
+        depending on response_type
+    Raises:
+        ValueError: when date is not in '%d-%m-%Y' format
+    """
+
+    date = format_date(date, date_format='%d%b%Y')
+    if not date:
+        raise ValueError("Please provide date format in '%d-%m-%Y' format")
+    # upper-cased to match the file naming, eg: 30APR2024
+    date = date.upper()
+    month, year = date[2:5], date[5:]
+
+    # BHAV_COPY_REPORT may end with a fixed year folder; drop it so the year
+    # folder in the url always comes from the requested date
+    report_path = BHAV_COPY_REPORT.rstrip("/")
+    if report_path[-4:].isdigit():
+        report_path = report_path[:-4].rstrip("/")
+
+    cookies = get_cookies()
+    # the csv inside the archive is named like the zip without ".zip"
+    file_name = f"cm{date}bhav.csv"
+    url = f"{REPORT_URL}{report_path}/{year}/{month}/{file_name}.zip"
+    return fetch_zip(url, cookies, file_name=file_name, response_type=response_type)
diff --git a/nsedt/resources/constants.py b/nsedt/resources/constants.py
@@ -18,6 +18,7 @@
 }"""
 
 BASE_URL = "https://www.nseindia.com/"
+REPORT_URL = "https://nsearchives.nseindia.com/"
 
 ### EQUITY
 EQUITY_PRICE_HISTORY = "api/historical/securityArchives?"
@@ -38,3 +39,7 @@
 INDICES = ["NIFTY", "FINNIFTY", "BANKNIFTY"]
 VIX_HISTORY = "api/historical/vixhistory?"
 FNO_HISTORY = "api/historical/foCPV?"
+
+# Reports
+# paths below are appended to REPORT_URL when a report url is built
+MARKET_ACTIVITY_REPORT = "archives/equities/mkt/MA"
+# NOTE(review): the year folder (2024) is part of this constant - confirm
+# how bhav copies of other years are expected to be addressed
+BHAV_COPY_REPORT = "content/historical/EQUITIES/2024/"
diff --git a/nsedt/utils/__init__.py b/nsedt/utils/__init__.py
@@ -2,15 +2,42 @@
 utils for nsedt
 """
 
+import io
 import json
 import datetime
+import zipfile
+
+from io import BytesIO
+
+from datetime import datetime, date
 from warnings import warn
-import pandas as pd
+
 import requests
+
+import pandas as pd
+
+from fake_http_header import FakeHttpHeader
 from nsedt.resources import constants as cns
 
 
 
+
+
+def format_date(input_string: str, date_format: str):
+    """
+    Re-format a '%d-%m-%Y' date string into another strftime layout.
+
+    Args:
+        - input_string : date written as '%d-%m-%Y', eg: "30-04-2024"
+        - date_format : strftime pattern used for the returned string
+    Returns:
+        - str: the date rendered with date_format, or None when
+          input_string is not a valid '%d-%m-%Y' date
+    """
+    try:
+        return datetime.strptime(input_string, "%d-%m-%Y").strftime(date_format)
+    except (ValueError, TypeError):
+        # TypeError covers non-string input such as None, so callers that
+        # validate with "if not date" see it as an invalid date too
+        return None
+
+
+
 def get_headers():
     """
     Args:
@@ -19,16 +46,9 @@ def get_headers():
         Json: json containing nse header
     """
 
-    return {
-        "Host": "www.nseindia.com",
-        "User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:85.0) Gecko/20100101 Firefox/85.0",
-        "Accept": "*/*",
-        "Accept-Language": "en-US,en;q=0.5",
-        "Accept-Encoding": "gzip, deflate, br",
-        "X-Requested-With": "XMLHttpRequest",
-        "DNT": "1",
-        "Connection": "keep-alive",
-    }
+    return FakeHttpHeader().as_header_dict()
+
+
 
 
 def get_cookies():
@@ -99,7 +119,7 @@ def get_symbol(symbol: str, get_key: str) -> str:
         if symbol in key_list:
             val = item[get_key]
 
-    return val if val else symbol
+    return val or symbol
 
 
 def check_nd_convert(start_date: str, end_date: str) -> datetime:
@@ -116,7 +136,7 @@ def check_nd_convert(start_date: str, end_date: str) -> datetime:
     :return: the start_date and end_date as datetime objects.
     """
 
-    if isinstance(start_date, datetime.date) and isinstance(end_date, datetime.date):
+    if isinstance(start_date, date) and isinstance(end_date, date):
         warn(
             """Passing start_date, end_date in date is deprecated
 now pass in str '%d-%m-%Y' format""",
@@ -125,10 +145,78 @@ def check_nd_convert(start_date: str, end_date: str) -> datetime:
         )
 
     elif isinstance(start_date, str) and isinstance(end_date, str):
-        start_date = datetime.datetime.strptime(start_date, "%d-%m-%Y")
-        end_date = datetime.datetime.strptime(end_date, "%d-%m-%Y")
+        start_date = datetime.strptime(start_date, "%d-%m-%Y")
+        end_date = datetime.strptime(end_date, "%d-%m-%Y")
 
     else:
         raise ValueError("Input is of an unknown type")
 
     return start_date, end_date
+
+
+
+def fetch_csv(url, cookies, response_type="panda_df"):
+    """
+    Download a csv file and return it in the requested shape.
+
+    Args:
+
+        url (str): URL to fetch
+        cookies: NSE cookies sent with the request
+        response_type (str, Optional): panda_df | json | raw. Default panda_df
+
+    Returns:
+
+        Pandas DataFrame: df generated from csv
+        OR
+        Json: json output of the csv
+        OR
+        Raw content: untouched response content (response_type="raw") for
+                files where it cannot be processed into Json or Pandas df
+
+    Raises:
+
+        ValueError: when the response status code is not 200
+    """
+
+    response = requests.get(
+        url=url, timeout=30, headers=get_headers(), cookies=cookies)
+    if response.status_code != 200:
+        raise ValueError("Please try again in a minute.")
+
+    if response_type == "raw":
+        return response.content
+    csv_content = response.content.decode('utf-8')
+    # on_bad_lines="skip" replaces error_bad_lines=False, which was removed
+    # in pandas 2.0 and makes read_csv raise TypeError there
+    df = pd.read_csv(io.StringIO(csv_content), on_bad_lines="skip")
+    return df.to_json(orient='records') if response_type == "json" else df
+
+
+def fetch_zip(url, cookies, file_name, response_type="panda_df"):
+    """
+    Download a zip archive and return one csv member of it.
+
+    Args:
+
+        url (str): URL to fetch
+        cookies: NSE cookies sent with the request
+        file_name (str): name of the csv file to read from the archive
+        response_type (str, Optional): panda_df | json. Default panda_df
+
+    Returns:
+
+        Pandas DataFrame: df generated from the csv
+        OR
+        Json: json output of the csv
+
+    Raises:
+
+        ValueError: when file_name is empty, when the response status code
+        is not 200, or when file_name cannot be read from the archive
+    """
+
+    if not file_name:
+        raise ValueError("Please give file name to return")
+
+    response = requests.get(
+        url=url, timeout=30, headers=get_headers(), cookies=cookies)
+    if response.status_code != 200:
+        raise ValueError("File might not be available this time or check your params")
+
+    # the archive is opened from memory, nothing is written to disk
+    with zipfile.ZipFile(BytesIO(response.content), 'r') as archive:
+        try:
+            member_bytes = archive.read(file_name)
+        except Exception as e:
+            raise ValueError("File not found in the zip folder.") from e
+
+    frame = pd.read_csv(BytesIO(member_bytes))
+    if response_type == "json":
+        return frame.to_json(orient='records')
+    return frame
diff --git a/requirements.txt b/requirements.txt
@@ -2,3 +2,4 @@ requests
 numpy
 pandas
 Deprecated
+fake-http-header
diff --git a/setup.py b/setup.py
@@ -21,6 +21,7 @@
         "requests",
         "numpy",
         "pandas",
+        "fake-http-header"
     ],
     classifiers=[
         "Development Status :: 3 - Alpha",