From 149909a56388a939f53ff242011b15d5322852a4 Mon Sep 17 00:00:00 2001 From: jindaxiang Date: Fri, 10 May 2024 01:01:32 +0800 Subject: [PATCH] fix: fix stock_lhb_yytj_sina --- akshare/stock_feature/stock_lhb_sina.py | 139 ++++++++++++++++-------- docs/data/stock/stock.md | 30 ++--- 2 files changed, 108 insertions(+), 61 deletions(-) diff --git a/akshare/stock_feature/stock_lhb_sina.py b/akshare/stock_feature/stock_lhb_sina.py index 343ea735169..4faec671db1 100644 --- a/akshare/stock_feature/stock_lhb_sina.py +++ b/akshare/stock_feature/stock_lhb_sina.py @@ -1,10 +1,11 @@ #!/usr/bin/env python # -*- coding:utf-8 -*- """ -Date: 2024/2/26 15:00 +Date: 2024/5/10 00:00 Desc: 新浪财经-龙虎榜 https://vip.stock.finance.sina.com.cn/q/go.php/vInvestConsult/kind/lhb/index.phtml """ + from io import StringIO import pandas as pd @@ -39,16 +40,27 @@ def stock_lhb_detail_daily_sina(date: str = "20240222") -> pd.DataFrame: big_df = pd.concat(objs=[big_df, temp_df], ignore_index=True) big_df["股票代码"] = big_df["股票代码"].astype(str).str.zfill(6) del big_df["查看详情"] - big_df.columns = ["序号", "股票代码", "股票名称", "收盘价", "对应值", "成交量", "成交额", "指标"] - big_df['收盘价'] = pd.to_numeric(big_df['收盘价'], errors="coerce") - big_df['对应值'] = pd.to_numeric(big_df['对应值'], errors="coerce") - big_df['成交量'] = pd.to_numeric(big_df['成交量'], errors="coerce") - big_df['成交额'] = pd.to_numeric(big_df['成交额'], errors="coerce") + big_df.columns = [ + "序号", + "股票代码", + "股票名称", + "收盘价", + "对应值", + "成交量", + "成交额", + "指标", + ] + big_df["收盘价"] = pd.to_numeric(big_df["收盘价"], errors="coerce") + big_df["对应值"] = pd.to_numeric(big_df["对应值"], errors="coerce") + big_df["成交量"] = pd.to_numeric(big_df["成交量"], errors="coerce") + big_df["成交额"] = pd.to_numeric(big_df["成交额"], errors="coerce") return big_df -def _find_last_page(url: str = "https://vip.stock.finance.sina.com.cn/q/go.php/vLHBData/kind/ggtj/index.phtml", - recent_day: str = "60"): +def _find_last_page( + url: str = "https://vip.stock.finance.sina.com.cn/q/go.php/vLHBData/kind/ggtj/index.phtml", + recent_day: str = "60", +): params = { "last": recent_day, "p": "1", @@ -57,7 +69,7 @@ def _find_last_page(url: str = "https://vip.stock.finance.sina.com.cn/q/go.php/v soup = BeautifulSoup(r.text, "lxml") try: previous_page = int(soup.find_all(attrs={"class": "page"})[-2].text) - except Exception as e: + except: # noqa: E722 previous_page = 1 if previous_page != 1: while True: @@ -66,7 +78,7 @@ def _find_last_page(url: str = "https://vip.stock.finance.sina.com.cn/q/go.php/v "p": previous_page, } r = requests.get(url, params=params) - soup = BeautifulSoup(r.text, "lxml") + soup = BeautifulSoup(r.text, features="lxml") last_page = int(soup.find_all(attrs={"class": "page"})[-2].text) if last_page != previous_page: previous_page = last_page @@ -76,44 +88,56 @@ def _find_last_page(url: str = "https://vip.stock.finance.sina.com.cn/q/go.php/v return previous_page -def stock_lhb_ggtj_sina(recent_day: str = "30") -> pd.DataFrame: +def stock_lhb_ggtj_sina(symbol: str = "5") -> pd.DataFrame: """ 龙虎榜-个股上榜统计 https://vip.stock.finance.sina.com.cn/q/go.php/vLHBData/kind/ggtj/index.phtml - :param recent_day: choice of {"5": 最近 5 天; "10": 最近 10 天; "30": 最近 30 天; "60": 最近 60 天;} - :type recent_day: str - :return: 龙虎榜-每日详情 + :param symbol: choice of {"5": 最近 5 天; "10": 最近 10 天; "30": 最近 30 天; "60": 最近 60 天;} + :type symbol: str + :return: 龙虎榜-个股上榜统计 :rtype: pandas.DataFrame """ - url = "https://vip.stock.finance.sina.com.cn/q/go.php/vLHBData/kind/ggtj/index.phtml" - last_page_num = _find_last_page(url, recent_day) + url = ( + "https://vip.stock.finance.sina.com.cn/q/go.php/vLHBData/kind/ggtj/index.phtml" + ) + last_page_num = _find_last_page(url, symbol) big_df = pd.DataFrame() tqdm = get_tqdm() for page in tqdm(range(1, last_page_num + 1), leave=False): params = { - "last": recent_day, + "last": symbol, "p": page, } r = requests.get(url, params=params) temp_df = pd.read_html(StringIO(r.text))[0].iloc[0:, :] - big_df = pd.concat([big_df, temp_df], ignore_index=True) + big_df = pd.concat(objs=[big_df, temp_df], ignore_index=True) big_df["股票代码"] = big_df["股票代码"].astype(str).str.zfill(6) - big_df.columns = ["股票代码", "股票名称", "上榜次数", "累积购买额", "累积卖出额", "净额", "买入席位数", - "卖出席位数"] + big_df.columns = [ + "股票代码", + "股票名称", + "上榜次数", + "累积购买额", + "累积卖出额", + "净额", + "买入席位数", + "卖出席位数", + ] return big_df -def stock_lhb_yytj_sina(recent_day: str = "5") -> pd.DataFrame: +def stock_lhb_yytj_sina(symbol: str = "5") -> pd.DataFrame: """ 龙虎榜-营业部上榜统计 https://vip.stock.finance.sina.com.cn/q/go.php/vLHBData/kind/yytj/index.phtml - :param recent_day: choice of {"5": 最近 5 天; "10": 最近 10 天; "30": 最近 30 天; "60": 最近 60 天;} - :type recent_day: str + :param symbol: choice of {"5": 最近 5 天; "10": 最近 10 天; "30": 最近 30 天; "60": 最近 60 天;} + :type symbol: str :return: 龙虎榜-营业部上榜统计 :rtype: pandas.DataFrame """ - url = "https://vip.stock.finance.sina.com.cn/q/go.php/vLHBData/kind/yytj/index.phtml" - last_page_num = _find_last_page(url, recent_day) + url = ( + "https://vip.stock.finance.sina.com.cn/q/go.php/vLHBData/kind/yytj/index.phtml" + ) + last_page_num = _find_last_page(url, symbol) big_df = pd.DataFrame() tqdm = get_tqdm() for page in tqdm(range(1, last_page_num + 1), leave=False): @@ -124,29 +148,39 @@ def stock_lhb_yytj_sina(recent_day: str = "5") -> pd.DataFrame: r = requests.get(url, params=params) temp_df = pd.read_html(StringIO(r.text))[0].iloc[0:, :] big_df = pd.concat([big_df, temp_df], ignore_index=True) - big_df.columns = ["营业部名称", "上榜次数", "累积购买额", "买入席位数", "累积卖出额", "卖出席位数", "买入前三股票"] + big_df.columns = [ + "营业部名称", + "上榜次数", + "累积购买额", + "买入席位数", + "累积卖出额", + "卖出席位数", + "买入前三股票", + ] big_df["上榜次数"] = pd.to_numeric(big_df["上榜次数"], errors="coerce") big_df["买入席位数"] = pd.to_numeric(big_df["买入席位数"], errors="coerce") big_df["卖出席位数"] = pd.to_numeric(big_df["卖出席位数"], errors="coerce") return big_df -def stock_lhb_jgzz_sina(recent_day: str = "5") -> pd.DataFrame: +def stock_lhb_jgzz_sina(symbol: str = "5") -> pd.DataFrame: """ 龙虎榜-机构席位追踪 https://vip.stock.finance.sina.com.cn/q/go.php/vLHBData/kind/jgzz/index.phtml - :param recent_day: choice of {"5": 最近 5 天; "10": 最近 10 天; "30": 最近 30 天; "60": 最近 60 天;} - :type recent_day: str + :param symbol: choice of {"5": 最近 5 天; "10": 最近 10 天; "30": 最近 30 天; "60": 最近 60 天;} + :type symbol: str :return: 龙虎榜-机构席位追踪 :rtype: pandas.DataFrame """ - url = "https://vip.stock.finance.sina.com.cn/q/go.php/vLHBData/kind/jgzz/index.phtml" - last_page_num = _find_last_page(url, recent_day) + url = ( + "https://vip.stock.finance.sina.com.cn/q/go.php/vLHBData/kind/jgzz/index.phtml" + ) + last_page_num = _find_last_page(url, symbol) big_df = pd.DataFrame() tqdm = get_tqdm() for page in tqdm(range(1, last_page_num + 1), leave=False): params = { - "last": recent_day, + "last": symbol, "p": page, } r = requests.get(url, params=params) @@ -157,7 +191,15 @@ def stock_lhb_jgzz_sina(recent_day: str = "5") -> pd.DataFrame: big_df["股票代码"] = big_df["股票代码"].astype(str).str.zfill(6) del big_df["当前价"] del big_df["涨跌幅"] - big_df.columns = ["股票代码", "股票名称", "累积买入额", "买入次数", "累积卖出额", "卖出次数", "净额"] + big_df.columns = [ + "股票代码", + "股票名称", + "累积买入额", + "买入次数", + "累积卖出额", + "卖出次数", + "净额", + ] big_df["买入次数"] = pd.to_numeric(big_df["买入次数"], errors="coerce") big_df["卖出次数"] = pd.to_numeric(big_df["卖出次数"], errors="coerce") return big_df @@ -170,7 +212,9 @@ def stock_lhb_jgmx_sina() -> pd.DataFrame: :return: 龙虎榜-机构席位成交明细 :rtype: pandas.DataFrame """ - url = "https://vip.stock.finance.sina.com.cn/q/go.php/vLHBData/kind/jgmx/index.phtml" + url = ( + "https://vip.stock.finance.sina.com.cn/q/go.php/vLHBData/kind/jgmx/index.phtml" + ) params = { "p": "1", } @@ -178,7 +222,7 @@ def stock_lhb_jgmx_sina() -> pd.DataFrame: soup = BeautifulSoup(r.text, features="lxml") try: last_page_num = int(soup.find_all(attrs={"class": "page"})[-2].text) - except: + except: # noqa: E722 last_page_num = 1 big_df = pd.DataFrame() tqdm = get_tqdm() @@ -190,27 +234,30 @@ def stock_lhb_jgmx_sina() -> pd.DataFrame: temp_df = pd.read_html(StringIO(r.text))[0].iloc[0:, :] big_df = pd.concat(objs=[big_df, temp_df], ignore_index=True) big_df["股票代码"] = big_df["股票代码"].astype(str).str.zfill(6) - big_df['交易日期'] = pd.to_datetime(big_df['交易日期'], errors="coerce").dt.date - big_df.rename(columns={"机构席位买入额(万)": "机构席位买入额", "机构席位卖出额(万)": "机构席位卖出额"}, - inplace=True) - big_df['机构席位买入额'] = pd.to_numeric(big_df['机构席位买入额'], errors="coerce") - big_df['机构席位卖出额'] = pd.to_numeric(big_df['机构席位卖出额'], errors="coerce") + big_df["交易日期"] = pd.to_datetime(big_df["交易日期"], errors="coerce").dt.date + big_df.rename( + columns={ + "机构席位买入额(万)": "机构席位买入额", + "机构席位卖出额(万)": "机构席位卖出额", + }, + inplace=True, + ) + big_df["机构席位买入额"] = pd.to_numeric(big_df["机构席位买入额"], errors="coerce") + big_df["机构席位卖出额"] = pd.to_numeric(big_df["机构席位卖出额"], errors="coerce") return big_df if __name__ == "__main__": - stock_lhb_detail_daily_sina_df = stock_lhb_detail_daily_sina( - date="20240222" - ) + stock_lhb_detail_daily_sina_df = stock_lhb_detail_daily_sina(date="20240222") print(stock_lhb_detail_daily_sina_df) - stock_lhb_ggtj_sina_df = stock_lhb_ggtj_sina(recent_day="5") + stock_lhb_ggtj_sina_df = stock_lhb_ggtj_sina(symbol="5") print(stock_lhb_ggtj_sina_df) - stock_lhb_yytj_sina_df = stock_lhb_yytj_sina(recent_day="5") + stock_lhb_yytj_sina_df = stock_lhb_yytj_sina(symbol="5") print(stock_lhb_yytj_sina_df) - stock_lhb_jgzz_sina_df = stock_lhb_jgzz_sina(recent_day="5") + stock_lhb_jgzz_sina_df = stock_lhb_jgzz_sina(symbol="5") print(stock_lhb_jgzz_sina_df) stock_lhb_jgmx_sina_df = stock_lhb_jgmx_sina() diff --git a/docs/data/stock/stock.md b/docs/data/stock/stock.md index 962431c3b3b..063496dc971 100644 --- a/docs/data/stock/stock.md +++ b/docs/data/stock/stock.md @@ -17057,13 +17057,13 @@ print(stock_lhb_detail_daily_sina_df) 描述: 新浪财经-龙虎榜-个股上榜统计 -限量: 单次返回指定 recent_day 的所有历史数据 +限量: 单次返回指定 symbol 的所有历史数据 输入参数 -| 名称 | 类型 | 描述 | -|------------|-----|---------------------------------------------------------------------------------------| -| recent_day | str | recent_day="5"; choice of {"5": 最近 5 天; "10": 最近 10 天; "30": 最近 30 天; "60": 最近 60 天;} | +| 名称 | 类型 | 描述 | +|--------|-----|-----------------------------------------------------------------------------------| +| symbol | str | symbol="5"; choice of {"5": 最近 5 天; "10": 最近 10 天; "30": 最近 30 天; "60": 最近 60 天;} | 输出参数 @@ -17083,7 +17083,7 @@ print(stock_lhb_detail_daily_sina_df) ```python import akshare as ak -stock_lhb_ggtj_sina_df = ak.stock_lhb_ggtj_sina(recent_day="5") +stock_lhb_ggtj_sina_df = ak.stock_lhb_ggtj_sina(symbol="5") print(stock_lhb_ggtj_sina_df) ``` @@ -17113,13 +17113,13 @@ print(stock_lhb_ggtj_sina_df) 描述: 新浪财经-龙虎榜-营业上榜统计 -限量: 单次返回指定 recent_day 的所有历史数据 +限量: 单次返回指定 symbol 的所有历史数据 输入参数 -| 名称 | 类型 | 描述 | -|------------|-----|---------------------------------------------------------------------------------------| -| recent_day | str | recent_day="5"; choice of {"5": 最近 5 天; "10": 最近 10 天; "30": 最近 30 天; "60": 最近 60 天;} | +| 名称 | 类型 | 描述 | +|--------|-----|-----------------------------------------------------------------------------------| +| symbol | str | symbol="5"; choice of {"5": 最近 5 天; "10": 最近 10 天; "30": 最近 30 天; "60": 最近 60 天;} | 输出参数 @@ -17138,7 +17138,7 @@ print(stock_lhb_ggtj_sina_df) ```python import akshare as ak -stock_lhb_yytj_sina_df = ak.stock_lhb_yytj_sina(recent_day="5") +stock_lhb_yytj_sina_df = ak.stock_lhb_yytj_sina(symbol="5") print(stock_lhb_yytj_sina_df) ``` @@ -17168,13 +17168,13 @@ print(stock_lhb_yytj_sina_df) 描述: 新浪财经-龙虎榜-机构席位追踪 -限量: 单次返回指定 recent_day 的所有历史数据 +限量: 单次返回指定 symbol 的所有历史数据 输入参数 -| 名称 | 类型 | 描述 | -|------------|-----|---------------------------------------------------------------------------------------| -| recent_day | str | recent_day="5"; choice of {"5": 最近 5 天; "10": 最近 10 天; "30": 最近 30 天; "60": 最近 60 天;} | +| 名称 | 类型 | 描述 | +|--------|-----|-----------------------------------------------------------------------------------| +| symbol | str | symbol="5"; choice of {"5": 最近 5 天; "10": 最近 10 天; "30": 最近 30 天; "60": 最近 60 天;} | 输出参数 @@ -17193,7 +17193,7 @@ print(stock_lhb_yytj_sina_df) ```python import akshare as ak -stock_lhb_jgzz_sina_df = ak.stock_lhb_jgzz_sina(recent_day="5") +stock_lhb_jgzz_sina_df = ak.stock_lhb_jgzz_sina(symbol="5") print(stock_lhb_jgzz_sina_df) ```