Skip to content

Commit

Permalink
fix: fix drewry_wci_index
Browse files: browse the repository at this point in the history
  • Loading branch information
albertandking committed Sep 26, 2024
1 parent db1eca1 commit 555db70
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 22 deletions.
33 changes: 17 additions & 16 deletions akshare/index/index_drewry.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""
Date: 2023/9/30 15:30
Date: 2024/9/26 18:00
Desc: Drewry 集装箱指数
https://www.drewry.co.uk/supply-chain-advisors/supply-chain-expertise/world-container-index-assessed-by-drewry
https://infogram.com/world-container-index-1h17493095xl4zj
"""

import pandas as pd
import requests
from bs4 import BeautifulSoup
Expand All @@ -17,7 +18,8 @@ def drewry_wci_index(symbol: str = "composite") -> pd.DataFrame:
"""
Drewry 集装箱指数
https://infogram.com/world-container-index-1h17493095xl4zj
:param symbol: choice of {"composite", "shanghai-rotterdam", "rotterdam-shanghai", "shanghai-los angeles", "los angeles-shanghai", "shanghai-genoa", "new york-rotterdam", "rotterdam-new york"}
:param symbol: choice of {"composite", "shanghai-rotterdam", "rotterdam-shanghai", "shanghai-los angeles",
"los angeles-shanghai", "shanghai-genoa", "new york-rotterdam", "rotterdam-new york"}
:type symbol: str
:return: Drewry 集装箱指数
:rtype: pandas.DataFrame
Expand All @@ -34,24 +36,23 @@ def drewry_wci_index(symbol: str = "composite") -> pd.DataFrame:
}
url = "https://infogram.com/world-container-index-1h17493095xl4zj"
r = requests.get(url)
soup = BeautifulSoup(r.text, "lxml")
soup = BeautifulSoup(r.text, features="lxml")
data_text = soup.find_all("script")[-4].string.strip("window.infographicData=")[:-1]
data_json = demjson.decode(data_text)
temp_df = pd.DataFrame(data_json["elements"][2]["data"][symbol_map[symbol]])
temp_df = temp_df.iloc[1:, :]
data_json_need = data_json["elements"]["content"]["content"]["entities"][
"7a55585f-3fb3-44e6-9b54-beea1cd20b4d"
]["data"][symbol_map[symbol]]
date_list = [item[0]["value"] for item in data_json_need[1:]]
try:
value_list = [item[1]["value"] for item in data_json_need[1:]]
except TypeError:
value_list = [item[1]["value"] for item in data_json_need[1:-1]]
temp_df = pd.DataFrame([date_list, value_list]).T
temp_df.columns = ["date", "wci"]
temp_df["date"] = [item["value"] for item in temp_df["date"]]
temp_df["wci"] = [item["value"] for item in temp_df["wci"]]
day = temp_df["date"].str.split("-", expand=True).iloc[:, 0].str.strip()
month = temp_df["date"].str.split("-", expand=True).iloc[:, 1].str.strip()
month = month.str.replace("July", "Jul")
year = temp_df["date"].str.split("-", expand=True).iloc[:, 2].str.strip()
temp_df["date"] = day + "-" + month + "-" + year
# 修正数据源中日期格式的错误
temp_df["date"] = temp_df["date"].str.replace("Sept", "Sep")
temp_df["date"] = pd.to_datetime(temp_df["date"], format="%d-%b-%y").dt.date
temp_df["date"] = pd.to_datetime(
temp_df["date"], format="%d-%b-%y", errors="coerce"
).dt.date
temp_df["wci"] = pd.to_numeric(temp_df["wci"], errors="coerce")
temp_df.reset_index(inplace=True, drop=True)
return temp_df


Expand Down
12 changes: 6 additions & 6 deletions docs/data/index/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -3094,12 +3094,12 @@ print(drewry_wci_index_df)
3 2016-03-31 849.08
4 2016-04-07 868.06
.. ... ...
387 2023-08-31 1740.00
388 2023-09-07 1681.00
389 2023-09-14 1561.00
390 2023-09-21 1479.00
391 2023-09-28 1404.00
[392 rows x 2 columns]
437 2024-08-22 5319.00
438 2024-08-29 5181.00
439 2024-09-05 4775.00
440 2024-09-12 4168.00
441 2024-09-19 3970.00
[442 rows x 2 columns]
```

### 公路物流指数
Expand Down

0 comments on commit 555db70

Please sign in to comment.