Skip to content

Commit

Permalink
Merge pull request #349 from vss-2/issue-348
Browse files: browse the repository at this point in the history
[python] Fix missing html5lib dependency #348
  • Loading branch information
rafapereirabr authored Apr 23, 2024
2 parents c7376ed + f6fd5af commit 6e87524
Show file tree
Hide file tree
Showing 4 changed files with 282 additions and 186 deletions.
10 changes: 7 additions & 3 deletions python-package/geobr/list_geobr.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -2,7 +2,7 @@
import pandas as pd
from io import StringIO
from urllib.error import HTTPError

import re

def list_geobr():
"""Prints available functions, according to latest README.md file
Expand All @@ -19,8 +19,12 @@ def list_geobr():

try:
html_data = get("https://github.com/ipeaGIT/geobr/blob/master/README.md").text

df = pd.read_html(StringIO(html_data))[1]
find_emoji = html_data.index("👉")
html_data = html_data[find_emoji:]
escaped_data = html_data.replace("\\u003c", "<").replace("\\u003e", ">")
tables = re.findall("<table>(.+?)</table>", escaped_data)
available_datasets = "<table>" + tables[0].replace("\\n", "") + "</table>"
df = pd.DataFrame(pd.read_html(StringIO(available_datasets))[0])

except HTTPError:
print(
Expand Down
Loading

0 comments on commit 6e87524

Please sign in to comment.