diff --git a/.gitignore b/.gitignore index dfe3f06..628216c 100644 --- a/.gitignore +++ b/.gitignore @@ -111,4 +111,4 @@ test.py *-problems_with_citation.txt erro.txt author.* - +.idea/* diff --git a/util/crawler.py b/util/crawler.py index c77e1c9..6c81bc0 100644 --- a/util/crawler.py +++ b/util/crawler.py @@ -60,7 +60,8 @@ def _normalize_names(pub): r' D[AEIOU]\s+', r'-', r'DOS', - r'DAS' + r'DAS', + r'\d+' ) @@ -87,7 +88,7 @@ def _normalize_names(pub): # Check if last_name is made up by two names last_names = names[0].split(' ') - # Convert last_name last_name, name to last_name, name last_name + # Convert `last_name last_name, name` to `last_name, name last_name` if len(last_names) > 1: names[0] = last_names[-1] names[1] += " " + " ".join(last_names[:-1])