From 364ba76ce33b8e1c88d729ed56373f05be9c0973 Mon Sep 17 00:00:00 2001 From: DaveMorais Date: Thu, 1 Nov 2018 15:47:08 -0400 Subject: [PATCH] Remove numbers from names --- .gitignore | 2 +- util/crawler.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index dfe3f06..628216c 100644 --- a/.gitignore +++ b/.gitignore @@ -111,4 +111,4 @@ test.py *-problems_with_citation.txt erro.txt author.* - +.idea/* diff --git a/util/crawler.py b/util/crawler.py index c77e1c9..6c81bc0 100644 --- a/util/crawler.py +++ b/util/crawler.py @@ -60,7 +60,8 @@ def _normalize_names(pub): r' D[AEIOU]\s+', r'-', r'DOS', - r'DAS' + r'DAS', + r'\d+' ) @@ -87,7 +88,7 @@ def _normalize_names(pub): # Check if last_name is made up by two names last_names = names[0].split(' ') - # Convert last_name last_name, name to last_name, name last_name + # Convert `last_name last_name, name` to `last_name, name last_name` if len(last_names) > 1: names[0] = last_names[-1] names[1] += " " + " ".join(last_names[:-1])