diff --git a/ckanext/fairdatapoint/profiles.py b/ckanext/fairdatapoint/profiles.py index 1d3ce65..b414c36 100644 --- a/ckanext/fairdatapoint/profiles.py +++ b/ckanext/fairdatapoint/profiles.py @@ -69,14 +69,19 @@ def validate_tags(values_list: List[Dict]) -> List: tags = [] for item in values_list: tag_value = item['name'] - find_illegal = re.search(illegal_pattern, tag_value) - if find_illegal: - log.warning(f'Tag {tag_value} contains values other than alphanumeric characters, spaces, hyphens, ' - f'underscores or dots, they will be replaces with spaces') - tag = {'name': re.sub(illegal_pattern, ' ', tag_value)} - tags.append(tag) + if len(tag_value) < 2: + log.warning(f'Tag {tag_value} is shorter than 2 characters and will be removed') + elif len(tag_value) > 100: + log.warning(f'Tag {tag_value} is longer than 100 characters and will be removed') else: - tags.append(item) + find_illegal = re.search(illegal_pattern, tag_value) + if find_illegal: + log.warning(f'Tag {tag_value} contains values other than alphanumeric characters, spaces, hyphens, ' + f'underscores or dots, they will be replaces with spaces') + tag = {'name': re.sub(illegal_pattern, ' ', tag_value)} + tags.append(tag) + else: + tags.append(item) return tags diff --git a/ckanext/fairdatapoint/tests/test_profiles.py b/ckanext/fairdatapoint/tests/test_profiles.py index 67c1fa4..427975c 100644 --- a/ckanext/fairdatapoint/tests/test_profiles.py +++ b/ckanext/fairdatapoint/tests/test_profiles.py @@ -32,6 +32,9 @@ ([{"name": "CNS/Brain"}], [{"name": "CNS Brain"}]), ([{"name": "COVID-19"}, {"name": "3`-DNA"}], [{"name": "COVID-19"}, {"name": "3 -DNA"}]), ([{"name": "something-1.1"}, {"name": "breast cancer"}], [{"name": "something-1.1"}, {"name": "breast cancer"}]), + ([{"name": "-"}], []), + ([{"name": "It is a ridiculously long (more 100 chars) text for a tag therefore it should be removed from the " + "result to prevent CKAN harvester from failing"}], []), ([], []) ]) def test_validate_tags(input_tags, expected_tags):