From af99af7b8e97ec2379c8addb9c49080f3d4f34e0 Mon Sep 17 00:00:00 2001 From: Rishi Mondal <146999057+MAVRICK-1@users.noreply.github.com> Date: Tue, 6 Feb 2024 03:12:15 +0530 Subject: [PATCH 01/18] Added new realtion property and added new function to add relation property --- iyp/__init__.py | 22 +++++++++++- iyp/crawlers/rov/README.md | 0 iyp/crawlers/rov/rovista.py | 72 +++++++++++++++++++++++++++++++++++++ 3 files changed, 93 insertions(+), 1 deletion(-) create mode 100644 iyp/crawlers/rov/README.md create mode 100644 iyp/crawlers/rov/rovista.py diff --git a/iyp/__init__.py b/iyp/__init__.py index 7dcfe36..07bfeb3 100644 --- a/iyp/__init__.py +++ b/iyp/__init__.py @@ -576,10 +576,30 @@ def batch_add_properties(self, id_prop_list): MATCH (n) WHERE ID(n) = item.id SET n += item.props""" - res = self.tx.run(add_query, batch=batch) res.consume() self.commit() + + + # Function to execute the Cypher query to set properties on relationships + def add_relationship_properties(self,node_label_properties, relationship, connected_node_label_properties,properties): + """ + he function goes through each provided property and adds it to the relationship + between the specified nodes, but only if that property doesn't already + exist in the relationship. + """ + property_setters = ", ".join([f"rel.{prop} = ${{props}}.${{prop}}" for prop in properties.keys()]) + add_query = f""" + MATCH (node1:{node_label_properties})-[rel:{relationship}]-(node2:{connected_node_label_properties}) + WITH rel + UNWIND keys($properties) AS prop + WHERE NOT EXISTS(rel[prop]) + SET {property_setters} + """ + + res = self.tx.run(add_query,properties=properties) + res.consume() + self.commit() class BasePostProcess(object): diff --git a/iyp/crawlers/rov/README.md b/iyp/crawlers/rov/README.md new file mode 100644 index 0000000..e69de29 diff --git a/iyp/crawlers/rov/rovista.py b/iyp/crawlers/rov/rovista.py new file mode 100644 index 0000000..307d38d --- /dev/null +++ b/iyp/crawlers/rov/rovista.py @@ -0,0 +1,72 @@ +import argparse +import logging +import os +import sys + +import requests + +from iyp import BaseCrawler, RequestStatusError + +URL = 'https://api.rovista.netsecurelab.org/rovista/api/overview' +ORG = 'ROV' +NAME = 'rov.rovista' + + +class Crawler(BaseCrawler): + + def run(self): + """Fetch the prefix to ASN file from BGPKIT website and process lines one by + one.""" + + batch_size = 1000 # Adjust batch size as needed + offset = 0 + while True: + # Make a request with the current offset + response = requests.get(URL, params={"offset": offset, "count": batch_size}) + if response.status_code != 200: + raise RequestStatusError('Error while fetching RoVista data') + data = response.json().get('data', []) + for entry in data: + asn = entry['asn'] + ratio = entry['ratio'] + if ratio > 0.5: + self.iyp.add_relationship_properties(node_label_properties=f"AS{{asn: {asn}}}",relationship="CATEGORIZED",connected_node_label_properties='Tag{label:"Validating RPKI ROV"}', + properties={'ratio':ratio}) + else: + self.iyp.add_relationship_properties(node_label_properties=f"AS{{asn: {asn}}}",relationship="CATEGORIZED",connected_node_label_properties='Tag{label:"Not Validating RPKI ROV"}', + properties={'ratio':ratio}) + # Move to the next page + offset += batch_size + # Break the loop if there's no more data + if len(data) < batch_size: + break + + +def main() -> None: + parser = argparse.ArgumentParser() + parser.add_argument('--unit-test', action='store_true') + args = parser.parse_args() + + scriptname = os.path.basename(sys.argv[0]).replace('/', '_')[0:-3] + FORMAT = '%(asctime)s %(levelname)s %(message)s' + logging.basicConfig( + format=FORMAT, + filename='log/' + scriptname + '.log', + level=logging.INFO, + datefmt='%Y-%m-%d %H:%M:%S' + ) + + logging.info(f'Started: {sys.argv}') + + crawler = Crawler(ORG, URL, NAME) + if args.unit_test: + crawler.unit_test(logging) + else: + crawler.run() + crawler.close() + logging.info(f'Finished: {sys.argv}') + + +if __name__ == '__main__': + main() + sys.exit(0) From d12b3e86299842393d1b050f57d5d6dc564f0f0c Mon Sep 17 00:00:00 2001 From: Rishi Mondal <146999057+MAVRICK-1@users.noreply.github.com> Date: Tue, 6 Feb 2024 03:15:45 +0530 Subject: [PATCH 02/18] Added new realtion property and added new function to add relation property --- iyp/crawlers/rov/rovista.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/iyp/crawlers/rov/rovista.py b/iyp/crawlers/rov/rovista.py index 307d38d..3dfd59d 100644 --- a/iyp/crawlers/rov/rovista.py +++ b/iyp/crawlers/rov/rovista.py @@ -30,11 +30,9 @@ def run(self): asn = entry['asn'] ratio = entry['ratio'] if ratio > 0.5: - self.iyp.add_relationship_properties(node_label_properties=f"AS{{asn: {asn}}}",relationship="CATEGORIZED",connected_node_label_properties='Tag{label:"Validating RPKI ROV"}', - properties={'ratio':ratio}) + self.iyp.add_relationship_properties(node_label_properties=f"AS{{asn: {asn}}}",relationship="CATEGORIZED",connected_node_label_properties='Tag{label:"Validating RPKI ROV"}',properties={'ratio':ratio}) else: - self.iyp.add_relationship_properties(node_label_properties=f"AS{{asn: {asn}}}",relationship="CATEGORIZED",connected_node_label_properties='Tag{label:"Not Validating RPKI ROV"}', - properties={'ratio':ratio}) + self.iyp.add_relationship_properties(node_label_properties=f"AS{{asn: {asn}}}",relationship="CATEGORIZED",connected_node_label_properties='Tag{label:"Not Validating RPKI ROV"}',properties={'ratio':ratio}) # Move to the next page offset += batch_size # Break the loop if there's no more data From 286abde36729fdf789d7534f8f4ccd5b1d24a2db Mon Sep 17 00:00:00 2001 From: Rishi Mondal <146999057+MAVRICK-1@users.noreply.github.com> Date: Tue, 6 Feb 2024 03:24:28 +0530 Subject: [PATCH 03/18] rectify precommit --- iyp/__init__.py | 11 ++++++++--- iyp/crawlers/rov/rovista.py | 14 ++++++++++++-- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/iyp/__init__.py b/iyp/__init__.py index 07bfeb3..3f74945 100644 --- a/iyp/__init__.py +++ b/iyp/__init__.py @@ -582,9 +582,14 @@ def batch_add_properties(self, id_prop_list): # Function to execute the Cypher query to set properties on relationships - def add_relationship_properties(self,node_label_properties, relationship, connected_node_label_properties,properties): + def add_relationship_properties( + self, + node_label_properties, + relationship, + connected_node_label_properties, + properties): """ - he function goes through each provided property and adds it to the relationship + The function goes through each provided property and adds it to the relationship between the specified nodes, but only if that property doesn't already exist in the relationship. """ @@ -596,7 +601,7 @@ def add_relationship_properties(self,node_label_properties, relationship, connec WHERE NOT EXISTS(rel[prop]) SET {property_setters} """ - + res = self.tx.run(add_query,properties=properties) res.consume() self.commit() diff --git a/iyp/crawlers/rov/rovista.py b/iyp/crawlers/rov/rovista.py index 3dfd59d..5c8f2db 100644 --- a/iyp/crawlers/rov/rovista.py +++ b/iyp/crawlers/rov/rovista.py @@ -18,10 +18,12 @@ def run(self): """Fetch the prefix to ASN file from BGPKIT website and process lines one by one.""" + batch_size = 1000 # Adjust batch size as needed offset = 0 while True: # Make a request with the current offset + response = requests.get(URL, params={"offset": offset, "count": batch_size}) if response.status_code != 200: raise RequestStatusError('Error while fetching RoVista data') @@ -30,9 +32,17 @@ def run(self): asn = entry['asn'] ratio = entry['ratio'] if ratio > 0.5: - self.iyp.add_relationship_properties(node_label_properties=f"AS{{asn: {asn}}}",relationship="CATEGORIZED",connected_node_label_properties='Tag{label:"Validating RPKI ROV"}',properties={'ratio':ratio}) + self.iyp.add_relationship_properties( + node_label_properties=f"AS{{asn: {asn}}}", + relationship="CATEGORIZED", + connected_node_label_properties='Tag{label:"Validating RPKI ROV"}', + properties={'ratio':ratio}) else: - self.iyp.add_relationship_properties(node_label_properties=f"AS{{asn: {asn}}}",relationship="CATEGORIZED",connected_node_label_properties='Tag{label:"Not Validating RPKI ROV"}',properties={'ratio':ratio}) + self.iyp.add_relationship_properties( + node_label_properties=f"AS{{asn: {asn}}}", + relationship="CATEGORIZED", + connected_node_label_properties='Tag{label:"Not Validating RPKI ROV"}', + properties={'ratio':ratio}) # Move to the next page offset += batch_size # Break the loop if there's no more data From cc740851d863480ad9d556b680705486b095f7ce Mon Sep 17 00:00:00 2001 From: Rishi Mondal <146999057+MAVRICK-1@users.noreply.github.com> Date: Tue, 6 Feb 2024 04:12:24 +0530 Subject: [PATCH 04/18] Update rovista.py --- iyp/crawlers/rov/rovista.py | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/iyp/crawlers/rov/rovista.py b/iyp/crawlers/rov/rovista.py index 5c8f2db..30a3baf 100644 --- a/iyp/crawlers/rov/rovista.py +++ b/iyp/crawlers/rov/rovista.py @@ -15,16 +15,12 @@ class Crawler(BaseCrawler): def run(self): - """Fetch the prefix to ASN file from BGPKIT website and process lines one by - one.""" - - + """Get RoVista data from their API.""" batch_size = 1000 # Adjust batch size as needed offset = 0 while True: # Make a request with the current offset - - response = requests.get(URL, params={"offset": offset, "count": batch_size}) + response = requests.get(URL, params={'offset': offset, 'count': batch_size}) if response.status_code != 200: raise RequestStatusError('Error while fetching RoVista data') data = response.json().get('data', []) @@ -33,16 +29,16 @@ def run(self): ratio = entry['ratio'] if ratio > 0.5: self.iyp.add_relationship_properties( - node_label_properties=f"AS{{asn: {asn}}}", - relationship="CATEGORIZED", + node_label_properties=f'AS{{asn: {asn}}}', + relationship='CATEGORIZED', connected_node_label_properties='Tag{label:"Validating RPKI ROV"}', - properties={'ratio':ratio}) + properties={'ratio': ratio}) else: self.iyp.add_relationship_properties( - node_label_properties=f"AS{{asn: {asn}}}", - relationship="CATEGORIZED", + node_label_properties=f'AS{{asn: {asn}}}', + relationship='CATEGORIZED', connected_node_label_properties='Tag{label:"Not Validating RPKI ROV"}', - properties={'ratio':ratio}) + properties={'ratio': ratio}) # Move to the next page offset += batch_size # Break the loop if there's no more data From 7e1b92086e5a1a7f299c125983a06fe023efb130 Mon Sep 17 00:00:00 2001 From: Rishi Mondal <146999057+MAVRICK-1@users.noreply.github.com> Date: Tue, 6 Feb 2024 04:14:06 +0530 Subject: [PATCH 05/18] Update __init__.py --- iyp/__init__.py | 46 +++++++++++++++++++++------------------------- 1 file changed, 21 insertions(+), 25 deletions(-) diff --git a/iyp/__init__.py b/iyp/__init__.py index 3f74945..2470cba 100644 --- a/iyp/__init__.py +++ b/iyp/__init__.py @@ -558,6 +558,27 @@ def add_links(self, src_node, links): self.tx.run(matches + where + merges).consume() self.commit() + + def add_relationship_properties( + self, + node_label_properties, + relationship, + connected_node_label_properties, + properties): + """The function goes through each provided property and adds it to the + relationship between the specified nodes, but only if that property doesn't + already exist in the relationship.""" + property_setters = ','.join([f'rel.{prop} = ${{props}}.${{prop}}' for prop in properties.keys()]) + add_query = f""" + MATCH (node1:{node_label_properties})-[rel:{relationship}]-(node2:{connected_node_label_properties}) + WITH rel + UNWIND keys($properties) AS prop + WHERE NOT EXISTS(rel[prop]) + SET {property_setters} + """ + res = self.tx.run(add_query, properties=properties) + res.consume() + self.commit() def batch_add_properties(self, id_prop_list): """Add properties to existing nodes. @@ -579,32 +600,7 @@ def batch_add_properties(self, id_prop_list): res = self.tx.run(add_query, batch=batch) res.consume() self.commit() - - # Function to execute the Cypher query to set properties on relationships - def add_relationship_properties( - self, - node_label_properties, - relationship, - connected_node_label_properties, - properties): - """ - The function goes through each provided property and adds it to the relationship - between the specified nodes, but only if that property doesn't already - exist in the relationship. - """ - property_setters = ", ".join([f"rel.{prop} = ${{props}}.${{prop}}" for prop in properties.keys()]) - add_query = f""" - MATCH (node1:{node_label_properties})-[rel:{relationship}]-(node2:{connected_node_label_properties}) - WITH rel - UNWIND keys($properties) AS prop - WHERE NOT EXISTS(rel[prop]) - SET {property_setters} - """ - - res = self.tx.run(add_query,properties=properties) - res.consume() - self.commit() class BasePostProcess(object): From 46601baf8055b59bbd302516589cac0957a5588a Mon Sep 17 00:00:00 2001 From: Rishi Mondal <146999057+MAVRICK-1@users.noreply.github.com> Date: Tue, 6 Feb 2024 18:04:18 +0530 Subject: [PATCH 06/18] passed all the test --- iyp/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/iyp/__init__.py b/iyp/__init__.py index 2470cba..ad9393b 100644 --- a/iyp/__init__.py +++ b/iyp/__init__.py @@ -558,7 +558,7 @@ def add_links(self, src_node, links): self.tx.run(matches + where + merges).consume() self.commit() - + def add_relationship_properties( self, node_label_properties, From 98a99fad6a9652dbb0087b5f4a7b81cf800c864e Mon Sep 17 00:00:00 2001 From: Rishi Mondal <146999057+MAVRICK-1@users.noreply.github.com> Date: Wed, 7 Feb 2024 19:16:43 +0530 Subject: [PATCH 07/18] rectified changes --- iyp/__init__.py | 25 +--------- iyp/crawlers/{rov => rovista}/README.md | 0 .../rovista.py => rovista/validating_rov.py} | 46 ++++++++++++------- 3 files changed, 31 insertions(+), 40 deletions(-) rename iyp/crawlers/{rov => rovista}/README.md (100%) rename iyp/crawlers/{rov/rovista.py => rovista/validating_rov.py} (54%) diff --git a/iyp/__init__.py b/iyp/__init__.py index ad9393b..ee96188 100644 --- a/iyp/__init__.py +++ b/iyp/__init__.py @@ -559,27 +559,6 @@ def add_links(self, src_node, links): self.tx.run(matches + where + merges).consume() self.commit() - def add_relationship_properties( - self, - node_label_properties, - relationship, - connected_node_label_properties, - properties): - """The function goes through each provided property and adds it to the - relationship between the specified nodes, but only if that property doesn't - already exist in the relationship.""" - property_setters = ','.join([f'rel.{prop} = ${{props}}.${{prop}}' for prop in properties.keys()]) - add_query = f""" - MATCH (node1:{node_label_properties})-[rel:{relationship}]-(node2:{connected_node_label_properties}) - WITH rel - UNWIND keys($properties) AS prop - WHERE NOT EXISTS(rel[prop]) - SET {property_setters} - """ - res = self.tx.run(add_query, properties=properties) - res.consume() - self.commit() - def batch_add_properties(self, id_prop_list): """Add properties to existing nodes. @@ -597,10 +576,10 @@ def batch_add_properties(self, id_prop_list): MATCH (n) WHERE ID(n) = item.id SET n += item.props""" + res = self.tx.run(add_query, batch=batch) res.consume() self.commit() - # Function to execute the Cypher query to set properties on relationships class BasePostProcess(object): @@ -725,4 +704,4 @@ def save_cached_object(self, object_name: str, object) -> None: pickle.dump(object, f) def clear_cache(self) -> None: - rmtree(self.cache_dir) + rmtree(self.cache_dir) \ No newline at end of file diff --git a/iyp/crawlers/rov/README.md b/iyp/crawlers/rovista/README.md similarity index 100% rename from iyp/crawlers/rov/README.md rename to iyp/crawlers/rovista/README.md diff --git a/iyp/crawlers/rov/rovista.py b/iyp/crawlers/rovista/validating_rov.py similarity index 54% rename from iyp/crawlers/rov/rovista.py rename to iyp/crawlers/rovista/validating_rov.py index 30a3baf..36d744f 100644 --- a/iyp/crawlers/rov/rovista.py +++ b/iyp/crawlers/rovista/validating_rov.py @@ -8,8 +8,8 @@ from iyp import BaseCrawler, RequestStatusError URL = 'https://api.rovista.netsecurelab.org/rovista/api/overview' -ORG = 'ROV' -NAME = 'rov.rovista' +ORG = 'RoVista' +NAME = 'rovista.validating_rov' class Crawler(BaseCrawler): @@ -18,33 +18,45 @@ def run(self): """Get RoVista data from their API.""" batch_size = 1000 # Adjust batch size as needed offset = 0 + entries = [] + asns = set() + while True: # Make a request with the current offset response = requests.get(URL, params={'offset': offset, 'count': batch_size}) if response.status_code != 200: raise RequestStatusError('Error while fetching RoVista data') + data = response.json().get('data', []) for entry in data: - asn = entry['asn'] - ratio = entry['ratio'] - if ratio > 0.5: - self.iyp.add_relationship_properties( - node_label_properties=f'AS{{asn: {asn}}}', - relationship='CATEGORIZED', - connected_node_label_properties='Tag{label:"Validating RPKI ROV"}', - properties={'ratio': ratio}) + asns.add(entry['asn']) + if entry['ratio'] > 0.5: + entries.append({'asn':entry['asn'],'ratio':entry['ratio'],'label':'Validating RPKI ROV'}) else: - self.iyp.add_relationship_properties( - node_label_properties=f'AS{{asn: {asn}}}', - relationship='CATEGORIZED', - connected_node_label_properties='Tag{label:"Not Validating RPKI ROV"}', - properties={'ratio': ratio}) + entries.append({'asn':entry['asn'],'ratio':entry['ratio'],'label':'Not Validating RPKI ROV'}) + # Move to the next page - offset += batch_size + offset += 1 # Break the loop if there's no more data if len(data) < batch_size: break - + logging.info('Pushing nodes to neo4j...\n') + # get ASNs and prefixes IDs + self.asn_id = self.iyp.batch_get_nodes_by_single_prop('AS', 'asn', asns) + tag_id_not_vali= self.iyp.get_node('Tag',{'label':"Not Validating RPKI ROV"},create=True) + tag_id_vali=self.iyp.get_node('Tag',{'label':"Validating RPKI ROV"},create=True) + # Compute links + links = [] + for entry in entries: + asn_qid = self.asn_id[entry['asn']] + if entry['ratio'] > 0.5: + links.append({'src_id': asn_qid, 'dst_id':tag_id_vali , 'props': [self.reference, entry]}) + else : + links.append({'src_id': asn_qid, 'dst_id':tag_id_not_vali , 'props': [self.reference, entry]}) + + logging.info('Pushing links to neo4j...\n') + # Push all links to IYP + self.iyp.batch_add_links('CATEGORIZED', links) def main() -> None: parser = argparse.ArgumentParser() From 1edbd675d3243d026f42442d59a7fb0c297a01e1 Mon Sep 17 00:00:00 2001 From: Rishi Mondal <146999057+MAVRICK-1@users.noreply.github.com> Date: Wed, 7 Feb 2024 19:29:20 +0530 Subject: [PATCH 08/18] Update validating_rov.py --- iyp/crawlers/rovista/validating_rov.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/iyp/crawlers/rovista/validating_rov.py b/iyp/crawlers/rovista/validating_rov.py index 36d744f..3b96e5d 100644 --- a/iyp/crawlers/rovista/validating_rov.py +++ b/iyp/crawlers/rovista/validating_rov.py @@ -11,7 +11,6 @@ ORG = 'RoVista' NAME = 'rovista.validating_rov' - class Crawler(BaseCrawler): def run(self): @@ -20,7 +19,6 @@ def run(self): offset = 0 entries = [] asns = set() - while True: # Make a request with the current offset response = requests.get(URL, params={'offset': offset, 'count': batch_size}) @@ -31,10 +29,9 @@ def run(self): for entry in data: asns.add(entry['asn']) if entry['ratio'] > 0.5: - entries.append({'asn':entry['asn'],'ratio':entry['ratio'],'label':'Validating RPKI ROV'}) + entries.append({'asn': entry['asn'], 'ratio': entry['ratio'], 'label': 'Validating RPKI ROV'}) else: - entries.append({'asn':entry['asn'],'ratio':entry['ratio'],'label':'Not Validating RPKI ROV'}) - + entries.append({'asn': entry['asn'], 'ratio': entry['ratio'], 'label': 'Not Validating RPKI ROV'}) # Move to the next page offset += 1 # Break the loop if there's no more data From 3eb3cc252f7683d140fb6547fa3de82da47ce18d Mon Sep 17 00:00:00 2001 From: Rishi Mondal <146999057+MAVRICK-1@users.noreply.github.com> Date: Wed, 7 Feb 2024 19:33:43 +0530 Subject: [PATCH 09/18] Update validating_rov.py --- iyp/crawlers/rovista/validating_rov.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/iyp/crawlers/rovista/validating_rov.py b/iyp/crawlers/rovista/validating_rov.py index 3b96e5d..97217a0 100644 --- a/iyp/crawlers/rovista/validating_rov.py +++ b/iyp/crawlers/rovista/validating_rov.py @@ -11,6 +11,7 @@ ORG = 'RoVista' NAME = 'rovista.validating_rov' + class Crawler(BaseCrawler): def run(self): @@ -19,19 +20,21 @@ def run(self): offset = 0 entries = [] asns = set() + while True: # Make a request with the current offset response = requests.get(URL, params={'offset': offset, 'count': batch_size}) if response.status_code != 200: raise RequestStatusError('Error while fetching RoVista data') - + data = response.json().get('data', []) for entry in data: asns.add(entry['asn']) if entry['ratio'] > 0.5: entries.append({'asn': entry['asn'], 'ratio': entry['ratio'], 'label': 'Validating RPKI ROV'}) else: - entries.append({'asn': entry['asn'], 'ratio': entry['ratio'], 'label': 'Not Validating RPKI ROV'}) + entries.append({'asn': entry['asn'], 'ratio': entry['ratio'], 'label': 'Not Validating RPKI ROV'}) + # Move to the next page offset += 1 # Break the loop if there's no more data @@ -40,21 +43,22 @@ def run(self): logging.info('Pushing nodes to neo4j...\n') # get ASNs and prefixes IDs self.asn_id = self.iyp.batch_get_nodes_by_single_prop('AS', 'asn', asns) - tag_id_not_vali= self.iyp.get_node('Tag',{'label':"Not Validating RPKI ROV"},create=True) - tag_id_vali=self.iyp.get_node('Tag',{'label':"Validating RPKI ROV"},create=True) + tag_id_not_vali = self.iyp.get_node('Tag', {'label': "Not Validating RPKI ROV"}, create=True) + tag_id_vali = self.iyp.get_node('Tag', {'label': "Validating RPKI ROV"}, create=True) # Compute links links = [] for entry in entries: asn_qid = self.asn_id[entry['asn']] if entry['ratio'] > 0.5: - links.append({'src_id': asn_qid, 'dst_id':tag_id_vali , 'props': [self.reference, entry]}) - else : - links.append({'src_id': asn_qid, 'dst_id':tag_id_not_vali , 'props': [self.reference, entry]}) - + links.append({'src_id': asn_qid, 'dst_id': tag_id_vali, 'props': [self.reference, entry]}) + else: + links.append({'src_id': asn_qid, 'dst_id': tag_id_not_vali, 'props': [self.reference, entry]}) + logging.info('Pushing links to neo4j...\n') # Push all links to IYP self.iyp.batch_add_links('CATEGORIZED', links) + def main() -> None: parser = argparse.ArgumentParser() parser.add_argument('--unit-test', action='store_true') From f2846d527c26d58aed3a805dc19a8cd7c01ebc88 Mon Sep 17 00:00:00 2001 From: Rishi Mondal <146999057+MAVRICK-1@users.noreply.github.com> Date: Wed, 7 Feb 2024 19:35:49 +0530 Subject: [PATCH 10/18] Update validating_rov.py --- iyp/crawlers/rovista/validating_rov.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/iyp/crawlers/rovista/validating_rov.py b/iyp/crawlers/rovista/validating_rov.py index 97217a0..2581307 100644 --- a/iyp/crawlers/rovista/validating_rov.py +++ b/iyp/crawlers/rovista/validating_rov.py @@ -43,8 +43,8 @@ def run(self): logging.info('Pushing nodes to neo4j...\n') # get ASNs and prefixes IDs self.asn_id = self.iyp.batch_get_nodes_by_single_prop('AS', 'asn', asns) - tag_id_not_vali = self.iyp.get_node('Tag', {'label': "Not Validating RPKI ROV"}, create=True) - tag_id_vali = self.iyp.get_node('Tag', {'label': "Validating RPKI ROV"}, create=True) + tag_id_not_vali = self.iyp.get_node('Tag', {'label': 'Not Validating RPKI ROV'}, create=True) + tag_id_vali = self.iyp.get_node('Tag', {'label': 'Validating RPKI ROV'}, create=True) # Compute links links = [] for entry in entries: From 03a522887a41457e09514c822b5725d9e0220bc4 Mon Sep 17 00:00:00 2001 From: Rishi Mondal <146999057+MAVRICK-1@users.noreply.github.com> Date: Wed, 7 Feb 2024 19:43:20 +0530 Subject: [PATCH 11/18] Update __init__.py --- iyp/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/iyp/__init__.py b/iyp/__init__.py index ee96188..4daa91e 100644 --- a/iyp/__init__.py +++ b/iyp/__init__.py @@ -704,4 +704,5 @@ def save_cached_object(self, object_name: str, object) -> None: pickle.dump(object, f) def clear_cache(self) -> None: - rmtree(self.cache_dir) \ No newline at end of file + rmtree(self.cache_dir) + From 7aab9853494e4b1f900563da122396b32211f6a0 Mon Sep 17 00:00:00 2001 From: Rishi Mondal <146999057+MAVRICK-1@users.noreply.github.com> Date: Wed, 7 Feb 2024 19:44:44 +0530 Subject: [PATCH 12/18] Update __init__.py From 40cf365e3c0996b1d547b8bf2dd0c81cde90f864 Mon Sep 17 00:00:00 2001 From: Rishi Mondal <146999057+MAVRICK-1@users.noreply.github.com> Date: Wed, 7 Feb 2024 19:46:20 +0530 Subject: [PATCH 13/18] Update __init__.py --- iyp/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/iyp/__init__.py b/iyp/__init__.py index 4daa91e..7dcfe36 100644 --- a/iyp/__init__.py +++ b/iyp/__init__.py @@ -705,4 +705,3 @@ def save_cached_object(self, object_name: str, object) -> None: def clear_cache(self) -> None: rmtree(self.cache_dir) - From 0dbf98cb291a7f167c75d488b1e9d6a6c5d3078f Mon Sep 17 00:00:00 2001 From: Rishi Mondal <146999057+MAVRICK-1@users.noreply.github.com> Date: Wed, 7 Feb 2024 20:01:17 +0530 Subject: [PATCH 14/18] Update __init__.py From b7795560ca10404dbcced133a4be9ba8a588a7a4 Mon Sep 17 00:00:00 2001 From: Rishi Mondal <146999057+MAVRICK-1@users.noreply.github.com> Date: Thu, 8 Feb 2024 08:13:07 +0530 Subject: [PATCH 15/18] Requested changes --- iyp/crawlers/rovista/validating_rov.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/iyp/crawlers/rovista/validating_rov.py b/iyp/crawlers/rovista/validating_rov.py index 2581307..1e8484b 100644 --- a/iyp/crawlers/rovista/validating_rov.py +++ b/iyp/crawlers/rovista/validating_rov.py @@ -40,21 +40,21 @@ def run(self): # Break the loop if there's no more data if len(data) < batch_size: break - logging.info('Pushing nodes to neo4j...\n') + logging.info('Pushing nodes to neo4j...') # get ASNs and prefixes IDs self.asn_id = self.iyp.batch_get_nodes_by_single_prop('AS', 'asn', asns) - tag_id_not_vali = self.iyp.get_node('Tag', {'label': 'Not Validating RPKI ROV'}, create=True) - tag_id_vali = self.iyp.get_node('Tag', {'label': 'Validating RPKI ROV'}, create=True) + tag_id_not_valid = self.iyp.get_node('Tag', {'label': 'Not Validating RPKI ROV'}, create=True) + tag_id_valid = self.iyp.get_node('Tag', {'label': 'Validating RPKI ROV'}, create=True) # Compute links links = [] for entry in entries: asn_qid = self.asn_id[entry['asn']] if entry['ratio'] > 0.5: - links.append({'src_id': asn_qid, 'dst_id': tag_id_vali, 'props': [self.reference, entry]}) + links.append({'src_id': asn_qid, 'dst_id': tag_id_valid, 'props': [self.reference, {'ratio': entry['ratio']}) else: - links.append({'src_id': asn_qid, 'dst_id': tag_id_not_vali, 'props': [self.reference, entry]}) + links.append({'src_id': asn_qid, 'dst_id': tag_id_not_valid, 'props': [self.reference, {'ratio': entry['ratio']}) - logging.info('Pushing links to neo4j...\n') + logging.info('Pushing links to neo4j...') # Push all links to IYP self.iyp.batch_add_links('CATEGORIZED', links) From df1a3d5d33c5d15f6d08e95a99b15e6475510da9 Mon Sep 17 00:00:00 2001 From: Rishi Mondal <146999057+MAVRICK-1@users.noreply.github.com> Date: Thu, 8 Feb 2024 08:16:47 +0530 Subject: [PATCH 16/18] Minor error --- iyp/crawlers/rovista/validating_rov.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/iyp/crawlers/rovista/validating_rov.py b/iyp/crawlers/rovista/validating_rov.py index 1e8484b..7cb90a3 100644 --- a/iyp/crawlers/rovista/validating_rov.py +++ b/iyp/crawlers/rovista/validating_rov.py @@ -50,9 +50,9 @@ def run(self): for entry in entries: asn_qid = self.asn_id[entry['asn']] if entry['ratio'] > 0.5: - links.append({'src_id': asn_qid, 'dst_id': tag_id_valid, 'props': [self.reference, {'ratio': entry['ratio']}) + links.append({'src_id': asn_qid, 'dst_id': tag_id_valid, 'props': [self.reference, {'ratio': entry['ratio']}]) else: - links.append({'src_id': asn_qid, 'dst_id': tag_id_not_valid, 'props': [self.reference, {'ratio': entry['ratio']}) + links.append({'src_id': asn_qid, 'dst_id': tag_id_not_valid, 'props': [self.reference, {'ratio': entry['ratio']}]) logging.info('Pushing links to neo4j...') # Push all links to IYP From 97c8957ef1a7550328efa76afec9217abdd1d6b1 Mon Sep 17 00:00:00 2001 From: Rishi Mondal <146999057+MAVRICK-1@users.noreply.github.com> Date: Thu, 8 Feb 2024 09:21:12 +0530 Subject: [PATCH 17/18] bracks added --- iyp/crawlers/rovista/validating_rov.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/iyp/crawlers/rovista/validating_rov.py b/iyp/crawlers/rovista/validating_rov.py index 7cb90a3..a8e1721 100644 --- a/iyp/crawlers/rovista/validating_rov.py +++ b/iyp/crawlers/rovista/validating_rov.py @@ -50,9 +50,9 @@ def run(self): for entry in entries: asn_qid = self.asn_id[entry['asn']] if entry['ratio'] > 0.5: - links.append({'src_id': asn_qid, 'dst_id': tag_id_valid, 'props': [self.reference, {'ratio': entry['ratio']}]) + links.append({'src_id': asn_qid, 'dst_id': tag_id_valid, 'props': [self.reference, {'ratio': entry['ratio']}]}) else: - links.append({'src_id': asn_qid, 'dst_id': tag_id_not_valid, 'props': [self.reference, {'ratio': entry['ratio']}]) + links.append({'src_id': asn_qid, 'dst_id': tag_id_not_valid, 'props': [self.reference, {'ratio': entry['ratio']}]}) logging.info('Pushing links to neo4j...') # Push all links to IYP From d1a3358b54475d92c5c1a34d319f5ca8b7841151 Mon Sep 17 00:00:00 2001 From: Rishi Mondal <146999057+MAVRICK-1@users.noreply.github.com> Date: Thu, 8 Feb 2024 09:29:38 +0530 Subject: [PATCH 18/18] precommit-resolved --- iyp/crawlers/rovista/validating_rov.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/iyp/crawlers/rovista/validating_rov.py b/iyp/crawlers/rovista/validating_rov.py index a8e1721..90fbcc0 100644 --- a/iyp/crawlers/rovista/validating_rov.py +++ b/iyp/crawlers/rovista/validating_rov.py @@ -50,9 +50,11 @@ def run(self): for entry in entries: asn_qid = self.asn_id[entry['asn']] if entry['ratio'] > 0.5: - links.append({'src_id': asn_qid, 'dst_id': tag_id_valid, 'props': [self.reference, {'ratio': entry['ratio']}]}) + links.append({'src_id': asn_qid, 'dst_id': tag_id_valid, 'props': [ + self.reference, {'ratio': entry['ratio']}]}) else: - links.append({'src_id': asn_qid, 'dst_id': tag_id_not_valid, 'props': [self.reference, {'ratio': entry['ratio']}]}) + links.append({'src_id': asn_qid, 'dst_id': tag_id_not_valid, + 'props': [self.reference, {'ratio': entry['ratio']}]}) logging.info('Pushing links to neo4j...') # Push all links to IYP