Add info URL and modification timestamp for Virginia Tech
m-appel committed Feb 16, 2024
1 parent 51f411e · commit 9d79c11
Showing 1 changed file with 21 additions and 6 deletions.
27 changes: 21 additions & 6 deletions in iyp/crawlers/virginiatech/rovista.py
@@ -2,6 +2,7 @@
 import logging
 import os
 import sys
+from datetime import datetime, timezone
 
 import requests
 
@@ -13,10 +14,21 @@
 
 
 class Crawler(BaseCrawler):
+    def __init__(self, organization, url, name):
+        super().__init__(organization, url, name)
+        self.reference['reference_url_info'] = 'https://rovista.netsecurelab.org/'
+
+    def __set_modification_time(self, entry):
+        try:
+            date_str = entry['lastUpdatedDate']
+            date = datetime.strptime(date_str, '%Y-%m-%d').replace(tzinfo=timezone.utc)
+            self.reference['reference_time_modification'] = date
+        except (KeyError, ValueError) as e:
+            logging.warning(f'Failed to set modification time: {e}')
+
     def run(self):
         """Get RoVista data from their API."""
-        batch_size = 1000  # Adjust batch size as needed
+        batch_size = 1000
         offset = 0
         entries = []
         asns = set()
@@ -25,26 +37,29 @@ def run(self):
             # Make a request with the current offset
             response = requests.get(URL, params={'offset': offset, 'count': batch_size})
             if response.status_code != 200:
-                raise RequestStatusError('Error while fetching RoVista data')
+                raise RequestStatusError(f'Error while fetching RoVista data: {response.status_code}')
 
             data = response.json().get('data', [])
             for entry in data:
+                if not self.reference['reference_time_modification']:
+                    self.__set_modification_time(entry)
                 asns.add(entry['asn'])
                 if entry['ratio'] > 0.5:
-                    entries.append({'asn': entry['asn'], 'ratio': entry['ratio'], 'label': 'Validating RPKI ROV'})
+                    entries.append({'asn': entry['asn'], 'ratio': entry['ratio']})
                 else:
-                    entries.append({'asn': entry['asn'], 'ratio': entry['ratio'], 'label': 'Not Validating RPKI ROV'})
+                    entries.append({'asn': entry['asn'], 'ratio': entry['ratio']})
 
             # Move to the next page
             offset += 1
             # Break the loop if there's no more data
             if len(data) < batch_size:
                 break
 
         logging.info('Pushing nodes to neo4j...')
         # get ASNs and prefixes IDs
         self.asn_id = self.iyp.batch_get_nodes_by_single_prop('AS', 'asn', asns)
-        tag_id_not_valid = self.iyp.get_node('Tag', {'label': 'Not Validating RPKI ROV'}, create=True)
-        tag_id_valid = self.iyp.get_node('Tag', {'label': 'Validating RPKI ROV'}, create=True)
+        tag_id_not_valid = self.iyp.get_node('Tag', {'label': 'Not Validating RPKI ROV'})
+        tag_id_valid = self.iyp.get_node('Tag', {'label': 'Validating RPKI ROV'})
         # Compute links
         links = []
         for entry in entries:
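
As a self-contained illustration of the date handling this commit introduces, here is a minimal sketch of what the new __set_modification_time helper does with one API record. The sample entry (ASN 64496 from the documentation range, ratio 0.82, date 2024-02-15) is hypothetical; the parsing line mirrors the added code.

from datetime import datetime, timezone

# Hypothetical sample record, shaped like the RoVista API entries in the diff.
entry = {'asn': 64496, 'ratio': 0.82, 'lastUpdatedDate': '2024-02-15'}

# Same parsing as the new helper: read the date-only string, then pin it
# to UTC so the stored modification timestamp is timezone-aware.
date = datetime.strptime(entry['lastUpdatedDate'], '%Y-%m-%d').replace(tzinfo=timezone.utc)
print(date.isoformat())  # 2024-02-15T00:00:00+00:00

Because the run() loop guards the call with `if not self.reference['reference_time_modification']`, the timestamp is taken from the first record whose lastUpdatedDate parses successfully; later records are not consulted.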
