diff --git a/perceval/backend.py b/perceval/backend.py index 8899c3214..e196d2ba0 100644 --- a/perceval/backend.py +++ b/perceval/backend.py @@ -22,6 +22,7 @@ import argparse import functools +import hashlib import sys from .cache import Cache @@ -37,6 +38,9 @@ class Backend: Derivated classes have to implement `fetch` and `fetch_from_cache` methods. Otherwise, `NotImplementedError` exception will be raised. + Metadata decorator can be used together with fetch methods but + requires the implementation of `metadata_id` and `metadata_updated_on` + static methods. To track which version of the backend was used during the fetching process, this class provides a `version` attribute that each backend @@ -70,6 +74,14 @@ def fetch(self, from_date=DEFAULT_DATETIME): def fetch_from_cache(self): raise NotImplementedError + @staticmethod + def metadata_id(item): + raise NotImplementedError + + @staticmethod + def metadata_updated_on(item): + raise NotImplementedError + def _purge_cache_queue(self): self.cache_queue = [] @@ -138,34 +150,61 @@ def create_argument_parser(cls): return parser -def metadata(fdate): +def metadata(func): """Add metadata to an item. Decorator that adds metadata to a given item such as how and when it was fetched. - As input parameters, this function requieres as function which - extracts from an item when it was updated. - Take into account that this decorator can only be called from a 'Backend' class due it needs access to some of the attributes - of this class. + and methods of this class. 
""" from datetime import datetime as dt META_KEY = '__metadata__' - def metadata_decorator(func): - @functools.wraps(func) - def decorator(self, *args, **kwargs): - for item in func(self, *args, **kwargs): - item[META_KEY] = { - 'backend_name' : self.__class__.__name__, - 'backend_version': self.version, - 'timestamp' : dt.now().timestamp(), - 'origin' : self.origin, - 'updated_on' : fdate(item), - } - yield item - return decorator - return metadata_decorator + @functools.wraps(func) + def decorator(self, *args, **kwargs): + for item in func(self, *args, **kwargs): + item[META_KEY] = { + 'backend_name' : self.__class__.__name__, + 'backend_version': self.version, + 'timestamp' : dt.now().timestamp(), + 'origin' : self.origin, + 'uuid' : uuid(self.origin, self.metadata_id(item)), + 'updated_on' : self.metadata_updated_on(item), + } + yield item + return decorator + + +def uuid(*args): + """Generate a UUID based on the given parameters. + + The UUID will be the SHA1 of the concatenation of the values + from the list. The separator between these values is ':'. + Each value must be a non-empty string, otherwise, the function + will raise an exception. + + :param *args: list of arguments used to generate the UUID + + :returns: a universal unique identifier + + :raises ValueError: when any one of the values is not a string, + is empty or `None`. 
+ """ + def check_value(v): + if not isinstance(v, str): + raise ValueError("%s value is not a string instance" % str(v)) + elif not v: + raise ValueError("value cannot be None or empty") + else: + return v + + s = ':'.join(map(check_value, args)) + + sha1 = hashlib.sha1(s.encode('utf-8')) + uuid_sha1 = sha1.hexdigest() + + return uuid_sha1 diff --git a/perceval/backends/bugzilla.py b/perceval/backends/bugzilla.py index 9b32b7fde..06e958cb2 100644 --- a/perceval/backends/bugzilla.py +++ b/perceval/backends/bugzilla.py @@ -43,21 +43,6 @@ logger = logging.getLogger(__name__) -def get_update_time(item): - """Extracts and coverts the update time from a Bugzilla item. - - The timestamp is extracted from 'delta_ts' field. This date is - converted to UNIX timestamp format. Due Bugzilla servers ignore - the timezone on HTTP requests, it will be ignored during the - conversion, too. - """ - ts = item['delta_ts'][0]['__text__'] - ts = str_to_datetime(ts) - ts = ts.replace(tzinfo=dateutil.tz.tzutc()) - - return ts.timestamp() - - class Bugzilla(Backend): """Bugzilla backend. @@ -80,7 +65,7 @@ def __init__(self, url, user=None, password=None, self.max_bugs = max(1, max_bugs) self.client = BugzillaClient(url, user=user, password=password) - @metadata(get_update_time) + @metadata def fetch(self, from_date=DEFAULT_DATETIME): """Fetch the bugs from the repository. @@ -122,7 +107,7 @@ def fetch(self, from_date=DEFAULT_DATETIME): logger.info("Fetch process completed: %s/%s bugs fetched", nbugs, tbugs) - @metadata(get_update_time) + @metadata def fetch_from_cache(self): """Fetch the bugs from the cache. 
@@ -205,6 +190,31 @@ def __fetch_and_parse_bug_activity(self, bug_id): activity = self.parse_bug_activity(raw_activity) return [event for event in activity] + @staticmethod + def metadata_id(item): + """Extracts the identifier from a Bugzilla item.""" + + return item['bug_id'][0]['__text__'] + + @staticmethod + def metadata_updated_on(item): + """Extracts and converts the update time from a Bugzilla item. + + The timestamp is extracted from 'delta_ts' field. This date is + converted to UNIX timestamp format. Since Bugzilla servers ignore + the timezone on HTTP requests, it will be ignored during the + conversion, too. + + :param item: item generated by the backend + + :returns: a UNIX timestamp + """ + ts = item['delta_ts'][0]['__text__'] + ts = str_to_datetime(ts) + ts = ts.replace(tzinfo=dateutil.tz.tzutc()) + + return ts.timestamp() + @staticmethod def parse_buglist(raw_csv): """Parse a Bugzilla CSV bug list. diff --git a/perceval/backends/gerrit.py b/perceval/backends/gerrit.py index 0525d6338..784ece249 100644 --- a/perceval/backends/gerrit.py +++ b/perceval/backends/gerrit.py @@ -40,11 +40,6 @@ logger = logging.getLogger(__name__) -def get_update_time(item): - """Extracts the update time from a Gerrit item""" - return float(item['lastUpdated']) - - class Gerrit(Backend): """Gerrit backend. @@ -64,7 +59,7 @@ def __init__(self, url, user=None, max_reviews=None, cache=None): self.max_reviews = max_reviews self.client = GerritClient(self.url, user, max_reviews) - @metadata(get_update_time) + @metadata def fetch(self, from_date=DEFAULT_DATETIME): """Fetch the reviews from the repository. @@ -99,7 +94,7 @@ def fetch(self, from_date=DEFAULT_DATETIME): last_item = self.client.next_retrieve_group_item(last_item, review) reviews = self._get_reviews(last_item) - @metadata(get_update_time) + @metadata def fetch_from_cache(self): """Fetch reviews from the cache. 
@@ -133,6 +128,25 @@ def _get_reviews(self, last_item): time.time()-task_init)) return reviews + @staticmethod + def metadata_id(item): + """Extracts the identifier from a Gerrit item.""" + + return item['id'] + + @staticmethod + def metadata_updated_on(item): + """Extracts and converts the update time from a Gerrit item. + + The timestamp is extracted from 'lastUpdated' field. This date is + a UNIX timestamp but needs to be converted to a float value. + + :param item: item generated by the backend + + :returns: a UNIX timestamp + """ + return float(item['lastUpdated']) + @staticmethod def parse_reviews(raw_data): """Parse a Gerrit reviews list.""" diff --git a/perceval/backends/git.py b/perceval/backends/git.py index 34c35adf7..c5ee9d4de 100644 --- a/perceval/backends/git.py +++ b/perceval/backends/git.py @@ -34,19 +34,6 @@ logger = logging.getLogger(__name__) -def get_update_time(item): - """Extracts the update time from a Git item. - - The timestamp used is extracted from 'CommitDate' field. - This date is converted to UNIX timestamp format taking into - account the timezone of the date. - """ - ts = item['CommitDate'] - ts = str_to_datetime(ts) - - return ts.timestamp() - - class Git(Backend): """Git backend. @@ -72,7 +59,7 @@ def __init__(self, uri, gitpath, cache=None): self.uri = uri self.gitpath = gitpath - @metadata(get_update_time) + @metadata def fetch(self, from_date=DEFAULT_DATETIME): """Fetch commits. @@ -128,6 +115,29 @@ def __create_and_update_git_repository(self): return repo + @staticmethod + def metadata_id(item): + """Extracts the identifier from a Git item.""" + + return item['commit'] + + @staticmethod + def metadata_updated_on(item): + """Extracts the update time from a Git item. + + The timestamp used is extracted from 'CommitDate' field. + This date is converted to UNIX timestamp format taking into + account the timezone of the date. 
+ + :param item: item generated by the backend + + :returns: a UNIX timestamp + """ + ts = item['CommitDate'] + ts = str_to_datetime(ts) + + return ts.timestamp() + @staticmethod def parse_git_log_from_file(filepath): """Parse a Git log file. diff --git a/perceval/backends/github.py b/perceval/backends/github.py index 400a83b2f..e6c526ba7 100644 --- a/perceval/backends/github.py +++ b/perceval/backends/github.py @@ -44,19 +44,6 @@ logger = logging.getLogger(__name__) -def get_update_time(item): - """Extracts the update time from a GitHub item. - - The timestamp used is extracted from 'updated_at' field. - This date is converted to UNIX timestamp format. As GitHub - dates are in UTC the conversion is straightforward. - """ - ts = item['updated_at'] - ts = str_to_datetime(ts) - - return ts.timestamp() - - class GitHub(Backend): """GitHub backend for Perceval. @@ -102,7 +89,7 @@ def __get_user(self, login): return user - @metadata(get_update_time) + @metadata def fetch(self, from_date=DEFAULT_DATETIME): """Fetch the issues from the repository. @@ -132,7 +119,7 @@ def fetch(self, from_date=DEFAULT_DATETIME): issue[field+"_data"] = {} yield issue - @metadata(get_update_time) + @metadata def fetch_from_cache(self): """Fetch the issues from the cache. @@ -196,6 +183,29 @@ def __build_issues(self, issues): self._users[issue[field]['login']] yield issue + @staticmethod + def metadata_id(item): + """Extracts the identifier from a GitHub item.""" + + return str(item['id']) + + @staticmethod + def metadata_updated_on(item): + """Extracts the update time from a GitHub item. + + The timestamp used is extracted from 'updated_at' field. + This date is converted to UNIX timestamp format. As GitHub + dates are in UTC the conversion is straightforward. 
+ + :param item: item generated by the backend + + :returns: a UNIX timestamp + """ + ts = item['updated_at'] + ts = str_to_datetime(ts) + + return ts.timestamp() + class GitHubClient: """ Client for retieving information from GitHub API """ diff --git a/perceval/backends/jira.py b/perceval/backends/jira.py index 0688311df..7a75f6ed8 100644 --- a/perceval/backends/jira.py +++ b/perceval/backends/jira.py @@ -41,19 +41,6 @@ logger = logging.getLogger(__name__) -def get_update_time(item): - """Extracts the update time from a issue item. - - The timestamp used is extracted from 'updated' field. - This date is converted to UNIX timestamp format taking - into account the timezone of the date. - """ - ts = item['fields']['updated'] - ts = str_to_datetime(ts) - - return ts.timestamp() - - class Jira(Backend): """JIRA backend for Perceval. @@ -83,7 +70,7 @@ def __init__(self, url, project=None, backend_user=None, self.client = JiraClient(url, project, backend_user, backend_password, verify, cert, max_issues) - @metadata(get_update_time) + @metadata def fetch(self, from_date=DEFAULT_DATETIME): """Fetch the issues from the site. @@ -113,7 +100,7 @@ def fetch(self, from_date=DEFAULT_DATETIME): for issue in issues: yield issue - @metadata(get_update_time) + @metadata def fetch_from_cache(self): """Fetch the issues from the cache. @@ -132,6 +119,29 @@ def fetch_from_cache(self): for issue in issues: yield issue + @staticmethod + def metadata_id(item): + """Extracts the identifier from a Jira item.""" + + return str(item['id']) + + @staticmethod + def metadata_updated_on(item): + """Extracts the update time from a issue item. + + The timestamp used is extracted from 'updated' field. + This date is converted to UNIX timestamp format taking + into account the timezone of the date. 
+ + :param item: item generated by the backend + + :returns: a UNIX timestamp + """ + ts = item['fields']['updated'] + ts = str_to_datetime(ts) + + return ts.timestamp() + @staticmethod def parse_issues(raw_page): """Parse a JIRA API raw response. diff --git a/perceval/backends/mbox.py b/perceval/backends/mbox.py index ee263c7a1..724978dee 100644 --- a/perceval/backends/mbox.py +++ b/perceval/backends/mbox.py @@ -43,19 +43,6 @@ logger = logging.getLogger(__name__) -def get_update_time(item): - """Extracts the update time from a message item. - - The timestamp used is extracted from 'Date' field in its - several forms. This date is converted to UNIX timestamp - format. - """ - ts = item['Date'] if 'Date' in item else item['date'] - ts = str_to_datetime(ts) - - return ts.timestamp() - - class MBox(Backend): """MBox backend. @@ -70,11 +57,14 @@ class MBox(Backend): """ version = '0.1.0' + DATE_FIELD = 'Date' + MESSAGE_ID_FIELD = 'Message-ID' + def __init__(self, origin, dirpath, cache=None): super().__init__(origin, cache=cache) self.dirpath = dirpath - @metadata(get_update_time) + @metadata def fetch(self): """Fetch the commits from a set of mbox files. @@ -105,7 +95,7 @@ def fetch(self): logger.debug("Message from %s parsed", message['unixfrom']) # Convert 'CaseInsensitiveDict' to dict - message = {k : v for k, v in message.items()} + message = self._casedict_to_dict(message) yield message except OSError as e: @@ -118,7 +108,6 @@ def fetch(self): if os.path.exists(tmp_path): os.remove(tmp_path) - logger.info("Fetch process completed: %s/%s messages fetched; %s ignored", nmsgs, tmsgs, imsgs) @@ -139,17 +128,55 @@ def _validate_message(self, message): # This check is "case insensitive" because we're # using 'CaseInsensitiveDict' from requests.structures # module to store the contents of a message. 
- if 'Message-ID' not in message: + if self.MESSAGE_ID_FIELD not in message: logger.warning("Field 'Message-ID' not found in message %s; ignoring", message['unixfrom']) return False - elif 'Date' not in message: + elif self.DATE_FIELD not in message: logger.warning("Field 'Date' not found in message %s; ignoring", message['unixfrom']) return False else: return True + def _casedict_to_dict(self, message): + """Convert a message in CaseInsensitiveDict to dict. + + This method also converts well known problematic headers, + such as Message-ID and Date to a common name. + """ + message_id = message.pop(self.MESSAGE_ID_FIELD) + date = message.pop(self.DATE_FIELD) + + msg = {k : v for k, v in message.items()} + msg[self.MESSAGE_ID_FIELD] = message_id + msg[self.DATE_FIELD] = date + + return msg + + @staticmethod + def metadata_id(item): + """Extracts the identifier from a message item.""" + + return item[MBox.MESSAGE_ID_FIELD] + + @staticmethod + def metadata_updated_on(item): + """Extracts the update time from a message item. + + The timestamp used is extracted from 'Date' field in its + several forms. This date is converted to UNIX timestamp + format. + + :param item: item generated by the backend + + :returns: a UNIX timestamp + """ + ts = item[MBox.DATE_FIELD] + ts = str_to_datetime(ts) + + return ts.timestamp() + @staticmethod def parse_mbox(filepath): """Parse a mbox file. diff --git a/perceval/backends/stackexchange.py b/perceval/backends/stackexchange.py index 236295370..2abcbcd72 100644 --- a/perceval/backends/stackexchange.py +++ b/perceval/backends/stackexchange.py @@ -40,11 +40,6 @@ logger = logging.getLogger(__name__) -def get_update_time(item): - """Extracts the update time from a StackExchange item""" - return float(item['last_activity_date']) - - class StackExchange(Backend): """StackExchange backend for Perceval. 
@@ -68,7 +63,7 @@ def __init__(self, site, tagged=None, token=None, self.max_questions = max_questions self.client = StackExchangeClient(site, tagged, token, max_questions) - @metadata(get_update_time) + @metadata def fetch(self, from_date=DEFAULT_DATETIME): """Fetch the questions from the site. @@ -98,7 +93,7 @@ def fetch(self, from_date=DEFAULT_DATETIME): for question in questions: yield question - @metadata(get_update_time) + @metadata def fetch_from_cache(self): """Fetch the questions from the cache. @@ -117,6 +112,26 @@ def fetch_from_cache(self): for question in questions: yield question + @staticmethod + def metadata_id(item): + """Extracts the identifier from a StackExchange item.""" + + return str(item['question_id']) + + @staticmethod + def metadata_updated_on(item): + """Extracts the update time from a StackExchange item. + + The timestamp is extracted from 'last_activity_date' field. + This date is a UNIX timestamp but needs to be converted to + a float value. + + :param item: item generated by the backend + + :returns: a UNIX timestamp + """ + return float(item['last_activity_date']) + @staticmethod def parse_questions(raw_page): """Parse a StackExchange API raw response. diff --git a/tests/test_backend.py b/tests/test_backend.py index f2b642bdc..0cb63580a 100644 --- a/tests/test_backend.py +++ b/tests/test_backend.py @@ -21,16 +21,18 @@ # Santiago Dueñas # +import argparse +import datetime import sys +import unittest if not '..' 
in sys.path: sys.path.insert(0, '..') -import argparse -import datetime -import unittest - -from perceval.backend import Backend, BackendCommand, metadata +from perceval.backend import (Backend, + BackendCommand, + metadata, + uuid) class TestBackend(unittest.TestCase): @@ -81,10 +83,6 @@ def test_argument_parser(self): self.assertIsInstance(parser, argparse.ArgumentParser) -def mock_fnc_date(item): - return '2016-01-01' - - class MockDecoratorBackend(Backend): """Mock backend to test metadata decorators""" @@ -93,12 +91,26 @@ class MockDecoratorBackend(Backend): def __init__(self, origin): super().__init__(origin) - @metadata(mock_fnc_date) - def fetch(self): + @metadata + def fetch(self, from_date=None): for x in range(5): item = {'item' : x} yield item + @metadata + def fetch_from_cache(self): + for x in range(5): + item = {'item' : x} + yield item + + @staticmethod + def metadata_id(item): + return str(item['item']) + + @staticmethod + def metadata_updated_on(item): + return '2016-01-01' + class TestMetadata(unittest.TestCase): """Test metadata decorator""" @@ -111,17 +123,54 @@ def test_decorator(self): for x in range(5): item = items[x] - metadata = item['__metadata__'] + meta = item['__metadata__'] + + expected_uuid = uuid('test', str(x)) self.assertEqual(item['item'], x) - self.assertEqual(metadata['backend_name'], 'MockDecoratorBackend') - self.assertEqual(metadata['backend_version'], '0.1.0') - self.assertEqual(metadata['origin'], 'test') - self.assertEqual(metadata['updated_on'], '2016-01-01') - self.assertGreater(metadata['timestamp'], before) - self.assertLess(metadata['timestamp'], after) - - before = metadata['timestamp'] + self.assertEqual(meta['backend_name'], 'MockDecoratorBackend') + self.assertEqual(meta['backend_version'], '0.1.0') + self.assertEqual(meta['origin'], 'test') + self.assertEqual(meta['uuid'], expected_uuid) + self.assertEqual(meta['updated_on'], '2016-01-01') + self.assertGreater(meta['timestamp'], before) + 
self.assertLess(meta['timestamp'], after) + + before = meta['timestamp'] + + +class TestUUID(unittest.TestCase): + """Unit tests for uuid function""" + + def test_uuid(self): + """Check whether the function returns the expected UUID""" + + result = uuid('1', '2', '3', '4') + self.assertEqual(result, 'e7b71c81f5a0723e2237f157dba81777ce7c6c21') + + result = uuid('http://example.com/', '1234567') + self.assertEqual(result, '47509b2f0d4ffc513ca9230838a69aa841d7f055') + + def test_non_str_value(self): + """Check whether a UUID cannot be generated when a given value is not a str""" + + self.assertRaises(ValueError, uuid, '1', '2', 3, '4') + self.assertRaises(ValueError, uuid, 0, '1', '2', '3') + self.assertRaises(ValueError, uuid, '1', '2', '3', 4.0) + + def test_none_value(self): + """Check whether a UUID cannot be generated when a given value is None""" + + self.assertRaises(ValueError, uuid, '1', '2', None, '3') + self.assertRaises(ValueError, uuid, None, '1', '2', '3') + self.assertRaises(ValueError, uuid, '1', '2', '3', None) + + def test_empty_value(self): + """Check whether a UUID cannot be generated when a given value is empty""" + + self.assertRaises(ValueError, uuid, '1', '', '2', '3') + self.assertRaises(ValueError, uuid, '', '1', '2', '3') + self.assertRaises(ValueError, uuid, '1', '2', '3', '') if __name__ == "__main__": diff --git a/tests/test_bugzilla.py b/tests/test_bugzilla.py index df0a2434a..a45305d30 100644 --- a/tests/test_bugzilla.py +++ b/tests/test_bugzilla.py @@ -109,11 +109,13 @@ def request_callback(method, uri, headers): self.assertEqual(bugs[0]['bug_id'][0]['__text__'], '15') self.assertEqual(len(bugs[0]['activity']), 0) self.assertEqual(bugs[0]['__metadata__']['origin'], BUGZILLA_SERVER_URL) + self.assertEqual(bugs[0]['__metadata__']['uuid'], '5a8a1e25dfda86b961b4146050883cbfc928f8ec') self.assertEqual(bugs[0]['__metadata__']['updated_on'], 1248276445.0) self.assertEqual(bugs[6]['bug_id'][0]['__text__'], '888') 
self.assertEqual(len(bugs[6]['activity']), 14) self.assertEqual(bugs[6]['__metadata__']['origin'], BUGZILLA_SERVER_URL) + self.assertEqual(bugs[6]['__metadata__']['uuid'], 'b4009442d38f4241a4e22e3e61b7cd8ef5ced35c') self.assertEqual(bugs[6]['__metadata__']['updated_on'], 1439404330.0) # Check requests @@ -223,11 +225,13 @@ def request_callback(method, uri, headers): self.assertEqual(bugs[0]['bug_id'][0]['__text__'], '30') self.assertEqual(len(bugs[0]['activity']), 14) self.assertEqual(bugs[0]['__metadata__']['origin'], BUGZILLA_SERVER_URL) + self.assertEqual(bugs[0]['__metadata__']['uuid'], '4b166308f205121bc57704032acdc81b6c9bb8b1') self.assertEqual(bugs[0]['__metadata__']['updated_on'], 1426868155.0) self.assertEqual(bugs[1]['bug_id'][0]['__text__'], '888') self.assertEqual(len(bugs[1]['activity']), 0) self.assertEqual(bugs[1]['__metadata__']['origin'], BUGZILLA_SERVER_URL) + self.assertEqual(bugs[1]['__metadata__']['uuid'], 'b4009442d38f4241a4e22e3e61b7cd8ef5ced35c') self.assertEqual(bugs[1]['__metadata__']['updated_on'], 1439404330.0) # Check requests @@ -352,11 +356,13 @@ def request_callback(method, uri, headers): self.assertEqual(bugs[0]['bug_id'][0]['__text__'], '30') self.assertEqual(len(bugs[0]['activity']), 14) self.assertEqual(bugs[0]['__metadata__']['origin'], BUGZILLA_SERVER_URL) + self.assertEqual(bugs[0]['__metadata__']['uuid'], '4b166308f205121bc57704032acdc81b6c9bb8b1') self.assertEqual(bugs[0]['__metadata__']['updated_on'], 1426868155.0) self.assertEqual(bugs[1]['bug_id'][0]['__text__'], '888') self.assertEqual(len(bugs[1]['activity']), 0) self.assertEqual(bugs[1]['__metadata__']['origin'], BUGZILLA_SERVER_URL) + self.assertEqual(bugs[1]['__metadata__']['uuid'], 'b4009442d38f4241a4e22e3e61b7cd8ef5ced35c') self.assertEqual(bugs[1]['__metadata__']['updated_on'], 1439404330.0) # Check requests @@ -474,11 +480,13 @@ def request_callback(method, uri, headers): self.assertEqual(cached_bugs[0]['bug_id'][0]['__text__'], '15') 
self.assertEqual(len(cached_bugs[0]['activity']), 0) self.assertEqual(cached_bugs[0]['__metadata__']['origin'], BUGZILLA_SERVER_URL) + self.assertEqual(cached_bugs[0]['__metadata__']['uuid'], '5a8a1e25dfda86b961b4146050883cbfc928f8ec') self.assertEqual(cached_bugs[0]['__metadata__']['updated_on'], 1248276445.0) self.assertEqual(cached_bugs[6]['bug_id'][0]['__text__'], '888') self.assertEqual(len(cached_bugs[6]['activity']), 14) self.assertEqual(cached_bugs[6]['__metadata__']['origin'], BUGZILLA_SERVER_URL) + self.assertEqual(cached_bugs[6]['__metadata__']['uuid'], 'b4009442d38f4241a4e22e3e61b7cd8ef5ced35c') self.assertEqual(cached_bugs[6]['__metadata__']['updated_on'], 1439404330.0) self.assertEqual(len(requests), 13) diff --git a/tests/test_git.py b/tests/test_git.py index f26097014..975a8cfeb 100644 --- a/tests/test_git.py +++ b/tests/test_git.py @@ -36,6 +36,7 @@ sys.path.insert(0, '..') from perceval.errors import ParseError, RepositoryError +from perceval.backend import uuid from perceval.backends.git import (Git, GitCommand, GitParser, @@ -86,9 +87,11 @@ def test_fetch(self): self.assertEqual(len(commits), len(expected)) for x in range(len(commits)): + expected_uuid = uuid(self.git_path, expected[x][0]) commit = commits[x] self.assertEqual(commit['commit'], expected[x][0]) self.assertEqual(commit['__metadata__']['origin'], self.git_path) + self.assertEqual(commit['__metadata__']['uuid'], expected_uuid) self.assertEqual(commit['__metadata__']['updated_on'], expected[x][1]) shutil.rmtree(new_path) @@ -109,9 +112,11 @@ def test_fetch_since_date(self): self.assertEqual(len(commits), len(expected)) for x in range(len(commits)): + expected_uuid = uuid(self.git_path, expected[x][0]) commit = commits[x] self.assertEqual(commit['commit'], expected[x][0]) self.assertEqual(commit['__metadata__']['origin'], self.git_path) + self.assertEqual(commit['__metadata__']['uuid'], expected_uuid) self.assertEqual(commit['__metadata__']['updated_on'], expected[x][1]) # Test it 
using a datetime that includes the timezone @@ -123,9 +128,11 @@ def test_fetch_since_date(self): self.assertEqual(len(commits), len(expected)) for x in range(len(commits)): + expected_uuid = uuid(self.git_path, expected[x][0]) commit = commits[x] self.assertEqual(commit['commit'], expected[x][0]) self.assertEqual(commit['__metadata__']['origin'], self.git_path) + self.assertEqual(commit['__metadata__']['uuid'], expected_uuid) self.assertEqual(commit['__metadata__']['updated_on'], expected[x][1]) shutil.rmtree(new_path) @@ -160,9 +167,11 @@ def test_fetch_from_file(self): self.assertEqual(len(commits), len(expected)) for x in range(len(commits)): + expected_uuid = uuid('http://example.com.git', expected[x][0]) commit = commits[x] self.assertEqual(commit['commit'], expected[x][0]) self.assertEqual(commit['__metadata__']['origin'], 'http://example.com.git') + self.assertEqual(commit['__metadata__']['uuid'], expected_uuid) self.assertEqual(commit['__metadata__']['updated_on'], expected[x][1]) def test_git_parser(self): diff --git a/tests/test_mbox.py b/tests/test_mbox.py index 402fe694b..71d5f0df8 100644 --- a/tests/test_mbox.py +++ b/tests/test_mbox.py @@ -166,14 +166,14 @@ def test_fetch(self): backend = MBox('http://example.com/', self.tmp_path) messages = [m for m in backend.fetch()] - expected = [('<4CF64D10.9020206@domain.com>', 1291210000.0), - ('<4CF64D10.9020206@domain.com>', 1291210000.0), - ('', 1095843820.0), - ('<87iqzlofqu.fsf@avet.kvota.net>', 1205746505.0), - ('<019801ca633f$f4376140$dca623c0$@yang@example.com>', 1257992964.0), - ('', 1043881350.0), - ('<4CF64D10.9020206@domain.com>', 1291210000.0), - ('<20020823171132.541DB44147@example.com>', 1030123489.0)] + expected = [('<4CF64D10.9020206@domain.com>', '86315b479b4debe320b59c881c1e375216cbf333', 1291210000.0), + ('<4CF64D10.9020206@domain.com>', '86315b479b4debe320b59c881c1e375216cbf333', 1291210000.0), + ('', 'bd0185317b013beb21ad3ea04635de3db72496ad', 1095843820.0), + 
('<87iqzlofqu.fsf@avet.kvota.net>', '51535703010a3e63d5272202942c283394cdebca', 1205746505.0), + ('<019801ca633f$f4376140$dca623c0$@yang@example.com>', '302e314c07242bb4750351286862f49e758f3e17', 1257992964.0), + ('', 'ddda42422c55d08d56c017a6f128fcd7447484ea', 1043881350.0), + ('<4CF64D10.9020206@domain.com>', '86315b479b4debe320b59c881c1e375216cbf333', 1291210000.0), + ('<20020823171132.541DB44147@example.com>', '4e255acab6442424ecbf05cb0feb1eccb587f7de', 1030123489.0)] self.assertEqual(len(messages), len(expected)) @@ -181,7 +181,8 @@ def test_fetch(self): message = messages[x] self.assertEqual(message['Message-ID'], expected[x][0]) self.assertEqual(message['__metadata__']['origin'], 'http://example.com/') - self.assertEqual(message['__metadata__']['updated_on'], expected[x][1]) + self.assertEqual(message['__metadata__']['uuid'], expected[x][1]) + self.assertEqual(message['__metadata__']['updated_on'], expected[x][2]) def test_ignore_messages(self): """Test if it ignores some messages without mandatory fields""" @@ -196,7 +197,7 @@ def test_ignore_messages(self): 'From' : 'goran at domain.com ( Göran Lastname )', 'Date' : 'Wed, 01 Dec 2010 14:26:40 +0100', 'Subject' : '[List-name] Protocol Buffers anyone?', - 'Message-id' : '<4CF64D10.9020206@domain.com>', + 'Message-ID' : '<4CF64D10.9020206@domain.com>', 'unixfrom' : 'goran at domain.com Wed Dec 1 08:26:40 2010', 'body': { 'plain' : "Hi!\n\nA message in English, with a signature " @@ -211,9 +212,6 @@ def test_ignore_messages(self): # On the second message, the only change is that 'Message-id' # is replaced by 'Message-ID' - msg_id = expected.pop('Message-id') - expected['Message-ID'] = msg_id - message = messages[1] messages[1].pop('__metadata__') self.assertDictEqual(message, expected)