Skip to content

Commit

Permalink
Merge branch 'uuid' of 'git://github.com/grimoirelab/perceval.git'
Browse files Browse the repository at this point in the history
  • Loading branch information
sduenas committed Mar 14, 2016
2 parents 65a2d7e + 9d00cbf commit 8201934
Show file tree
Hide file tree
Showing 12 changed files with 344 additions and 145 deletions.
77 changes: 58 additions & 19 deletions perceval/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

import argparse
import functools
import hashlib
import sys

from .cache import Cache
Expand All @@ -37,6 +38,9 @@ class Backend:
Derived classes have to implement `fetch` and `fetch_from_cache`
methods. Otherwise, `NotImplementedError` exception will be raised.
Metadata decorator can be used together with fetch methods but
requires the implementation of `metadata_id` and `metadata_updated_on`
static methods.
To track which version of the backend was used during the fetching
process, this class provides a `version` attribute that each backend
Expand Down Expand Up @@ -70,6 +74,14 @@ def fetch(self, from_date=DEFAULT_DATETIME):
def fetch_from_cache(self):
    """Fetch items from the cache.

    Placeholder: this implementation always raises.

    :raises NotImplementedError: always
    """
    raise NotImplementedError()

@staticmethod
def metadata_id(item):
    """Extract the identifier of an item.

    Placeholder: this implementation always raises.

    :param item: item generated by the backend

    :raises NotImplementedError: always
    """
    raise NotImplementedError()

@staticmethod
def metadata_updated_on(item):
    """Extract the update time of an item.

    Placeholder: this implementation always raises.

    :param item: item generated by the backend

    :raises NotImplementedError: always
    """
    raise NotImplementedError()

def _purge_cache_queue(self):
self.cache_queue = []

Expand Down Expand Up @@ -138,34 +150,61 @@ def create_argument_parser(cls):
return parser


def metadata(func):
    """Add metadata to an item.

    Decorator that adds metadata to a given item such as how and
    when it was fetched. It wraps a generator method and, for every
    item that method yields, stores a dict under the '__metadata__'
    key with these entries:

      * 'backend_name': name of the class of the decorated method
      * 'backend_version': value of `self.version`
      * 'timestamp': UNIX timestamp taken when the item is yielded
      * 'origin': value of `self.origin`
      * 'uuid': identifier built from `self.origin` and
        `self.metadata_id(item)`
      * 'updated_on': value of `self.metadata_updated_on(item)`

    Take into account that this decorator can only be called from a
    'Backend' class because it needs access to some of the attributes
    and methods of this class.

    :param func: generator method to decorate

    :returns: a generator function yielding the same items, each one
        extended with the '__metadata__' entry
    """
    from datetime import datetime as dt

    META_KEY = '__metadata__'

    @functools.wraps(func)
    def decorator(self, *args, **kwargs):
        for item in func(self, *args, **kwargs):
            item[META_KEY] = {
                'backend_name': self.__class__.__name__,
                'backend_version': self.version,
                'timestamp': dt.now().timestamp(),
                'origin': self.origin,
                # The identifier is scoped by origin, so the same item id
                # coming from two different origins gets different UUIDs.
                'uuid': uuid(self.origin, self.metadata_id(item)),
                'updated_on': self.metadata_updated_on(item),
            }
            yield item
    return decorator


def uuid(*args):
    """Generate a UUID based on the given parameters.

    The UUID is the SHA1 hex digest of the given values joined
    with ':' and encoded as UTF-8. Every value must be a non-empty
    string; values are validated in order and the first invalid one
    raises.

    :param *args: list of arguments used to generate the UUID

    :returns: a universal unique identifier

    :raises ValueError: when any of the values is not a string
        or is empty.
    """
    for value in args:
        if not isinstance(value, str):
            raise ValueError("%s value is not a string instance" % str(value))
        if not value:
            raise ValueError("value cannot be None or empty")

    joined = ':'.join(args)

    return hashlib.sha1(joined.encode('utf-8')).hexdigest()
44 changes: 27 additions & 17 deletions perceval/backends/bugzilla.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,21 +43,6 @@
logger = logging.getLogger(__name__)


def get_update_time(item):
    """Extract and convert the update time from a Bugzilla item.

    The timestamp is read from the 'delta_ts' field and converted to
    UNIX timestamp format. Because Bugzilla servers ignore the
    timezone on HTTP requests, it is ignored during the conversion
    too: the parsed date is always tagged as UTC.
    """
    raw_ts = item['delta_ts'][0]['__text__']
    as_utc = str_to_datetime(raw_ts).replace(tzinfo=dateutil.tz.tzutc())

    return as_utc.timestamp()


class Bugzilla(Backend):
"""Bugzilla backend.
Expand All @@ -80,7 +65,7 @@ def __init__(self, url, user=None, password=None,
self.max_bugs = max(1, max_bugs)
self.client = BugzillaClient(url, user=user, password=password)

@metadata(get_update_time)
@metadata
def fetch(self, from_date=DEFAULT_DATETIME):
"""Fetch the bugs from the repository.
Expand Down Expand Up @@ -122,7 +107,7 @@ def fetch(self, from_date=DEFAULT_DATETIME):
logger.info("Fetch process completed: %s/%s bugs fetched",
nbugs, tbugs)

@metadata(get_update_time)
@metadata
def fetch_from_cache(self):
"""Fetch the bugs from the cache.
Expand Down Expand Up @@ -205,6 +190,31 @@ def __fetch_and_parse_bug_activity(self, bug_id):
activity = self.parse_bug_activity(raw_activity)
return [event for event in activity]

@staticmethod
def metadata_id(item):
    """Extract the identifier from a Bugzilla item.

    The identifier is the '__text__' value of the first 'bug_id' entry.
    """
    first_entry = item['bug_id'][0]

    return first_entry['__text__']

@staticmethod
def metadata_updated_on(item):
    """Extract and convert the update time from a Bugzilla item.

    The timestamp is read from the 'delta_ts' field and converted to
    UNIX timestamp format. Because Bugzilla servers ignore the
    timezone on HTTP requests, it is ignored during the conversion
    too: the parsed date is always tagged as UTC.

    :param item: item generated by the backend

    :returns: a UNIX timestamp
    """
    raw_ts = item['delta_ts'][0]['__text__']
    as_utc = str_to_datetime(raw_ts).replace(tzinfo=dateutil.tz.tzutc())

    return as_utc.timestamp()

@staticmethod
def parse_buglist(raw_csv):
"""Parse a Bugzilla CSV bug list.
Expand Down
28 changes: 21 additions & 7 deletions perceval/backends/gerrit.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,6 @@
logger = logging.getLogger(__name__)


def get_update_time(item):
    """Return the 'lastUpdated' field of a Gerrit item as a float."""
    last_updated = item['lastUpdated']

    return float(last_updated)


class Gerrit(Backend):
"""Gerrit backend.
Expand All @@ -64,7 +59,7 @@ def __init__(self, url, user=None, max_reviews=None, cache=None):
self.max_reviews = max_reviews
self.client = GerritClient(self.url, user, max_reviews)

@metadata(get_update_time)
@metadata
def fetch(self, from_date=DEFAULT_DATETIME):
"""Fetch the reviews from the repository.
Expand Down Expand Up @@ -99,7 +94,7 @@ def fetch(self, from_date=DEFAULT_DATETIME):
last_item = self.client.next_retrieve_group_item(last_item, review)
reviews = self._get_reviews(last_item)

@metadata(get_update_time)
@metadata
def fetch_from_cache(self):
"""Fetch reviews from the cache.
Expand Down Expand Up @@ -133,6 +128,25 @@ def _get_reviews(self, last_item):
time.time()-task_init))
return reviews

@staticmethod
def metadata_id(item):
    """Extract the identifier from a Gerrit item (its 'id' field)."""
    identifier = item['id']

    return identifier

@staticmethod
def metadata_updated_on(item):
    """Extract and convert the update time from a Gerrit item.

    The value is read from the 'lastUpdated' field. It is already a
    UNIX timestamp, so it only needs to be converted to a float.

    :param item: item generated by the backend

    :returns: a UNIX timestamp
    """
    last_updated = item['lastUpdated']

    return float(last_updated)

@staticmethod
def parse_reviews(raw_data):
"""Parse a Gerrit reviews list."""
Expand Down
38 changes: 24 additions & 14 deletions perceval/backends/git.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,19 +34,6 @@
logger = logging.getLogger(__name__)


def get_update_time(item):
    """Extract the update time from a Git item.

    The date is read from the 'CommitDate' field and converted to
    UNIX timestamp format, taking into account the timezone of the
    date.
    """
    commit_date = str_to_datetime(item['CommitDate'])

    return commit_date.timestamp()


class Git(Backend):
"""Git backend.
Expand All @@ -72,7 +59,7 @@ def __init__(self, uri, gitpath, cache=None):
self.uri = uri
self.gitpath = gitpath

@metadata(get_update_time)
@metadata
def fetch(self, from_date=DEFAULT_DATETIME):
"""Fetch commits.
Expand Down Expand Up @@ -128,6 +115,29 @@ def __create_and_update_git_repository(self):

return repo

@staticmethod
def metadata_id(item):
    """Extract the identifier from a Git item (its 'commit' field)."""
    commit_id = item['commit']

    return commit_id

@staticmethod
def metadata_updated_on(item):
    """Extract the update time from a Git item.

    The date is read from the 'CommitDate' field and converted to
    UNIX timestamp format, taking into account the timezone of the
    date.

    :param item: item generated by the backend

    :returns: a UNIX timestamp
    """
    commit_date = str_to_datetime(item['CommitDate'])

    return commit_date.timestamp()

@staticmethod
def parse_git_log_from_file(filepath):
"""Parse a Git log file.
Expand Down
40 changes: 25 additions & 15 deletions perceval/backends/github.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,19 +44,6 @@
logger = logging.getLogger(__name__)


def get_update_time(item):
    """Extract the update time from a GitHub item.

    The date is read from the 'updated_at' field and converted to
    UNIX timestamp format. As GitHub dates are in UTC the conversion
    is straightforward.
    """
    updated_at = str_to_datetime(item['updated_at'])

    return updated_at.timestamp()


class GitHub(Backend):
"""GitHub backend for Perceval.
Expand Down Expand Up @@ -102,7 +89,7 @@ def __get_user(self, login):

return user

@metadata(get_update_time)
@metadata
def fetch(self, from_date=DEFAULT_DATETIME):
"""Fetch the issues from the repository.
Expand Down Expand Up @@ -132,7 +119,7 @@ def fetch(self, from_date=DEFAULT_DATETIME):
issue[field+"_data"] = {}
yield issue

@metadata(get_update_time)
@metadata
def fetch_from_cache(self):
"""Fetch the issues from the cache.
Expand Down Expand Up @@ -196,6 +183,29 @@ def __build_issues(self, issues):
self._users[issue[field]['login']]
yield issue

@staticmethod
def metadata_id(item):
    """Extract the identifier from a GitHub item.

    The 'id' field is returned converted to a string.
    """
    raw_id = item['id']

    return str(raw_id)

@staticmethod
def metadata_updated_on(item):
    """Extract the update time from a GitHub item.

    The date is read from the 'updated_at' field and converted to
    UNIX timestamp format. As GitHub dates are in UTC the conversion
    is straightforward.

    :param item: item generated by the backend

    :returns: a UNIX timestamp
    """
    updated_at = str_to_datetime(item['updated_at'])

    return updated_at.timestamp()


class GitHubClient:
""" Client for retrieving information from GitHub API """
Expand Down
Loading

0 comments on commit 8201934

Please sign in to comment.