Skip to content

Commit

Permalink
Move the LoopProgress class to the pipeline module #1351
Browse files Browse the repository at this point in the history
Signed-off-by: tdruez <[email protected]>
  • Loading branch information
tdruez committed Aug 8, 2024
1 parent c83f677 commit 4c5aed3
Show file tree
Hide file tree
Showing 5 changed files with 105 additions and 33 deletions.
130 changes: 101 additions & 29 deletions aboutcode/pipeline/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,34 +28,7 @@
from pydoc import splitdoc
from timeit import default_timer as timer

logger = logging.getLogger(__name__)


def group(*groups):
    """
    Return a decorator that tags an object with the provided ``groups`` names.

    The names are stored as a set on the ``groups`` attribute of the decorated
    object. When the object already carries a ``groups`` attribute, the new
    names are merged into it through ``union``.
    """

    def decorator(obj):
        try:
            existing_groups = obj.groups
        except AttributeError:
            # First time this object is tagged: start from the given names.
            obj.groups = set(groups)
        else:
            obj.groups = existing_groups.union(groups)
        return obj

    return decorator


def humanize_time(seconds):
    """
    Convert the provided ``seconds`` number into human-readable time.

    The result always starts with the rounded number of seconds. A larger unit
    is appended in parentheses: minutes above 60 seconds, hours above 3600
    seconds. Above 86400 seconds, the days are appended in addition to the
    hours.
    """
    parts = [f"{seconds:.0f} seconds"]

    if seconds > 86400:
        parts.append(f"({seconds / 86400:.1f} days)")

    # Hours and minutes are mutually exclusive, the days above are not.
    if seconds > 3600:
        parts.append(f"({seconds / 3600:.1f} hours)")
    elif seconds > 60:
        parts.append(f"({seconds / 60:.1f} minutes)")

    return " ".join(parts)
module_logger = logging.getLogger(__name__)


class BasePipeline:
Expand Down Expand Up @@ -157,7 +130,7 @@ def log(self, message):
now_local = datetime.now(timezone.utc).astimezone()
timestamp = now_local.strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]
message = f"{timestamp} {message}"
logger.info(message)
module_logger.info(message)
self.run.append_to_log(message)

@staticmethod
Expand Down Expand Up @@ -211,3 +184,102 @@ def execute(self):
self.log(f"Pipeline completed in {humanize_time(pipeline_run_time)}")

return 0, ""


def group(*groups):
    """
    Mark a function as part of one or more groups.

    Returns a decorator that stores the ``groups`` names as a set on the
    ``groups`` attribute of the decorated object, merging them through
    ``union`` with any ``groups`` value already present on that object.
    """

    def decorator(obj):
        try:
            current = obj.groups
        except AttributeError:
            # No previous marking on this object.
            obj.groups = set(groups)
        else:
            obj.groups = current.union(groups)
        return obj

    return decorator


def humanize_time(seconds):
    """
    Convert the provided ``seconds`` number into human-readable time.

    The message starts with the seconds rounded to an integer, followed by a
    parenthesized conversion: minutes when above 60 seconds, hours when above
    3600 seconds. Durations above 86400 seconds get both a days and an hours
    conversion.
    """
    chunks = [f"{seconds:.0f} seconds"]

    if seconds > 86400:
        chunks.append(f"({seconds / 86400:.1f} days)")

    # The hours/minutes choice is exclusive; days are reported on top of hours.
    if seconds > 3600:
        chunks.append(f"({seconds / 3600:.1f} hours)")
    elif seconds > 60:
        chunks.append(f"({seconds / 60:.1f} minutes)")

    return " ".join(chunks)


class LoopProgress:
    """
    A context manager for logging progress in loops.

    Usage::

        total_iterations = 100
        logger = print  # Replace with your actual logger function

        progress = LoopProgress(total_iterations, logger, progress_step=10)
        for item in progress.iter(iterator):
            "Your processing logic here"

        # As a context manager
        with LoopProgress(total_iterations, logger, progress_step=10) as progress:
            for item in progress.iter(iterator):
                "Your processing logic here"
    """

    def __init__(self, total_iterations, logger, progress_step=10):
        """
        Store the loop settings and start the timer used for the ETA.

        ``total_iterations`` is the expected number of items, ``logger`` is a
        callable receiving each progress message (a falsy value disables
        logging), and ``progress_step`` is the minimum percentage increase
        between two logged messages.
        """
        self.total_iterations = total_iterations
        self.logger = logger
        self.progress_step = progress_step
        self.start_time = timer()
        self.last_logged_progress = 0
        self.current_iteration = 0

    def get_eta(self, current_progress):
        """
        Return the estimated remaining time in seconds, rounded to an integer,
        extrapolated from the time elapsed to reach ``current_progress`` percent.

        Return 0 when ``current_progress`` is not positive, since no estimate
        can be extrapolated yet.
        """
        if current_progress <= 0:
            return 0
        run_time = timer() - self.start_time
        return round(run_time / current_progress * (100 - current_progress))

    @property
    def current_progress(self):
        """
        Return the progress as an integer percentage, truncated toward zero.

        Return 0 when ``total_iterations`` is empty, avoiding a division by zero.
        """
        if not self.total_iterations:
            return 0
        return int((self.current_iteration / self.total_iterations) * 100)

    @property
    def eta(self):
        """Return the estimated remaining seconds for the current progress."""
        return self.get_eta(self.current_progress)

    def log_progress(self):
        """
        Send a progress message to the ``logger`` when the progress increased
        by at least ``progress_step`` percent since the last logged message.

        Nothing is logged without a logger, before the first iteration, or when
        ``total_iterations`` is not greater than ``progress_step``.
        """
        if not self.logger:
            return
        if not self.current_iteration > 0:
            return
        if self.total_iterations <= self.progress_step:
            return

        progress = self.current_progress
        if progress < self.last_logged_progress + self.progress_step:
            return

        msg = f"Progress: {progress}% ({self.current_iteration}/{self.total_iterations})"
        # An ETA of 0 is omitted from the message.
        if eta := self.get_eta(progress):
            msg += f" ETA: {humanize_time(eta)}"

        self.logger(msg)
        self.last_logged_progress = progress

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        pass

    def iter(self, iterator):
        """
        Yield each item of ``iterator``, counting the iteration and logging the
        progress before the item is handed to the caller.
        """
        for item in iterator:
            self.current_iteration += 1
            self.log_progress()
            yield item
2 changes: 1 addition & 1 deletion scanpipe/pipes/d2d.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,11 +44,11 @@
from packagedcode.npm import NpmPackageJsonHandler
from summarycode.classify import LEGAL_STARTS_ENDS

from aboutcode.pipeline import LoopProgress
from scanpipe import pipes
from scanpipe.models import CodebaseRelation
from scanpipe.models import CodebaseResource
from scanpipe.models import convert_glob_to_django_regex
from scanpipe.pipes import LoopProgress
from scanpipe.pipes import flag
from scanpipe.pipes import get_resource_diff_ratio
from scanpipe.pipes import js
Expand Down
2 changes: 1 addition & 1 deletion scanpipe/pipes/purldb.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
from univers.version_range import RANGE_CLASS_BY_SCHEMES
from univers.version_range import InvalidVersionRange

from scanpipe.pipes import LoopProgress
from aboutcode.pipeline import LoopProgress
from scanpipe.pipes import _clean_package_data


Expand Down
2 changes: 1 addition & 1 deletion scanpipe/pipes/strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
# ScanCode.io is a free software code scanning tool from nexB Inc. and others.
# Visit https://github.com/nexB/scancode.io for support and download.

from scanpipe.pipes import LoopProgress
from aboutcode.pipeline import LoopProgress


class XgettextNotFound(Exception):
Expand Down
2 changes: 1 addition & 1 deletion scanpipe/pipes/symbols.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@

from django.db.models import Q

from scanpipe.pipes import LoopProgress
from aboutcode.pipeline import LoopProgress


class UniversalCtagsNotFound(Exception):
Expand Down

0 comments on commit 4c5aed3

Please sign in to comment.