Skip to content

Commit

Permalink
Merge pull request #2270 from nexB/2263-setup-py-version
Browse files Browse the repository at this point in the history
Improve setup.py version collection #2263
  • Loading branch information
pombredanne authored Oct 2, 2020
2 parents a459598 + 15ebb87 commit f604cf4
Show file tree
Hide file tree
Showing 83 changed files with 16,038 additions and 27 deletions.
275 changes: 252 additions & 23 deletions src/packagedcode/pypi.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,29 +31,27 @@
import json
import logging
import os
import re
import sys

import attr
from six import string_types

import dparse
from dparse import filetypes
from pkginfo import BDist
from pkginfo import Develop
from pkginfo import SDist
from pkginfo import UnpackedSDist
from pkginfo import Wheel

import dparse
from dparse import filetypes

from packageurl import PackageURL
import saneyaml
from six import string_types

from commoncode import filetype
from commoncode import fileutils
from commoncode.system import py2
from packagedcode import models
from packagedcode.utils import build_description
from packagedcode.utils import combine_expressions
from packageurl import PackageURL

try:
# Python 2
Expand All @@ -63,7 +61,6 @@
# Python 3
unicode = str # NOQA


"""
Detect and collect Python packages information.
Expand All @@ -90,7 +87,16 @@ def logger_debug(*args):

@attr.s()
class PythonPackage(models.Package):
metafiles = ('metadata.json', '*setup.py', 'PKG-INFO', '*.whl', '*.egg', '*requirements*.txt', '*requirements*.in', '*Pipfile.lock')
metafiles = (
'metadata.json',
'*setup.py',
'PKG-INFO',
'*.whl',
'*.egg',
'*requirements*.txt',
'*requirements*.in',
'*Pipfile.lock',
)
extensions = ('.egg', '.whl', '.pyz', '.pex',)
default_type = 'pypi'
default_primary_language = 'Python'
Expand Down Expand Up @@ -315,7 +321,7 @@ def parse_pipfile_lock(location):
sha256 = None
if '_meta' in data:
for name, meta in data['_meta'].items():
if name=='hash':
if name == 'hash':
sha256 = meta.get('sha256')

package_dependencies = parse_with_dparse(location)
Expand Down Expand Up @@ -345,29 +351,42 @@ def parse_setup_py(location):
# Parse setup.py file and traverse the AST
tree = ast.parse(setup_text)
for statement in tree.body:
# We only care about function calls or assignments to functions named `setup`
if (isinstance(statement, ast.Expr)
or isinstance(statement, ast.Call)
or isinstance(statement, ast.Assign)
and isinstance(statement.value, ast.Call)
and isinstance(statement.value.func, ast.Name)
and statement.value.func.id == 'setup'):
# We only care about function calls or assignments to functions named
# `setup` or `main`
if (isinstance(statement, (ast.Expr, ast.Call, ast.Assign))
and isinstance(statement.value, ast.Call)
and isinstance(statement.value.func, ast.Name)
# we also look for main as sometimes this is used instead of setup()
and statement.value.func.id in ('setup', 'main')
):

# Process the arguments to the setup function
for kw in statement.value.keywords:
for kw in getattr(statement.value, 'keywords', []):
arg_name = kw.arg

if isinstance(kw.value, ast.Str):
setup_args[arg_name] = kw.value.s
if isinstance(kw.value, ast.List):
# We collect the elements of a list if the element is not a function call
setup_args[arg_name] = [elt.s for elt in kw.value.elts if not isinstance(elt, ast.Call)]

elif isinstance(kw.value, (ast.List, ast.Tuple, ast.Set,)):
# We collect the elements of a list if the element
# and tag function calls
value = [
elt.s for elt in kw.value.elts
if not isinstance(elt, ast.Call)
]
setup_args[arg_name] = value

# TODO: what if isinstance(kw.value, ast.Dict)
# or an expression like a call to version=get_version or version__version__

package_name = setup_args.get('name')
if not package_name:
return

description = build_description(
setup_args.get('summary', ''),
setup_args.get('description', ''))
setup_args.get('description', ''),
)

parties = []
author = setup_args.get('author')
Expand All @@ -383,6 +402,15 @@ def parse_setup_py(location):
url=homepage_url
)
)
elif author_email:
parties.append(
models.Party(
type=models.party_person,
email=author_email,
role='author',
url=homepage_url
)
)

declared_license = OrderedDict()
license_setuptext = setup_args.get('license')
Expand All @@ -394,9 +422,14 @@ def parse_setup_py(location):

other_classifiers = [c for c in classifiers if not c.startswith('License')]

detected_version = setup_args.get('version')
if not detected_version:
# search for possible dunder versions here and elsewhere
detected_version = detect_version_attribute(location)

return PythonPackage(
name=package_name,
version=setup_args.get('version'),
version=detected_version,
description=description or None,
homepage_url=setup_args.get('url') or None,
parties=parties,
Expand All @@ -405,6 +438,202 @@ def parse_setup_py(location):
)


def find_pattern(location, pattern):
"""
Search the file at `location` for a patern regex on a single line and return
this or None if not found. Reads the supplied location as text without
importing it.
Code inspired and heavily modified from:
https://github.com/pyserial/pyserial/blob/d867871e6aa333014a77498b4ac96fdd1d3bf1d8/setup.py#L34
SPDX-License-Identifier: BSD-3-Clause
(C) 2001-2020 Chris Liechti <[email protected]>
"""
with io.open(location, encoding='utf8') as fp:
content = fp.read()

match = re.search(pattern, content)
if match:
return match.group(1).strip()


def find_dunder_version(location):
"""
Return a "dunder" __version__ string or None from searching the module file
at `location`.
Code inspired and heavily modified from:
https://github.com/pyserial/pyserial/blob/d867871e6aa333014a77498b4ac96fdd1d3bf1d8/setup.py#L34
SPDX-License-Identifier: BSD-3-Clause
(C) 2001-2020 Chris Liechti <[email protected]>
"""
pattern = re.compile(r"^__version__\s*=\s*['\"]([^'\"]*)['\"]", re.M)
match = find_pattern(location, pattern)
if TRACE: logger_debug('find_dunder_version:', 'location:', location, 'match:', match)
return match


def find_plain_version(location):
"""
Return a plain version attribute string or None from searching the module
file at `location`.
"""
pattern = re.compile(r"^version\s*=\s*['\"]([^'\"]*)['\"]", re.M)
match = find_pattern(location, pattern)
if TRACE: logger_debug('find_plain_version:', 'location:', location, 'match:', match)
return match


def find_setup_py_dunder_version(location):
"""
Return a "dunder" __version__ expression string used as a setup(version)
argument or None from searching the setup.py file at `location`.
For instance:
setup(
version=six.__version__,
...
would return six.__version__.
Code inspired and heavily modified from:
https://github.com/pyserial/pyserial/blob/d867871e6aa333014a77498b4ac96fdd1d3bf1d8/setup.py#L34
SPDX-License-Identifier: BSD-3-Clause
(C) 2001-2020 Chris Liechti <[email protected]>
"""
pattern = re.compile(r"^\s*version\s*=\s*(.*__version__)", re.M)
match = find_pattern(location, pattern)
if TRACE: logger_debug('find_setup_py_dunder_version:', 'location:', location, 'match:', match)
return match


def detect_version_attribute(setup_location):
"""
Return a detected version from a setup.py file at `location` if used as in
a version argument of the setup() function.
Also search for neighbor files for __version__ and common patterns.
"""
# search for possible dunder versions here and elsewhere
setup_version_arg = find_setup_py_dunder_version(setup_location)
setup_py__version = find_dunder_version(setup_location)
if TRACE:
logger_debug(' detect_dunder_version:', 'setup_location:', setup_location)
logger_debug(' setup_version_arg:', repr(setup_version_arg),)
logger_debug(' setup_py__version:', repr(setup_py__version),)
if setup_version_arg == '__version__' and setup_py__version:
version = setup_py__version or None
if TRACE: logger_debug(' detect_dunder_version: A:', version)
return version

# here we have a more complex __version__ location
# we start by adding the possible paths and file name
# and we look at these in sequence

candidate_locs = []

if setup_version_arg and '.' in setup_version_arg:
segments = setup_version_arg.split('.')[:-1]
else:
segments = []

special_names = (
'__init__.py',
'__main__.py',
'__version__.py',
'__about__.py',
'__version.py',
'_version.py',
'version.py',
'VERSION.py',
'package_data.py',
)

setup_py_dir = fileutils.parent_directory(setup_location)
src_dir = os.path.join(setup_py_dir, 'src')
has_src = os.path.exists(src_dir)

if segments:
for n in special_names:
candidate_locs.append(segments + [n])
if has_src:
for n in special_names:
candidate_locs.append(['src'] + segments + [n])

if len(segments) > 1:
heads = segments[:-1]
tail = segments[-1]
candidate_locs.append(heads + [tail + '.py'])
if has_src:
candidate_locs.append(['src'] + heads + [tail + '.py'])

else:
seg = segments[0]
candidate_locs.append([seg + '.py'])
if has_src:
candidate_locs.append(['src', seg + '.py'])

candidate_locs = [os.path.join(setup_py_dir, *cand_loc_segs) for cand_loc_segs in candidate_locs]

for fl in get_module_scripts(
location=setup_py_dir,
max_depth=4,
interesting_names=special_names,
):
candidate_locs.append(fl)

if TRACE:
for loc in candidate_locs:
logger_debug(' can loc:', loc)

version = detect_version_in_locations(
candidate_locs=candidate_locs,
detector=find_dunder_version
)

if version:
return version

return detect_version_in_locations(
candidate_locs=candidate_locs,
detector=find_plain_version,
)


def detect_version_in_locations(candidate_locs, detector=find_plain_version):
"""
Return the first version found in a location from `candidate_locs` using the
`detector` callable. Or None.
"""
for loc in candidate_locs:
if os.path.exists(loc):
if TRACE: logger_debug('detect_version_in_locations:', 'loc:', loc)
# here the file exists try to get a dunder version
version = detector(loc)
if TRACE: logger_debug('detect_version_in_locations:', 'detector', detector, 'version:', version)
if version:
return version


def get_module_scripts(location, max_depth=1, interesting_names=()):
"""
Yield interesting Python script paths that have a name in
`interesting_names` by walking the `location` directory recursively up to
`max_depth` path segments extending from the root `location`.
"""

location = location.rstrip(os.path.sep)
current_depth = max_depth
for top, _dirs, files in os.walk(location):
if current_depth == 0:
break
for f in files:
if f in interesting_names:
path = os.path.join(top, f)
if TRACE: logger_debug('get_module_scripts:', 'path', path)
yield path

current_depth -= 1


# FIXME: use proper library for parsing these
def parse_metadata(location):
"""
Expand Down
12 changes: 11 additions & 1 deletion src/scancode/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
from itertools import islice
from os.path import getsize
import logging
import os
import sys

from commoncode.filetype import get_last_modified_date
Expand Down Expand Up @@ -287,6 +288,9 @@ def _licenses_data_from_match(
return detected_licenses


SCANCODE_DEBUG_PACKAGE_API = os.environ.get('SCANCODE_DEBUG_PACKAGE_API', False)


def get_package_info(location, **kwargs):
"""
Return a mapping of package manifest information detected in the
Expand All @@ -303,7 +307,13 @@ def get_package_info(location, **kwargs):
except Exception as e:
if TRACE:
logger.error('get_package_info: {}: Exception: {}'.format(location, e))
pass

if SCANCODE_DEBUG_PACKAGE_API:
raise
else:
# attention: we are swallowing ALL exceptions here!
pass

return dict(packages=[])


Expand Down
Loading

0 comments on commit f604cf4

Please sign in to comment.