Skip to content

Commit

Permalink
Fill out function docstrings
Browse files Browse the repository at this point in the history
  • Loading branch information
acrellin committed Oct 7, 2016
1 parent 463e841 commit c74004e
Show file tree
Hide file tree
Showing 3 changed files with 133 additions and 7 deletions.
43 changes: 41 additions & 2 deletions cesium/build_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,19 @@


def rectangularize_featureset(featureset):
"""Convert xarray.Dataset into (2d) Pandas.DataFrame for use with sklearn."""
"""Convert xarray.Dataset into (2d) Pandas.DataFrame for use with sklearn.
Params
------
featureset : xarray.Dataset
The xarray.Dataset object containing features.
Returns
-------
Pandas.Dataframe
2-D, sklearn-compatible Dataframe containing features.
"""
featureset = featureset.drop([coord for coord in featureset.coords
if coord not in ['name', 'channel']])
feature_df = featureset.to_dataframe()
Expand Down Expand Up @@ -71,7 +83,34 @@ def fit_model_optimize_hyperparams(data, targets, model, params_to_optimize,
def build_model_from_featureset(featureset, model=None, model_type=None,
model_options={}, params_to_optimize=None,
cv=None):
"""Build model from (non-rectangular) xarray.Dataset of features."""
"""Build model from (non-rectangular) xarray.Dataset of features.
Parameters
----------
featureset : xarray.Dataset of features
Features for training model.
model : scikit-learn model, optional
Instantiated scikit-learn model. If None, `model_type` must not be.
Defaults to None.
model_type : str, optional
String indicating model to be used, e.g. "RandomForestClassifier".
If None, `model` must not be. Defaults to None.
model_options : dict, optional
Dictionary with hyperparameter values to be used in model building.
params_to_optimize : list of str, optional
List of parameters to be optimized (whose corresponding entries
in `model_options` would be a list of values to try). Defaults to None.
cv : int, cross-validation generator or an iterable, optional
Number of folds (defaults to 3) or an iterable yielding train/test
splits. See documentation for `GridSearchCV` for details. Defaults to
None.
Returns
-------
sklearn estimator object
The fitted sklearn model.
"""
if featureset.get('target') is None:
raise ValueError("Cannot build model for unlabeled feature set.")

Expand Down
56 changes: 55 additions & 1 deletion cesium/datasets/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,22 @@ def _md5sum_file(path):


def download_file(data_dir, base_url, filename):
"""Download a single file into the given directory."""
"""Download a single file into the given directory.
Parameters
----------
data_dir : str
Path to directory in which to save file.
base_url : str
URL of file to download, minus the file name.
filename : str
Name of file to be downloaded.
Returns
-------
str
The path to the newly downloaded file.
"""
if not os.path.exists(data_dir):
os.makedirs(data_dir)

Expand All @@ -43,6 +58,26 @@ def download_and_extract_archives(data_dir, base_url, filenames, md5sums=None,
remove_archive=True):
"""Download list of data archives, verify md5 checksums (if applicable),
and extract into the given directory.
Parameters
----------
data_dir : str
Path to directory in which to download and extract archives.
base_url : str
URL of files to download, minus the file names.
filenames : list or tuple of str
Name of file to be downloaded.
md5sums : dict, optional
Dictionary whose keys are file names and values are
corresponding hexadecimal md5 checksums to be checked against.
remove_archive : bool, optional
Boolean indicating whether to delete the archive(s) from disk
after the contents have been extracted. Defaults to True.
Returns
-------
list of str
The paths to the newly downloaded and unzipped files.
"""
if not os.path.exists(data_dir):
os.makedirs(data_dir)
Expand All @@ -67,6 +102,13 @@ def download_and_extract_archives(data_dir, base_url, filenames, md5sums=None,
def build_time_series_archive(archive_path, ts_paths):
"""Write a .tar.gz archive containing the given time series files, as
required for data uploaded via the front end.
Parameters
----------
archive_path : str
Path at which to create the tarfile.
ts_paths : list of str
Paths to time-series file to be included in tarfile.
"""
with tarfile.TarFile(archive_path, 'w') as t:
for fname in ts_paths:
Expand All @@ -76,6 +118,18 @@ def build_time_series_archive(archive_path, ts_paths):
def write_header(header_path, filenames, classes, metadata={}):
"""Write a header file for the given time series files, as required for
data uploaded via the front end.
Parameters
----------
header_path : str
Path at which header file will be created.
filenames : list of str
List of time-series file names associated with header file.
classes : list of str
List of class names associated with each time-series file.
metadata : dict, optional
Dictionary describing meta features associated with each time-series.
Keys are time-series file names.
"""
data_dict = {'filename': [util.shorten_fname(f) for f in filenames],
'class': classes}
Expand Down
41 changes: 37 additions & 4 deletions cesium/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,19 +13,52 @@


def shorten_fname(file_path):
"""Extract the name of a file (omitting directory names and extensions)."""
"""Extract the name of a file (omitting directory names and extensions).
Parameters
----------
file_path : str
Absolute or relative path to a file.
Returns
-------
str
The name of the file with directory names and extensions removed.
"""
return os.path.splitext(os.path.basename(file_path))[0]


def make_list(x):
if isinstance(x, collections.Iterable) and not isinstance(x, str):
"""Wrap `x` in a list if it isn't already a list or tuple.
Parameters
----------
x : any valid object
The parameter to be wrapped in a list.
Returns
-------
list or tuple
Returns `[x]` if `x` is not already a list or tuple, otherwise
returns `x`.
"""
if isinstance(x, collections.Iterable) and not isinstance(x, (str, dict)):
return x
else:
return [x,]
return [x]


def remove_files(paths):
"""Remove specified files from disk."""
"""Remove specified file(s) from disk.
Parameters
----------
paths : str or list of str
Path(s) to file(s) to be removed from disk.
"""
paths = make_list(paths)
for path in paths:
try:
Expand Down

0 comments on commit c74004e

Please sign in to comment.