Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ISSUE-177 Parallelize SAM algorithm in mineraly.py using Joblib #181

Merged
merged 10 commits into from
Nov 21, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions examples/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@
# Floor, Boston, MA 02110-1301, USA.
# encoding: utf-8

DEBUG = 1
TESTRUN = 1
DEBUG = 0
TESTRUN = 0
PROFILE = 0

# TODO: Also demonstrate this functionality in a Jupyter Notebook
Expand Down Expand Up @@ -58,5 +58,5 @@
# Only applies to example_mineral.py
# For reference, the small image has the shape 931X339 (931 rows and 339
# columns)
MINERAL_SUBSET_ROWS = [0, 75]
MINERAL_SUBSET_COLS = [0, 75]
MINERAL_SUBSET_ROWS = None #[0, 75]
MINERAL_SUBSET_COLS = None #[0, 75]
2 changes: 1 addition & 1 deletion examples/example_mineral.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ def run_mineral(input_filename=constants.INPUT_FILENAME,

# generate a georeferenced visible-light image
mineral_classification.to_rgb(input_filename, rgb_filename)

# generate a mineral classified image
mineral_classification.classify_image(input_filename, classified_filename)

Expand Down
124 changes: 73 additions & 51 deletions pycoal/mineral.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,62 @@
import time
import fnmatch
import shutil
import multiprocessing
from joblib import Parallel, delayed
aheermann marked this conversation as resolved.
Show resolved Hide resolved


def calculate_pixel_confidence_value(pixel, angles_m, threshold,
resample, scores_file_name):
"""
Calculate the confidence score for a pixel
Is run in parallel on CPU through joblib

Args:
pixel (int[]): numpy memmap of pixel's values
angles_m (numpy array): universal calculated angles
threshold (float): classification threshold
resample (BandResampler): defined resampler for bands
scores_file_name (str): filename of the image to hold
each pixel's classification score

Returns:
confidence value (float)

"""

# if it is not a no data pixel
if not numpy.isclose(pixel[0], -0.005) and not pixel[0] == -50:
# resample the pixel ignoring NaNs from target bands that don't overlap
resampled_pixel = numpy.nan_to_num(resample(pixel))
# calculate spectral angles
# calculate spectral angles
# Adapted from Spectral library
resampled_data = resampled_pixel[numpy.newaxis, numpy.newaxis, ...]
norms = numpy.sqrt(numpy.einsum('ijk,ijk->ij', resampled_data, resampled_data))
dots = numpy.einsum('ijk,mk->ijm', resampled_data, angles_m)
dots = numpy.clip(dots / norms[:, :, numpy.newaxis], -1, 1)
angles = numpy.arccos(dots)

# normalize confidence values from [pi,0] to [0,1]
angles[0, 0, :] = 1 - angles[0, 0, :] / math.pi

# get index of class with largest confidence value
index_of_max = numpy.argmax(angles)

# get confidence value of the classified pixel
score = angles[0, 0, index_of_max]

# classify pixel if confidence above threshold
if score > threshold:

# index from one (after zero for no data)
classified_value = index_of_max + 1
if scores_file_name is not None:
# store score value
return score, classified_value
return 0.0, classified_value
return 0.0, 0


"""
Classifier callbacks functions must have at least the following args: library,
Expand Down Expand Up @@ -78,7 +134,7 @@ def SAM(image_file_name, classified_file_name, library_file_name,
# open the image
image = spectral.open_image(image_file_name)
if subset_rows is not None and subset_cols is not None:
subset_image = SubImage(image, subset_rows, subset_cols)
data = SubImage(image, subset_rows, subset_cols)
m = subset_rows[1]
n = subset_cols[1]
else:
Expand All @@ -99,62 +155,29 @@ def SAM(image_file_name, classified_file_name, library_file_name,

# allocate a zero-initialized MxN array for the classified image
classified = numpy.zeros(shape=(m, n), dtype=numpy.uint16)

if scores_file_name is not None:
# allocate a zero-initialized MxN array for the scores image
scored = numpy.zeros(shape=(m, n), dtype=numpy.float64)

# universal calculations for angles
# Adapted from Spectral library
angles_m = numpy.array(library.spectra, dtype=numpy.float64)
angles_m /= numpy.sqrt(numpy.einsum('ij,ij->i', angles_m, angles_m))[:, numpy.newaxis]

# for each pixel in the image
for x in range(m):

for y in range(n):

# read the pixel from the file
if subset_rows is not None and subset_cols is not None:
pixel = subset_image.read_pixel(x, y)
else:
pixel = data[x, y]

# if it is not a no data pixel
if not numpy.isclose(pixel[0], -0.005) and not pixel[0] == -50:

# resample the pixel ignoring NaNs from target bands that
# don't overlap
# TODO fix spectral library so that bands are in order
resampled_pixel = numpy.nan_to_num(resample(pixel))

# calculate spectral angles
# Adapted from Spectral library
resampled_data = resampled_pixel[numpy.newaxis, numpy.newaxis, ...]
norms = numpy.sqrt(numpy.einsum('ijk,ijk->ij', resampled_data, resampled_data))
dots = numpy.einsum('ijk,mk->ijm', resampled_data, angles_m)
dots = numpy.clip(dots / norms[:, :, numpy.newaxis], -1, 1)
angles = numpy.arccos(dots)

# normalize confidence values from [pi,0] to [0,1]
angles[0, 0, :] = 1 - angles[0, 0, :] / math.pi

# get index of class with largest confidence value
index_of_max = numpy.argmax(angles)

# get confidence value of the classified pixel
score = angles[0, 0, index_of_max]

# classify pixel if confidence above threshold
if score > threshold:

# index from one (after zero for no data)
classified[x, y] = index_of_max + 1

if scores_file_name is not None:
# store score value
scored[x, y] = score
num_cores = multiprocessing.cpu_count()
pixel_confidences = numpy.array(Parallel(n_jobs=num_cores)(delayed(
calculate_pixel_confidence_value)(data[x, y],
angles_m, threshold, resample, scores_file_name)
for x in range(m) for y in range(n)))

# puts it all in one single array, need to make 2d array
k = 0
for i in range(m):
for j in range(n):
if scores_file_name is not None:
scored[i][j] = pixel_confidences[k][0]
classified[i][j] = pixel_confidences[k][1]
k += 1
# save the classified image to a file
spectral.io.envi.save_classification(classified_file_name, classified,
class_names=['No data'] +
Expand Down Expand Up @@ -730,10 +753,9 @@ def convert(cls, library_filename=""):
if "errorbars" not in items:
if "Wave" not in items:
if "SpectraValues" not in items:
shutil.copy2(os.path.join(root, items),
directory)
shutil.copy2(os.path.join(root, items), directory)

# This will take the .txt files for Spectra in USGS Spectral Version
# This will take the .txt files for Spectra in USGS Spectral Version 7 and
# 7 and
# convert their format to match that of ASTER .spectrum.txt files
# for spectra
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
'Modules', ]
_description = 'COAL mining library for AVIRIS data.'
_download_url = 'http://pypi.python.org/pypi/pycoal/'
_requirements = ["numpy", "spectral", "guzzle_sphinx_theme"]
_requirements = ["numpy", "spectral", "guzzle_sphinx_theme", "joblib", "psutil"]
_keywords = ['spectroscopy', 'aviris', 'aviris-ng', 'mining', 'minerals']
_license = 'GNU GENERAL PUBLIC LICENSE, Version 2'
_long_description = 'A python suite for the identification and ' \
Expand Down