Skip to content

Commit

Permalink
work on RIOS versions of tiling stats
Browse files Browse the repository at this point in the history
  • Loading branch information
gillins committed Jun 28, 2024
1 parent 1822bba commit 11d9b25
Show file tree
Hide file tree
Showing 4 changed files with 187 additions and 5 deletions.
13 changes: 11 additions & 2 deletions parallel_examples/awsbatch/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,18 @@ RUN cd /tmp \
&& make install \
&& cd ../.. \
&& rm -rf kealib-${KEALIB_VERSION} kealib-${KEALIB_VERSION}.tar.gz

ENV RIOS_VERSION=2.0.3
RUN cd /tmp \
&& wget -q https://github.com/ubarsc/rios/releases/download/rios-${RIOS_VERSION}/rios-${RIOS_VERSION}.tar.gz \
&& tar xf rios-${RIOS_VERSION}.tar.gz \
&& cd rios-${RIOS_VERSION} \
&& DEB_PYTHON_INSTALL_LAYOUT=deb_system pip install . \
&& cd .. \
&& rm -rf rios-${RIOS_VERSION} rios-${RIOS_VERSION}.tar.gz

COPY pyshepseg-$PYSHEPSEG_VER.tar.gz /tmp
# install RIOS
# install pyshegseg
RUN cd /tmp && tar xf pyshepseg-$PYSHEPSEG_VER.tar.gz \
&& cd pyshepseg-$PYSHEPSEG_VER \
&& DEB_PYTHON_INSTALL_LAYOUT=deb_system pip install . \
Expand Down Expand Up @@ -73,9 +82,9 @@ RUN apt-get autoremove -y && apt-get clean && rm -rf /var/lib/apt/lists/*
USER $SERVICEUSER

# a few quick tests
#RUN gdal_translate --formats | grep KEA
RUN python3 -c 'from osgeo import gdal;assert(gdal.GetDriverByName("KEA") is not None)'
RUN python3 -c 'from pyshepseg import tiling'
RUN python3 -c 'from rios import applier'

# export the volume
VOLUME $SW_VOLUME
Expand Down
4 changes: 2 additions & 2 deletions parallel_examples/awsbatch/do_stitch.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,8 +130,8 @@ def main():
dataForStats = json.load(fileobj)
for img, bandnum, selection in dataForStats:
print(img, bandnum, selection)
tilingstats.calcPerSegmentStatsTiled(img, bandnum,
localDs, selection)
tilingstats.calcPerSegmentStatsTiledRIOS(img, bandnum,
localDs, selection, numReadWorkers=4)

if cmdargs.spatialstats is not None:
bucket, spatialstatsKey = cmdargs.spatialstats.split(':')
Expand Down
2 changes: 1 addition & 1 deletion parallel_examples/awsbatch/template/template.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ Resources:
JobDefinitionName: PyShepSegBatchJobDefinitionStitch
ContainerProperties:
Image: !Join ['', [!GetAtt BatchRepository.RepositoryUri, ":latest"]]
Vcpus: 1
Vcpus: 4
Memory: 8000
RetryStrategy:
Attempts: 1
Expand Down
173 changes: 173 additions & 0 deletions pyshepseg/tilingstats.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,13 @@
from . import tiling
from . import shepseg

HAVE_RIOS = False
try:
from rios import applier
from rios import ratapplier
except ImportError:
pass

gdal.UseExceptions()
osr.UseExceptions()

Expand Down Expand Up @@ -110,6 +117,9 @@ def calcPerSegmentStatsTiled(imgfile, imgbandnum, segfile,
The 'pixcount' statName can be used to find the number of
valid pixels (not nodata) that were used to calculate the statistics.
missingStatsValue : int or float
What to set for segments that have no valid pixels in imgile
"""
segds, segband, imgds, imgband = doImageAlignmentChecks(segfile,
imgfile, imgbandnum)
Expand Down Expand Up @@ -165,6 +175,169 @@ def calcPerSegmentStatsTiled(imgfile, imgbandnum, segfile,
raise PyShepSegStatsError('Not all pixels found during processing')


def rioscalcPerSegmentStatsTiled(info, inputs, outputs, otherArgs):
"""
Called be RIOS from inside calcPerSegmentStatsTiledRIOS. Do
accumulation of statistics
"""
accumulateSegDict(otherArgs.segDict, otherArgs.noDataDict,
otherArgs.imgNullVal, inputs.segfile[0], inputs.imgfile[0])
calcStatsForCompletedSegs(otherArgs.segDict, otherArgs.noDataDict,
otherArgs.missingStatsValue, otherArgs.pagedRat,
otherArgs.statsSelection_fast, otherArgs.segSize,
otherArgs.numIntCols, otherArgs.numFloatCols)

writeCompletePages(otherArgs.pagedRat, otherArgs.attrTbl,
otherArgs.statsSelection_fast)


def calcPerSegmentStatsTiledRIOS(imgfile, imgbandnum, segfile,
statsSelection, numReadWorkers=0, missingStatsValue=-9999):
"""
Calculate selected per-segment statistics for the given band
of the imgfile, against the given segment raster file.
Calculated statistics are written to the segfile raster
attribute table (RAT), so this file format must support RATs.
Calculations are carried out in a memory-efficient way, allowing
very large rasters to be processed. Raster data is handled in
small tiles, attribute table is handled in fixed-size chunks.
Parameters
----------
imgfile : string
Path to input file for collecting statistics from
imgbandnum : int
1-based index of the band number in imgfile to use for collecting stats
segfile : str
Path to segmented file. Will collect stats
in imgfile for each segment in this file.
statsSelection : list of tuples.
One tuple for each statistic to be included. Each tuple is either 2
or 3 elements::
(columnName, statName)
or ::
(columnName, statName, parameter)
The columnName is a string, used to name the column in the
output RAT.
The statName is a string used to identify which statistic
is to be calculated. Available options are::
'min', 'max', 'mean', 'stddev', 'median', 'mode', 'percentile', 'pixcount'
The 'percentile' statistic requires the 3-element form, with
the 3rd element being the percentile to be calculated.
For example::
[('Band1_Mean', 'mean'),
('Band1_stdDev', 'stddev'),
('Band1_LQ', 'percentile', 25),
('Band1_UQ', 'percentile', 75)]
would create 4 columns, for the per-segment mean and
standard deviation of the given band, and the lower and upper
quartiles, with corresponding column names.
Any pixels that are set to the nodata value of imgfile (if set)
are ignored in the stats calculations. If there are no pixels
that aren't the nodata value then the value passed in as
missingStatsValue is put into the RAT for the requested
statistics.
The 'pixcount' statName can be used to find the number of
valid pixels (not nodata) that were used to calculate the statistics.
numReadWorkers : int
Number of concurrent read workers that RIOS will use.
missingStatsValue : int or float
What to set for segments that have no valid pixels in imgile
"""
if not HAVE_RIOS:
raise PyShepSegStatsError('RIOS needs to be installed for this function')

segds, segband, imgds, imgband = doImageAlignmentChecks(segfile,
imgfile, imgbandnum)

attrTbl = segband.GetDefaultRAT()
existingColNames = [attrTbl.GetNameOfCol(i)
for i in range(attrTbl.GetColumnCount())]

# Note: may be None if no value set
imgNullVal = imgband.GetNoDataValue()
if imgNullVal is not None:
# cast to the same type we are using for imagery
# (GDAL records this value as double)
imgNullVal = tiling.numbaTypeForImageType(imgNullVal)

histColNdx = checkHistColumn(existingColNames)
segSize = attrTbl.ReadAsArray(histColNdx).astype(numpy.uint32)

# close all files so they can be opened in RIOS
del segband
del segds
del imgband
del imgds

# now create a new temporary file for saving the new columns too
tempFileMgr = applier.TempfileManager()
tempKEA = tempFileMgr.mktempfile(prefix='pyshepseg_tilingstats_', suffix='.kea')
keaDriver = gdal.GetDriverByName('KEA')
tempKEADS = keaDriver.Create(tempKEA, 10, 10, 1, gdal.GDT_UInt32)
tempKEABand = tempKEADS.GetRasterBand(1)
tempKEAAttrTbl = tempKEABand.GetDefaultRAT()

# Create columns (should be non in temp file)
colIndexList = createStatColumns(statsSelection, tempKEAAttrTbl, [])
(statsSelection_fast, numIntCols, numFloatCols) = (
makeFastStatsSelection(colIndexList, statsSelection))

inputs = applier.FilenameAssociations()
inputs.segfile = segfile
inputs.imgfile = imgfile

# we don't actually write any outputs
outputs = applier.FilenameAssociations()

controls = applier.ApplierControls()
controls.selectInputImageLayers([imgbandnum])
controls.setWindowSize(tiling.TILESIZE)

if numReadWorkers > 0:
conc = applier.ConcurrencyStyle(numReadWorkers=numReadWorkers)
controls.setConcurrencyStyle(conc)

otherArgs = applier.OtherInputs()
otherArgs.segDict = createSegDict()
otherArgs.pagedRat = tiling.createPagedRat()
otherArgs.noDataDict = createNoDataDict()
otherArgs.attrTbl = tempKEAAttrTbl
otherArgs.imgNullVal = imgNullVal
otherArgs.missingStatsValue = missingStatsValue
otherArgs.statsSelection_fast = statsSelection_fast
otherArgs.segSize = segSize
otherArgs.numIntCols = numIntCols
otherArgs.numFloatCols = numFloatCols

applier.apply(rioscalcPerSegmentStatsTiled, inputs, outputs,
controls=controls, otherArgs=otherArgs)

del tempKEAAttrTbl
del tempKEABand
del tempKEADS

# all pages should now be written. Raise an error if this not the case.
if len(otherArgs.pagedRat) > 0:
raise PyShepSegStatsError('Not all pixels found during processing')

# now merge the stats from the tempfile band info segfile
ratapplier.copyRat(tempKEA, segfile)


def doImageAlignmentChecks(segfile, imgfile, imgbandnum):
"""
Do the checks that the segment file and image file that is being used to
Expand Down

0 comments on commit 11d9b25

Please sign in to comment.