Skip to content

Commit

Permalink
get running
Browse files Browse the repository at this point in the history
  • Loading branch information
gillins committed Jun 28, 2024
1 parent 11d9b25 commit c914e57
Show file tree
Hide file tree
Showing 4 changed files with 31 additions and 16 deletions.
21 changes: 14 additions & 7 deletions parallel_examples/awsbatch/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ RUN sed -i "s/http:\/\/archive./http:\/\/${AWS_REGION_ENV}.ec2.archive./g" /etc/
RUN apt-get update
RUN apt-get upgrade -y
RUN apt-get install -y python3-gdal python3-boto3 python3-sklearn \
python3-numba wget g++ cmake libhdf5-dev libgdal-dev python3-pip
python3-numba wget g++ cmake libhdf5-dev libgdal-dev python3-pip unzip

ENV SW_VOLUME=/ubarscsw
RUN mkdir $SW_VOLUME
Expand All @@ -39,13 +39,20 @@ RUN cd /tmp \
&& rm -rf kealib-${KEALIB_VERSION} kealib-${KEALIB_VERSION}.tar.gz

ENV RIOS_VERSION=2.0.3
#RUN cd /tmp \
# && wget -q https://github.com/ubarsc/rios/releases/download/rios-${RIOS_VERSION}/rios-${RIOS_VERSION}.tar.gz \
# && tar xf rios-${RIOS_VERSION}.tar.gz \
# && cd rios-${RIOS_VERSION} \
# && DEB_PYTHON_INSTALL_LAYOUT=deb_system pip install . \
# && cd .. \
# && rm -rf rios-${RIOS_VERSION} rios-${RIOS_VERSION}.tar.gz
RUN cd /tmp \
&& wget -q https://github.com/ubarsc/rios/releases/download/rios-${RIOS_VERSION}/rios-${RIOS_VERSION}.tar.gz \
&& tar xf rios-${RIOS_VERSION}.tar.gz \
&& cd rios-${RIOS_VERSION} \
&& wget -q https://github.com/ubarsc/rios/archive/refs/heads/master.zip \
&& unzip master.zip \
&& cd rios-master \
&& DEB_PYTHON_INSTALL_LAYOUT=deb_system pip install . \
&& cd .. \
&& rm -rf rios-${RIOS_VERSION} rios-${RIOS_VERSION}.tar.gz
&& rm -rf rios-master rios-master.zip

COPY pyshepseg-$PYSHEPSEG_VER.tar.gz /tmp
# install pyshepseg
Expand All @@ -69,14 +76,14 @@ ENV CPL_VSIL_CURL_ALLOWED_EXTENSIONS=".tif,.TIF,.tiff,.vrt,.zip"
ENV VSI_CACHE=True
ENV VSI_CACHE_SIZE=1024000000
ENV GDAL_HTTP_MAX_RETRY=10
ENV GDAL_HTTP_MAX_RETRY=3
ENV GDAL_HTTP_MAX_DELAY=3
ENV CPL_ZIP_ENCODING=UTF-8

COPY do_prepare.py $SW_VOLUME/bin
COPY do_tile.py $SW_VOLUME/bin
COPY do_stitch.py $SW_VOLUME/bin

RUN apt-get remove -y wget g++ cmake
RUN apt-get remove -y wget g++ cmake unzip
RUN apt-get autoremove -y && apt-get clean && rm -rf /var/lib/apt/lists/*

USER $SERVICEUSER
Expand Down
5 changes: 4 additions & 1 deletion parallel_examples/awsbatch/do_stitch.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,9 @@ def main():
utils.writeRandomColourTable(band, maxSegId + 1)
utils.addOverviews(localDs)

# ensure dataset is closed so we can open it again in RIOS
del localDs

# now do any stats the user has asked for
if cmdargs.stats is not None:

Expand All @@ -131,7 +134,7 @@ def main():
for img, bandnum, selection in dataForStats:
print(img, bandnum, selection)
tilingstats.calcPerSegmentStatsTiledRIOS(img, bandnum,
localDs, selection, numReadWorkers=4)
localOutfile, selection, numReadWorkers=4)

if cmdargs.spatialstats is not None:
bucket, spatialstatsKey = cmdargs.spatialstats.split(':')
Expand Down
2 changes: 1 addition & 1 deletion parallel_examples/awsbatch/template/template.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ Resources:
JobDefinitionName: PyShepSegBatchJobDefinitionStitch
ContainerProperties:
Image: !Join ['', [!GetAtt BatchRepository.RepositoryUri, ":latest"]]
Vcpus: 4
Vcpus: 2
Memory: 8000
RetryStrategy:
Attempts: 1
Expand Down
19 changes: 12 additions & 7 deletions pyshepseg/tilingstats.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
# RIOS is an optional dependency. Record whether it could be imported so the
# rest of the module can test HAVE_RIOS rather than failing with a NameError.
try:
    from rios import applier
    from rios import ratapplier
    HAVE_RIOS = True
except ImportError:
    # Always define the flag, even when RIOS is not installed.
    HAVE_RIOS = False

Expand Down Expand Up @@ -278,18 +279,25 @@ def calcPerSegmentStatsTiledRIOS(imgfile, imgbandnum, segfile,
segSize = attrTbl.ReadAsArray(histColNdx).astype(numpy.uint32)

# close all files so they can be opened in RIOS
del attrTbl
del segband
del segds
del imgband
del imgds

controls = applier.ApplierControls()
controls.selectInputImageLayers([imgbandnum])
#controls.setWindowSize(tiling.TILESIZE, tiling.TILESIZE)

# now create a new temporary file for saving the new columns to
tempFileMgr = applier.TempfileManager()
tempFileMgr = applier.TempfileManager(controls.tempdir)
tempKEA = tempFileMgr.mktempfile(prefix='pyshepseg_tilingstats_', suffix='.kea')
keaDriver = gdal.GetDriverByName('KEA')
tempKEADS = keaDriver.Create(tempKEA, 10, 10, 1, gdal.GDT_UInt32)
tempKEABand = tempKEADS.GetRasterBand(1)
tempKEAAttrTbl = tempKEABand.GetDefaultRAT()
# make same size as original
tempKEAAttrTbl.SetRowCount(segSize.size)

# Create columns (should be none in temp file)
colIndexList = createStatColumns(statsSelection, tempKEAAttrTbl, [])
Expand All @@ -303,12 +311,9 @@ def calcPerSegmentStatsTiledRIOS(imgfile, imgbandnum, segfile,
# we don't actually write any outputs
outputs = applier.FilenameAssociations()

controls = applier.ApplierControls()
controls.selectInputImageLayers([imgbandnum])
controls.setWindowSize(tiling.TILESIZE)

if numReadWorkers > 0:
conc = applier.ConcurrencyStyle(numReadWorkers=numReadWorkers)
conc = applier.ConcurrencyStyle(numReadWorkers=numReadWorkers,
readBufferPopTimeout=30)
controls.setConcurrencyStyle(conc)

otherArgs = applier.OtherInputs()
Expand All @@ -335,7 +340,7 @@ def calcPerSegmentStatsTiledRIOS(imgfile, imgbandnum, segfile,
raise PyShepSegStatsError('Not all pixels found during processing')

# now merge the stats from the tempfile back into segfile
ratapplier.copyRat(tempKEA, segfile)
ratapplier.copyRAT(tempKEA, segfile)


def doImageAlignmentChecks(segfile, imgfile, imgbandnum):
Expand Down

0 comments on commit c914e57

Please sign in to comment.