Skip to content

Commit

Permalink
311 Always write at least one NaN line to stats file. Fix image_stats…
Browse files Browse the repository at this point in the history
… time concat
  • Loading branch information
Raymond Nepstad committed Sep 24, 2024
1 parent 598ea55 commit 2664781
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 10 deletions.
7 changes: 5 additions & 2 deletions pyopia/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,15 +83,18 @@ def write_stats(stats,
if append and os.path.isfile(datafilename + '-STATS.nc'):
existing_stats = load_stats(datafilename + '-STATS.nc')
xstats = xarray.concat([existing_stats, xstats], 'index')
ximage_stats = image_stats.to_xarray()
elif not append:
# When not appending, only store the last row in the image_stats DataFrame
ximage_stats = image_stats.loc[[image_stats.index[-1]], :].to_xarray()
datafilename += ('-Image-D' +
str(xstats['timestamp'][0].values).replace('-', '').replace(':', '').replace('.', '-'))
encoding = setup_xstats_encoding(xstats)
xstats.to_netcdf(datafilename + '-STATS.nc', encoding=encoding, engine=NETCDF_ENGINE, format='NETCDF4')

# If we have image statistics (summary data for each raw image), add image_stats as a group
if image_stats is not None:
image_stats.to_xarray().to_netcdf(datafilename + '-STATS.nc', group='image_stats', mode='a', engine=NETCDF_ENGINE)
ximage_stats.to_netcdf(datafilename + '-STATS.nc', group='image_stats', mode='a', engine=NETCDF_ENGINE)


def setup_xstats_encoding(xstats, string_vars=['export name', 'holo_filename']):
Expand Down Expand Up @@ -330,7 +333,7 @@ def merge_and_save_mfdataset(path_to_data, prefix='*', overwrite_existing_partia
num_chunks = int(np.ceil(num_files / chunk_size_used))
filelist_chunks = [sorted_filelist[i*chunk_size_used:min(num_files, (i+1)*chunk_size_used)] for i in range(num_chunks)]
infostr = f'Processing {num_chunks} partial file lists of {chunk_size_used} files each'
infostr += ', based on a total of {num_files} files.'
infostr += f', based on a total of {num_files} files.'
logging.info(infostr)

# Get config from first file in list
Expand Down
14 changes: 6 additions & 8 deletions pyopia/process.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,14 +259,12 @@ def extract_particles(imc, timestamp, Classification, region_properties,
stats : DataFrame
List of particle statistics for every particle, according to Partstats class
'''
filenames = ['not_exported'] * len(region_properties)
num_rows = max(len(region_properties), 1)
filenames = ['not_exported'] * num_rows

if Classification is not None:
# pre-allocation
predictions = np.zeros((len(region_properties),
len(Classification.class_labels)),
dtype='float64')
predictions *= np.nan
predictions = np.nan * np.zeros((num_rows, len(Classification.class_labels)), dtype='float64')

# obtain the original image filename from the timestamp
filename = timestamp.strftime('D%Y%m%dT%H%M%S.%f')
Expand All @@ -292,8 +290,8 @@ def extract_particles(imc, timestamp, Classification, region_properties,
HDF5File = None

# pre-allocate some things
data = np.zeros((len(region_properties), len(propnames)), dtype=np.float64)
bboxes = np.zeros((len(region_properties), 4), dtype=np.float64)
data = np.nan * np.zeros((num_rows, len(propnames)), dtype=np.float64)
bboxes = np.nan * np.zeros((num_rows, 4), dtype=np.float64)
nb_extractable_part = 0

for i, el in enumerate(region_properties):
Expand Down Expand Up @@ -630,7 +628,7 @@ def __call__(self, data):
data['image_stats'].loc[data['timestamp'], 'saturation'] = image_saturation

# Skip remaining calculations if no particles were found
if data['stats'].size == 0:
if data['stats'].dropna().size == 0:
return data

# Calculate D50, nc and vc stats
Expand Down

0 comments on commit 2664781

Please sign in to comment.