311 Always write at least one NaN line to stats file. Fix image_stats time concat
SINTEF · Sep 24, 2024 · 2664781 · 2664781
1 parent 598ea55
commit 2664781
Show file tree

Hide file tree

Showing 2 changed files with 11 additions and 10 deletions.
diff --git a/pyopia/io.py b/pyopia/io.py
@@ -83,15 +83,18 @@ def write_stats(stats,
         if append and os.path.isfile(datafilename + '-STATS.nc'):
             existing_stats = load_stats(datafilename + '-STATS.nc')
             xstats = xarray.concat([existing_stats, xstats], 'index')
+            ximage_stats = image_stats.to_xarray()
         elif not append:
+            # When not appending, only store the last row in the image_stats DataFrame
+            ximage_stats = image_stats.loc[[image_stats.index[-1]], :].to_xarray()
             datafilename += ('-Image-D' +
                              str(xstats['timestamp'][0].values).replace('-', '').replace(':', '').replace('.', '-'))
         encoding = setup_xstats_encoding(xstats)
         xstats.to_netcdf(datafilename + '-STATS.nc', encoding=encoding, engine=NETCDF_ENGINE, format='NETCDF4')
 
         # If we have image statistics (summary data for each raw image), add the image_stats as a group
         if image_stats is not None:
-            image_stats.to_xarray().to_netcdf(datafilename + '-STATS.nc', group='image_stats', mode='a', engine=NETCDF_ENGINE)
+            ximage_stats.to_netcdf(datafilename + '-STATS.nc', group='image_stats', mode='a', engine=NETCDF_ENGINE)
 
 
 def setup_xstats_encoding(xstats, string_vars=['export name', 'holo_filename']):
@@ -330,7 +333,7 @@ def merge_and_save_mfdataset(path_to_data, prefix='*', overwrite_existing_partia
     num_chunks = int(np.ceil(num_files / chunk_size_used))
     filelist_chunks = [sorted_filelist[i*chunk_size_used:min(num_files, (i+1)*chunk_size_used)] for i in range(num_chunks)]
     infostr = f'Processing {num_chunks} partial file lists of {chunk_size_used} files each'
-    infostr += ', based on a total of {num_files} files.'
+    infostr += f', based on a total of {num_files} files.'
     logging.info(infostr)
 
     # Get config from first file in list

diff --git a/pyopia/process.py b/pyopia/process.py
@@ -259,14 +259,12 @@ def extract_particles(imc, timestamp, Classification, region_properties,
     stats : DataFrame
         List of particle statistics for every particle, according to Partstats class
     '''
-    filenames = ['not_exported'] * len(region_properties)
+    num_rows = max(len(region_properties), 1)
+    filenames = ['not_exported'] * num_rows
 
     if Classification is not None:
         # pre-allocation
-        predictions = np.zeros((len(region_properties),
-                                len(Classification.class_labels)),
-                               dtype='float64')
-        predictions *= np.nan
+        predictions = np.nan * np.zeros((num_rows, len(Classification.class_labels)), dtype='float64')
 
     # obtain the original image filename from the timestamp
     filename = timestamp.strftime('D%Y%m%dT%H%M%S.%f')
@@ -292,8 +290,8 @@ def extract_particles(imc, timestamp, Classification, region_properties,
         HDF5File = None
 
     # pre-allocate some things
-    data = np.zeros((len(region_properties), len(propnames)), dtype=np.float64)
-    bboxes = np.zeros((len(region_properties), 4), dtype=np.float64)
+    data = np.nan * np.zeros((num_rows, len(propnames)), dtype=np.float64)
+    bboxes = np.nan * np.zeros((num_rows, 4), dtype=np.float64)
     nb_extractable_part = 0
 
     for i, el in enumerate(region_properties):
@@ -630,7 +628,7 @@ def __call__(self, data):
         data['image_stats'].loc[data['timestamp'], 'saturation'] = image_saturation
 
         # Skip remaining calculations if no particles were found
-        if data['stats'].size == 0:
+        if data['stats'].dropna().size == 0:
             return data
 
         # Calculate D50, nc and vc stats