Handle NaNs better: treat -9999 as missing and parse nulls before averaging sample columns

SnowEx · Aug 14, 2024 · 612bdc2 · 612bdc2
1 parent 5faf656
commit 612bdc2
Show file tree

Hide file tree

Showing 2 changed files with 7 additions and 7 deletions.
diff --git a/snowex_db/string_management.py b/snowex_db/string_management.py
@@ -207,10 +207,10 @@ def parse_none(value):
 
     # If its a nan or none or the string is empty
     if isinstance(value, str):
-        if value.lower() in ['nan', 'none'] or not value:
+        if value.lower() in ['nan', 'none', '-9999', '-9999.0'] or not value:
             result = None
     elif isinstance(value, float):
-        if np.isnan(value):
+        if np.isnan(value) or value == -9999:
             result = None
 
     return result

diff --git a/snowex_db/upload.py b/snowex_db/upload.py
@@ -183,11 +183,15 @@ def build_data(self, data_name):
         df['type'] = data_name
         df['date_accessed'] = self.date_accessed
 
+        # Manage nans and nones
+        for c in df.columns:
+            df[c] = df[c].apply(lambda x: parse_none(x))
+
         # Get the average if its multisample profile
         if data_name in self.multi_sample_profiles:
             kw = '{}_sample'.format(data_name)
             sample_cols = [c for c in df.columns if kw in c]
-            df['value'] = df[sample_cols].mean(axis=1).astype(str)
+            df['value'] = df[sample_cols].mean(axis=1, skipna=True).astype(str)
 
             # Replace the data_name sample columns with just sample
             for s in sample_cols:
@@ -203,10 +207,6 @@ def build_data(self, data_name):
             c for c in df.columns if c not in self.expected_attributes]
         df = df.drop(columns=drop_cols)
 
-        # Manage nans and nones
-        for c in df.columns:
-            df[c] = df[c].apply(lambda x: parse_none(x))
-
         # Clean up comments a bit
         if 'comments' in df.columns:
             df['comments'] = df['comments'].apply(