From 612bdc2cf0bfbb2e9617acdf4f824c9736302562 Mon Sep 17 00:00:00 2001 From: Micah Sandusky Date: Wed, 14 Aug 2024 16:04:11 -0600 Subject: [PATCH] handle nans better --- snowex_db/string_management.py | 4 ++-- snowex_db/upload.py | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/snowex_db/string_management.py b/snowex_db/string_management.py index 275e87f..bdefa16 100644 --- a/snowex_db/string_management.py +++ b/snowex_db/string_management.py @@ -207,10 +207,10 @@ def parse_none(value): # If its a nan or none or the string is empty if isinstance(value, str): - if value.lower() in ['nan', 'none'] or not value: + if value.lower() in ['nan', 'none', '-9999', '-9999.0'] or not value: result = None elif isinstance(value, float): - if np.isnan(value): + if np.isnan(value) or value == -9999: result = None return result diff --git a/snowex_db/upload.py b/snowex_db/upload.py index b8315f0..53ee6c3 100644 --- a/snowex_db/upload.py +++ b/snowex_db/upload.py @@ -183,11 +183,15 @@ def build_data(self, data_name): df['type'] = data_name df['date_accessed'] = self.date_accessed + # Manage nans and nones + for c in df.columns: + df[c] = df[c].apply(lambda x: parse_none(x)) + # Get the average if its multisample profile if data_name in self.multi_sample_profiles: kw = '{}_sample'.format(data_name) sample_cols = [c for c in df.columns if kw in c] - df['value'] = df[sample_cols].mean(axis=1).astype(str) + df['value'] = df[sample_cols].mean(axis=1, skipna=True).astype(str) # Replace the data_name sample columns with just sample for s in sample_cols: @@ -203,10 +207,6 @@ def build_data(self, data_name): c for c in df.columns if c not in self.expected_attributes] df = df.drop(columns=drop_cols) - # Manage nans and nones - for c in df.columns: - df[c] = df[c].apply(lambda x: parse_none(x)) - # Clean up comments a bit if 'comments' in df.columns: df['comments'] = df['comments'].apply(