Skip to content

Commit

Permalink
handle nans better
Browse files Browse the repository at this point in the history
  • Loading branch information
micah-prime committed Aug 14, 2024
1 parent 5faf656 commit 612bdc2
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 7 deletions.
4 changes: 2 additions & 2 deletions snowex_db/string_management.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,10 +207,10 @@ def parse_none(value):

# If it's a nan or none, or the string is empty
if isinstance(value, str):
if value.lower() in ['nan', 'none'] or not value:
if value.lower() in ['nan', 'none', '-9999', '-9999.0'] or not value:
result = None
elif isinstance(value, float):
if np.isnan(value):
if np.isnan(value) or value == -9999:
result = None

return result
Expand Down
10 changes: 5 additions & 5 deletions snowex_db/upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,11 +183,15 @@ def build_data(self, data_name):
df['type'] = data_name
df['date_accessed'] = self.date_accessed

# Manage nans and nones
for c in df.columns:
df[c] = df[c].apply(lambda x: parse_none(x))

# Get the average if it's a multisample profile
if data_name in self.multi_sample_profiles:
kw = '{}_sample'.format(data_name)
sample_cols = [c for c in df.columns if kw in c]
df['value'] = df[sample_cols].mean(axis=1).astype(str)
df['value'] = df[sample_cols].mean(axis=1, skipna=True).astype(str)

# Replace the data_name sample columns with just sample
for s in sample_cols:
Expand All @@ -203,10 +207,6 @@ def build_data(self, data_name):
c for c in df.columns if c not in self.expected_attributes]
df = df.drop(columns=drop_cols)

# Manage nans and nones
for c in df.columns:
df[c] = df[c].apply(lambda x: parse_none(x))

# Clean up comments a bit
if 'comments' in df.columns:
df['comments'] = df['comments'].apply(
Expand Down

0 comments on commit 612bdc2

Please sign in to comment.