Skip to content

Commit

Permalink
Remove changes to concat()
Browse files (browse the repository at this point in the history)
  • Loading branch information
hagenw committed Oct 25, 2023
1 parent 3c113db commit 74eaf08
Showing 1 changed file with 23 additions and 45 deletions.
68 changes: 23 additions & 45 deletions audformat/core/utils.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -37,7 +37,6 @@ def concat(
objs: typing.Sequence[typing.Union[pd.Series, pd.DataFrame]],
*,
overwrite: bool = False,
rename: bool = False,
) -> typing.Union[pd.Series, pd.DataFrame]:
r"""Concatenate objects.
Expand Down Expand Up @@ -69,20 +68,14 @@ def concat(
Args:
objs: objects
overwrite: overwrite values where indices overlap
rename: rename column
instead of raising an error
if two columns have the same name,
but different content.
Add ``'-1'`` at end of second column
Returns:
concatenated objects
Raises:
ValueError: if level and dtypes of object indices do not match
ValueError: if columns with the same name have different dtypes
ValueError: if ``rename`` is ``False``
and values in the same position do not match
ValueError: if values in the same position do not match
Examples:
>>> concat(
Expand Down Expand Up @@ -175,21 +168,6 @@ def concat(
f3 0 days NaT 2.0 b
"""

def add_column(columns, column):
    """Add an empty placeholder series for ``column`` to ``columns``.

    The placeholder is stored under ``column.name`` and is built on
    ``index``, which is read from the enclosing scope.
    Integer dtypes are replaced by ``'Int64'`` and boolean dtypes by
    ``'boolean'`` (presumably so that positions without a value can be
    represented); every other dtype is kept as is.

    Args:
        columns: mapping from column name to series, modified in place
        column: series providing the name and dtype of the new entry

    Returns:
        the same ``columns`` mapping, including the new entry

    """
    source_dtype = column.dtype
    # Fall back to the original dtype unless it is integer or boolean
    target_dtype = source_dtype
    if pd.api.types.is_integer_dtype(source_dtype):
        target_dtype = 'Int64'
    elif pd.api.types.is_bool_dtype(source_dtype):
        target_dtype = 'boolean'
    placeholder = pd.Series(index=index, dtype=target_dtype)
    columns[column.name] = placeholder
    return columns

if not objs:
return pd.Series([], index=pd.Index([]), dtype='object')

Expand All @@ -216,7 +194,6 @@ def add_column(columns, column):

# reindex all columns to the new index
columns_reindex = {}
rename_counter = 0
for column in columns:

# if we already have a column with that name, we have to merge them
Expand Down Expand Up @@ -265,31 +242,32 @@ def add_column(columns, column):
combine.dropna(inplace=True)
differ = combine['left'] != combine['right']
if np.any(differ):
if rename:
# Rename column with different labels
rename_counter += 1
column.name = f'{column.name}-{rename_counter}'
columns_reindex = add_column(
columns_reindex,
column,
)
else:
max_display = 10
overlap = combine[differ]
msg_overlap = str(overlap[:max_display])
msg_tail = '\n...' \
if len(overlap) > max_display \
else ''
raise ValueError(
"Found overlapping data in column "
f"'{column.name}':\n"
f"{msg_overlap}{msg_tail}"
)
max_display = 10
overlap = combine[differ]
msg_overlap = str(overlap[:max_display])
msg_tail = '\n...' \
if len(overlap) > max_display \
else ''
raise ValueError(
"Found overlapping data in column "
f"'{column.name}':\n"
f"{msg_overlap}{msg_tail}"
)

# drop NaN to avoid overwriting values from other column
column = column.dropna()
else:
columns_reindex = add_column(columns_reindex, column)
# Adjust dtype and initialize empty column
if pd.api.types.is_integer_dtype(column.dtype):
dtype = 'Int64'
elif pd.api.types.is_bool_dtype(column.dtype):
dtype = 'boolean'
else:
dtype = column.dtype
columns_reindex[column.name] = pd.Series(
index=index,
dtype=dtype,
)
columns_reindex[column.name][column.index] = column

# Use `None` to force `{}` return the correct index, see
Expand Down

0 comments on commit 74eaf08

Please sign in to comment.