From 74eaf0807009af247891e41b1d2e4f71a65c9d2a Mon Sep 17 00:00:00 2001 From: Hagen Wierstorf Date: Wed, 25 Oct 2023 08:03:05 +0200 Subject: [PATCH] Remove changes to concat() --- audformat/core/utils.py | 68 ++++++++++++++--------------------------- 1 file changed, 23 insertions(+), 45 deletions(-) diff --git a/audformat/core/utils.py b/audformat/core/utils.py index d54daefb..761b4ce7 100644 --- a/audformat/core/utils.py +++ b/audformat/core/utils.py @@ -37,7 +37,6 @@ def concat( objs: typing.Sequence[typing.Union[pd.Series, pd.DataFrame]], *, overwrite: bool = False, - rename: bool = False, ) -> typing.Union[pd.Series, pd.DataFrame]: r"""Concatenate objects. @@ -69,11 +68,6 @@ def concat( Args: objs: objects overwrite: overwrite values where indices overlap - rename: rename column - instead of raising an error - if two columns have the same name, - but different content. - Add ``'-1'`` at end of second column Returns: concatenated objects @@ -81,8 +75,7 @@ def concat( Raises: ValueError: if level and dtypes of object indices do not match ValueError: if columns with the same name have different dtypes - ValueError: if ``rename`` is ``False`` - and values in the same position do not match + ValueError: if values in the same position do not match Examples: >>> concat( @@ -175,21 +168,6 @@ def concat( f3 0 days NaT 2.0 b """ - - def add_column(columns, column): - # Adjust dtype and initialize empty column in columns. - if pd.api.types.is_integer_dtype(column.dtype): - dtype = 'Int64' - elif pd.api.types.is_bool_dtype(column.dtype): - dtype = 'boolean' - else: - dtype = column.dtype - columns[column.name] = pd.Series( - index=index, - dtype=dtype, - ) - return columns - if not objs: return pd.Series([], index=pd.Index([]), dtype='object') @@ -216,7 +194,6 @@ def add_column(columns, column): # reindex all columns to the new index columns_reindex = {} - rename_counter = 0 for column in columns: # if we already have a column with that name, we have to merge them @@ -265,31 +242,32 @@ def add_column(columns, column): combine.dropna(inplace=True) differ = combine['left'] != combine['right'] if np.any(differ): - if rename: - # Rename column with different labels - rename_counter += 1 - column.name = f'{column.name}-{rename_counter}' - columns_reindex = add_column( - columns_reindex, - column, - ) - else: - max_display = 10 - overlap = combine[differ] - msg_overlap = str(overlap[:max_display]) - msg_tail = '\n...' \ - if len(overlap) > max_display \ - else '' - raise ValueError( - "Found overlapping data in column " - f"'{column.name}':\n" - f"{msg_overlap}{msg_tail}" - ) + max_display = 10 + overlap = combine[differ] + msg_overlap = str(overlap[:max_display]) + msg_tail = '\n...' \ + if len(overlap) > max_display \ + else '' + raise ValueError( + "Found overlapping data in column " + f"'{column.name}':\n" + f"{msg_overlap}{msg_tail}" + ) # drop NaN to avoid overwriting values from other column column = column.dropna() else: - columns_reindex = add_column(columns_reindex, column) + # Adjust dtype and initialize empty column + if pd.api.types.is_integer_dtype(column.dtype): + dtype = 'Int64' + elif pd.api.types.is_bool_dtype(column.dtype): + dtype = 'boolean' + else: + dtype = column.dtype + columns_reindex[column.name] = pd.Series( + index=index, + dtype=dtype, + ) columns_reindex[column.name][column.index] = column # Use `None` to force `{}` return the correct index, see