From 203b70323990acd5b03815224c5e00c9d139d817 Mon Sep 17 00:00:00 2001 From: Akito Kasai Date: Mon, 3 Jun 2024 17:40:06 +0900 Subject: [PATCH] Keep single quotes in InsertIntoWriter (#128) InsertIntoWriter converts a single quote to a double quote because single quotes are reserved for string literals and double quotes delimit column names. However, this conversion corrupts users' data whenever a value contains a single quote. For example, the current behavior silently changes `St. Patrick's day` to `St. Patrick"s day`. To eliminate this issue, this pull request keeps single quotes by escaping each one with another single quote. --- pytd/writer.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pytd/writer.py b/pytd/writer.py index d486c39..bede6f0 100644 --- a/pytd/writer.py +++ b/pytd/writer.py @@ -286,9 +286,13 @@ def _build_query(self, database, table, list_of_tuple, column_names): """ rows = [] for tpl in list_of_tuple: + # InsertIntoWriter kicks Presto (Trino). + # Following the list comprehension makes a single quote duplicated because + # Presto allows users to escape a single quote with another single quote. + # e.g. 'John Doe''s name' is converted to "John Doe's name" on Presto. list_of_value_strings = [ ( - f"""'{e.replace("'", '"')}'""" + f"""'{e.replace("'", "''")}'""" if isinstance(e, str) else ("null" if pd.isnull(e) else str(e)) )