From 32d205d238678e39ee8bcae69a400a39924f2086 Mon Sep 17 00:00:00 2001 From: Ayush Goyal <36241930+agl29@users.noreply.github.com> Date: Thu, 16 May 2024 14:47:41 +0530 Subject: [PATCH] [Importer] Fix importer for Parquet, ORC, and Avro formats (#3736) * [Importer] Fixing importer for Parquet, ORC, and Avro formats * fix lint issues --- .../libs/indexer/src/indexer/indexers/sql.py | 66 +++---- .../indexer/src/indexer/indexers/sql_tests.py | 181 ++++++++++++++++-- 2 files changed, 194 insertions(+), 53 deletions(-) diff --git a/desktop/libs/indexer/src/indexer/indexers/sql.py b/desktop/libs/indexer/src/indexer/indexers/sql.py index 2b87c55d0a..cf87c87e81 100644 --- a/desktop/libs/indexer/src/indexer/indexers/sql.py +++ b/desktop/libs/indexer/src/indexer/indexers/sql.py @@ -14,38 +14,27 @@ # See the License for the specific language governing permissions and # limitations under the License.import logging -from future import standard_library -standard_library.install_aliases() -from builtins import object import csv -import logging -import sys -import urllib.request, urllib.error import uuid - +import logging +import urllib.error +import urllib.request +from builtins import object from collections import OrderedDict +from urllib.parse import unquote as urllib_unquote, urlparse from django.urls import reverse +from django.utils.translation import gettext as _ from azure.abfs.__init__ import abfspath +from desktop.lib import django_mako +from desktop.lib.exceptions_renderable import PopupException +from desktop.settings import BASE_DIR from hadoop.fs.hadoopfs import Hdfs from notebook.connectors.base import get_interpreter from notebook.models import make_notebook from useradmin.models import User -from desktop.lib import django_mako -from desktop.lib.exceptions_renderable import PopupException -from desktop.settings import BASE_DIR - -if sys.version_info[0] > 2: - from urllib.parse import urlparse, unquote as urllib_unquote - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - from urllib import unquote as urllib_unquote - from urlparse import urlparse - - LOG = logging.getLogger() @@ -60,6 +49,7 @@ LOG.warning("Impala app is not enabled") impala_conf = None + class SQLIndexer(object): def __init__(self, user, fs): @@ -139,8 +129,8 @@ def create_table_from_a_file(self, source, destination, start_time=-1, file_enco "escapeChar" = "\\\\" ''' % source['format'] - use_temp_table = table_format in ('parquet', 'orc', 'kudu') or is_transactional or isIceberg - if use_temp_table: # We'll be using a temp table to load data + use_temp_table = table_format in ('parquet', 'orc', 'kudu', 'avro') or is_transactional or isIceberg + if use_temp_table: # We'll be using a temp table to load data if load_data: table_name, final_table_name = 'hue__tmp_%s' % table_name, table_name @@ -148,7 +138,7 @@ def create_table_from_a_file(self, source, destination, start_time=-1, file_enco 'database': database, 'table_name': table_name } - else: # Manual + else: # Manual row_format = '' file_format = table_format skip_header = False @@ -159,8 +149,8 @@ def create_table_from_a_file(self, source, destination, start_time=-1, file_enco collection_delimiter = None map_delimiter = None - if external or (load_data and table_format in ('parquet', 'orc', 'kudu')): # We'll use location to load data - if not self.fs.isdir(external_path): # File selected + if external or (load_data and table_format in ('parquet', 'orc', 'kudu', 'avro')): # We'll use location to load data + if not self.fs.isdir(external_path): # File selected external_path, external_file_name = Hdfs.split(external_path) if len(self.fs.listdir(external_path)) > 1: @@ -171,7 +161,7 @@ def create_table_from_a_file(self, source, destination, start_time=-1, file_enco self.fs.copy(source_path, external_path) else: self.fs.rename(source_path, external_path) - elif load_data: # We'll use load data command + elif load_data: # We'll use load data command parent_path = self.fs.parent_path(source_path) stats = self.fs.stats(parent_path) split = urlparse(source_path) @@ -180,17 +170,16 @@ def create_table_from_a_file(self, source, destination, start_time=-1, file_enco # check if the csv file is in encryption zone (encBit), then the scratch dir will be # in the same directory base_dir = parent_path if stats.encBit else self.fs.get_home_dir() - user_scratch_dir = base_dir + '/.scratchdir/%s' % str(uuid.uuid4()) # Make sure it's unique. + user_scratch_dir = base_dir + '/.scratchdir/%s' % str(uuid.uuid4()) # Make sure it's unique. self.fs.do_as_user(self.user, self.fs.mkdir, user_scratch_dir, 0o0777) self.fs.do_as_user(self.user, self.fs.rename, source['path'], user_scratch_dir) if editor_type == 'impala' and impala_conf and impala_conf.USER_SCRATCH_DIR_PERMISSION.get(): self.fs.do_as_user(self.user, self.fs.chmod, user_scratch_dir, 0o0777, True) source_path = user_scratch_dir + '/' + source['path'].split('/')[-1] - if external_path.lower().startswith("abfs"): #this is to check if its using an ABFS path + if external_path.lower().startswith("abfs"): # this is to check if its using an ABFS path external_path = abfspath(external_path) - tbl_properties = OrderedDict() if skip_header: tbl_properties['skip.header.line.count'] = '1' @@ -209,7 +198,7 @@ def create_table_from_a_file(self, source, destination, start_time=-1, file_enco 'serde_name': serde_name, 'serde_properties': serde_properties, 'file_format': file_format, - 'external': external or load_data and table_format in ('parquet', 'orc', 'kudu'), + 'external': external or load_data and table_format in ('parquet', 'orc', 'kudu', 'avro'), 'path': external_path, 'primary_keys': primary_keys if table_format == 'kudu' and not load_data else [], 'tbl_properties': tbl_properties @@ -269,7 +258,7 @@ def create_table_from_a_file(self, source, destination, start_time=-1, file_enco extra_create_properties += "\nTBLPROPERTIES('transactional'='true', 'transactional_properties'='%s')" % \ default_transactional_type - sql += '''\n\nCREATE TABLE `%(database)s`.`%(final_table_name)s`%(comment)s + sql += '''\n\nCREATE %(table_type)sTABLE `%(database)s`.`%(final_table_name)s`%(comment)s %(extra_create_properties)s AS SELECT %(columns_list)s FROM `%(database)s`.`%(table_name)s`;''' % { @@ -278,7 +267,8 @@ def create_table_from_a_file(self, source, destination, start_time=-1, file_enco 'table_name': table_name, 'extra_create_properties': extra_create_properties, 'columns_list': ', '.join(columns_list), - 'comment': ' COMMENT "%s"' % comment if comment else '' + 'comment': ' COMMENT "%s"' % comment if comment else '', + 'table_type': 'EXTERNAL ' if external and not is_transactional else '' } sql += '\n\nDROP TABLE IF EXISTS `%(database)s`.`%(table_name)s`;\n' % { 'database': database, @@ -377,17 +367,17 @@ def create_table_from_local_file(self, source, destination, start_time=-1): row = self.nomalize_booleans(row, columns) _csv_rows.append(tuple(row)) - if _csv_rows: #sql for data insertion + if _csv_rows: # sql for data insertion csv_rows = str(_csv_rows)[1:-1] if dialect in ('hive', 'mysql'): - sql += '''\nINSERT INTO %(database)s.%(table_name)s VALUES %(csv_rows)s;\n'''% { + sql += '''\nINSERT INTO %(database)s.%(table_name)s VALUES %(csv_rows)s;\n''' % { 'database': database, 'table_name': table_name, 'csv_rows': csv_rows } elif dialect == 'impala': - sql += '''\nINSERT INTO %(database)s.%(table_name)s_tmp VALUES %(csv_rows)s;\n'''% { + sql += '''\nINSERT INTO %(database)s.%(table_name)s_tmp VALUES %(csv_rows)s;\n''' % { 'database': database, 'table_name': table_name, 'csv_rows': csv_rows, @@ -396,12 +386,12 @@ def create_table_from_local_file(self, source, destination, start_time=-1): if dialect == 'impala': # casting from string to boolean is not allowed in impala so string -> int -> bool sql_ = ',\n'.join([ - ' CAST ( `%(name)s` AS %(type)s ) `%(name)s`' % col if col['type'] != 'boolean' \ + ' CAST ( `%(name)s` AS %(type)s ) `%(name)s`' % col if col['type'] != 'boolean' else ' CAST ( CAST ( `%(name)s` AS TINYINT ) AS boolean ) `%(name)s`' % col for col in columns ]) sql += '''\nCREATE TABLE IF NOT EXISTS %(database)s.%(table_name)s -AS SELECT\n%(sql_)s\nFROM %(database)s.%(table_name)s_tmp;\n\nDROP TABLE IF EXISTS %(database)s.%(table_name)s_tmp;'''% { +AS SELECT\n%(sql_)s\nFROM %(database)s.%(table_name)s_tmp;\n\nDROP TABLE IF EXISTS %(database)s.%(table_name)s_tmp;''' % { 'database': database, 'table_name': table_name, 'sql_': sql_ @@ -421,6 +411,7 @@ def create_table_from_local_file(self, source, destination, start_time=-1): is_task=True ) + def _create_database(request, source, destination, start_time): database = destination['name'] comment = destination['description'] @@ -465,6 +456,7 @@ def _create_table(request, source, destination, start_time=-1, file_encoding=Non else: return notebook.execute(request, batch=False) + def _create_table_from_local(request, source, destination, start_time=-1): notebook = SQLIndexer(user=request.user, fs=request.fs).create_table_from_local_file(source, destination, start_time) diff --git a/desktop/libs/indexer/src/indexer/indexers/sql_tests.py b/desktop/libs/indexer/src/indexer/indexers/sql_tests.py index 1e7bcfc159..0511943903 100644 --- a/desktop/libs/indexer/src/indexer/indexers/sql_tests.py +++ b/desktop/libs/indexer/src/indexer/indexers/sql_tests.py @@ -16,28 +16,24 @@ # See the License for the specific language governing permissions and # limitations under the License. -from builtins import object import json -import pytest -import sys +from builtins import object +from unittest.mock import MagicMock, Mock, patch -from desktop.lib.django_test_util import make_logged_in_client -from desktop.settings import BASE_DIR -from useradmin.models import User +import pytest from azure.conf import ABFS_CLUSTERS from beeswax.server import dbms +from desktop.lib.django_test_util import make_logged_in_client +from desktop.settings import BASE_DIR from indexer.indexers.sql import SQLIndexer +from useradmin.models import User -if sys.version_info[0] > 2: - from unittest.mock import patch, Mock, MagicMock -else: - from mock import patch, Mock, MagicMock - def mock_uuid(): return '52f840a8-3dde-434d-934a-2d6e06f3687e' + @pytest.mark.django_db class TestSQLIndexer(object): @@ -45,7 +41,6 @@ def setup_method(self): self.client = make_logged_in_client(username="test", groupname="empty", recreate=True, is_superuser=False) self.user = User.objects.get(username="test") - def test_create_table_from_a_file_to_csv(self): fs = Mock( stats=Mock(return_value={'mode': 0o0777}) @@ -155,7 +150,7 @@ def destination_dict(key): notebook = SQLIndexer(user=self.user, fs=fs).create_table_from_a_file(source, destination) - ### source dir is in encryption zone, so the scratch dir is in the same dir + # source dir is in encryption zone, so the scratch dir is in the same dir assert ( [statement.strip() for statement in u'''DROP TABLE IF EXISTS `default`.`hue__tmp_export_table`; CREATE TABLE IF NOT EXISTS `default`.`hue__tmp_export_table` @@ -170,7 +165,8 @@ def destination_dict(key): ) STORED AS TextFile TBLPROPERTIES('skip.header.line.count'='1', 'transactional'='false') ; -LOAD DATA INPATH '/enc_zn/upload_dir/.scratchdir/52f840a8-3dde-434d-934a-2d6e06f3687e/data.csv' INTO TABLE `default`.`hue__tmp_export_table` PARTITION (day='20200101'); +LOAD DATA INPATH '/enc_zn/upload_dir/.scratchdir/52f840a8-3dde-434d-934a-2d6e06f3687e/data.csv' \ +INTO TABLE `default`.`hue__tmp_export_table` PARTITION (day='20200101'); CREATE TABLE `default`.`export_table` COMMENT "No comment!" STORED AS csv TBLPROPERTIES('transactional'='true', 'transactional_properties'='insert_only') @@ -199,7 +195,7 @@ def source_dict(key): notebook = SQLIndexer(user=self.user, fs=fs).create_table_from_a_file(source, destination) - ### source dir is not in encryption zone, so the scratch dir is in user's home dir + # source dir is not in encryption zone, so the scratch dir is in user's home dir assert ( [statement.strip() for statement in u'''DROP TABLE IF EXISTS `default`.`hue__tmp_export_table`; CREATE TABLE IF NOT EXISTS `default`.`hue__tmp_export_table` @@ -214,7 +210,8 @@ def source_dict(key): ) STORED AS TextFile TBLPROPERTIES('skip.header.line.count'='1', 'transactional'='false') ; -LOAD DATA INPATH '/user/test/.scratchdir/52f840a8-3dde-434d-934a-2d6e06f3687e/data.csv' INTO TABLE `default`.`hue__tmp_export_table` PARTITION (day='20200101'); +LOAD DATA INPATH '/user/test/.scratchdir/52f840a8-3dde-434d-934a-2d6e06f3687e/data.csv' \ +INTO TABLE `default`.`hue__tmp_export_table` PARTITION (day='20200101'); CREATE TABLE `default`.`export_table` COMMENT "No comment!" STORED AS csv TBLPROPERTIES('transactional'='true', 'transactional_properties'='insert_only') @@ -223,6 +220,7 @@ def source_dict(key): DROP TABLE IF EXISTS `default`.`hue__tmp_export_table`;'''.split(';')] == [statement.strip() for statement in notebook.get_data()['snippets'][0]['statement_raw'].split(';')]) + class MockRequest(object): def __init__(self, fs=None, user=None): self.fs = fs if fs is not None else MockFs() @@ -558,6 +556,157 @@ def test_generate_create_parquet_table(): assert '''DROP TABLE IF EXISTS `default`.`hue__tmp_parquet_table`;''' in sql, sql + destination['useDefaultLocation'] = False + sql = SQLIndexer(user=request.user, fs=request.fs).create_table_from_a_file(source, destination).get_str() + + assert '''USE default;''' in sql, sql + + statement = '''CREATE EXTERNAL TABLE IF NOT EXISTS `default`.`hue__tmp_parquet_table` +( + `acct_client` string , + `tran_amount` double , + `tran_country_cd` string , + `vrfcn_city` string , + `vrfcn_city_lat` double , + `vrfcn_city_lon` double ) ROW FORMAT DELIMITED + FIELDS TERMINATED BY ',' + COLLECTION ITEMS TERMINATED BY '\\002' + MAP KEYS TERMINATED BY '\\003' + STORED AS TextFile LOCATION '/user/hue/data' +TBLPROPERTIES('skip.header.line.count'='1', 'transactional'='false') +;''' + assert statement in sql, sql + + assert '''CREATE EXTERNAL TABLE `default`.`parquet_table` + STORED AS parquet + AS SELECT * + FROM `default`.`hue__tmp_parquet_table`; +''' in sql, sql + + assert '''DROP TABLE IF EXISTS `default`.`hue__tmp_parquet_table`;''' in sql, sql + + +@pytest.mark.django_db +def test_generate_create_avro_table(): + source = json.loads('''{"sourceType": "hive", "name":"","sample":[["Bank Of America","3000000.0","US","Miami","37.6801986694",''' + '''"-121.92150116"],["Citi Bank","2800000.0","US","Richmond","37.5242004395","-77.4932022095"],["Deutsche Bank","2600000.0","US",''' + '''"Corpus Christi","40.7807998657","-73.9772033691"],["Thomson Reuters","2400000.0","US","Albany","35.7976989746",''' + '''"-78.6252975464"],''' + '''["OpenX","2200000.0","US","Des Moines","40.5411987305","-119.586898804"]],"sampleCols":[{"operations":[],"comment":"",''' + '''"nested":[],''' + '''"name":"acct_client","level":0,"keyType":"string","required":false,"precision":10,"keep":true,"isPartition":false,"length":100,''' + '''"partitionValue":"","multiValued":false,"unique":false,"type":"string","showProperties":false,"scale":0},{"operations":[],''' + '''"comment":"","nested":[],"name":"tran_amount","level":0,"keyType":"string","required":false,"precision":10,"keep":true,''' + '''"isPartition":false,"length":100,"partitionValue":"","multiValued":false,"unique":false,"type":"double",''' + '''"showProperties":false,"scale":0},{"operations":[],"comment":"","nested":[],"name":"tran_country_cd","level":0,"keyType":''' + '''"string","required":false,"precision":10,"keep":true,"isPartition":false,"length":100,"partitionValue":"","multiValued":false,''' + '''"unique":false,"type":"string","showProperties":false,"scale":0},{"operations":[],"comment":"","nested":[],"name":"vrfcn_city",''' + '''"level":0,"keyType":"string","required":false,"precision":10,"keep":true,"isPartition":false,"length":100,"partitionValue":"",''' + '''"multiValued":false,"unique":false,"type":"string","showProperties":false,"scale":0},{"operations":[],"comment":"","nested":[],''' + '''"name":"vrfcn_city_lat","level":0,"keyType":"string","required":false,"precision":10,"keep":true,"isPartition":false,''' + '''"length":100,''' + '''"partitionValue":"","multiValued":false,"unique":false,"type":"double","showProperties":false,"scale":0},{"operations":[],''' + '''"comment":"","nested":[],"name":"vrfcn_city_lon","level":0,"keyType":"string","required":false,"precision":10,"keep":true,''' + '''"isPartition":false,"length":100,"partitionValue":"","multiValued":false,"unique":false,"type":"double","showProperties":false,''' + '''"scale":0}],"inputFormat":"file","inputFormatsAll":[{"value":"file","name":"File"},{"value":"manual","name":"Manually"},''' + '''{"value":"query","name":"SQL Query"},{"value":"table","name":"Table"}],"inputFormatsManual":[{"value":"manual","name":''' + '''"Manually"}],"inputFormats":[{"value":"file","name":"File"},{"value":"manual","name":"Manually"},{"value":"query","name":''' + '''"SQL Query"},{"value":"table","name":"Table"}],"path":"/user/hue/data/query-hive-360.csv","isObjectStore":false,"table":"",''' + '''"tableName":"","databaseName":"default","apiHelperType":"hive","query":"","draggedQuery":"","format":{"type":"csv",''' + '''"fieldSeparator":",","recordSeparator":"\\n","quoteChar":"\\"","hasHeader":true,"status":0},"show":true,"defaultName":''' + '''"default.query-hive-360"}''' + ) + destination = json.loads('''{"isTransactional": false, "isInsertOnly": false, "sourceType": "hive", "name":"default.avro_table"''' + ''',"apiHelperType":"hive","description":"","outputFormat":"table","outputFormatsList":[{"name":"Table","value":"table"},''' + '''{"name":"Solr index","value":"index"},{"name":"File","value":"file"},{"name":"Database","value":"database"}],''' + '''"outputFormats":[{"name":"Table","value":"table"},{"name":"Solr index","value":"index"}],"columns":[{"operations":[],''' + '''"comment":"","nested":[],"name":"acct_client","level":0,"keyType":"string","required":false,"precision":10,"keep":true,''' + '''"isPartition":false,"length":100,"partitionValue":"","multiValued":false,"unique":false,"type":"string","showProperties":''' + '''false,"scale":0},{"operations":[],"comment":"","nested":[],"name":"tran_amount","level":0,"keyType":"string","required":false,''' + '''"precision":10,"keep":true,"isPartition":false,"length":100,"partitionValue":"","multiValued":false,"unique":false,"type":''' + '''"double","showProperties":false,"scale":0},{"operations":[],"comment":"","nested":[],"name":"tran_country_cd","level":0,''' + '''"keyType":"string","required":false,"precision":10,"keep":true,"isPartition":false,"length":100,"partitionValue":"",''' + '''"multiValued":false,"unique":false,"type":"string","showProperties":false,"scale":0},{"operations":[],"comment":"","nested":''' + '''[],"name":"vrfcn_city","level":0,"keyType":"string","required":false,"precision":10,"keep":true,"isPartition":false,"length":''' + '''100,"partitionValue":"","multiValued":false,"unique":false,"type":"string","showProperties":false,"scale":0},{"operations":[],''' + '''"comment":"","nested":[],"name":"vrfcn_city_lat","level":0,"keyType":"string","required":false,"precision":10,"keep":true,''' + '''"isPartition":false,"length":100,"partitionValue":"","multiValued":false,"unique":false,"type":"double","showProperties":''' + '''false,"scale":0},{"operations":[],"comment":"","nested":[],"name":"vrfcn_city_lon","level":0,"keyType":"string","required":''' + '''false,"precision":10,"keep":true,"isPartition":false,"length":100,"partitionValue":"","multiValued":false,"unique":false,''' + '''"type":"double","showProperties":false,"scale":0}],"bulkColumnNames":"acct_client,tran_amount,tran_country_cd,vrfcn_city,''' + '''vrfcn_city_lat,vrfcn_city_lon","showProperties":false,"isTargetExisting":false,"isTargetChecking":false,"existingTargetUrl":''' + '''"","tableName":"avro_table","databaseName":"default","tableFormat":"avro","KUDU_DEFAULT_RANGE_PARTITION_COLUMN":''' + '''{"values":[{"value":""}],"name":"VALUES","lower_val":0,"include_lower_val":"<=","upper_val":1,"include_upper_val":"<="},''' + '''"KUDU_DEFAULT_PARTITION_COLUMN":{"columns":[],"range_partitions":[{"values":[{"value":""}],"name":"VALUES","lower_val":0,''' + '''"include_lower_val":"<=","upper_val":1,"include_upper_val":"<="}],"name":"HASH","int_val":16},"tableFormats":[{"value":''' + '''"text","name":"Text"},{"value":"parquet","name":"Parquet"},{"value":"kudu","name":"Kudu"},{"value":"csv","name":"Csv"},''' + '''{"value":"avro","name":"Avro"},{"value":"json","name":"Json"},{"value":"regexp","name":"Regexp"},{"value":"orc",''' + '''"name":"ORC"}],"partitionColumns":[],"kuduPartitionColumns":[],"primaryKeys":[],"primaryKeyObjects":[],"importData":true,''' + '''"isIceberg":false,"useCopy":false,"useDefaultLocation":true,"nonDefaultLocation":"/user/hue/data/query-hive-360.csv",''' + '''"hasHeader":true,"useCustomDelimiters":false,"customFieldDelimiter":",","customCollectionDelimiter":"\\\\002",''' + '''"customMapDelimiter":"\\\\003","customRegexp":""}''' + ) + + path = {'isDir': False, 'split': ('/user/hue/data', 'query-hive-360.csv'), 'listdir': ['/user/hue/data']} + request = MockRequest(fs=MockFs(path=path)) + + sql = SQLIndexer(user=request.user, fs=request.fs).create_table_from_a_file(source, destination).get_str() + + assert '''USE default;''' in sql, sql + + statement = '''CREATE EXTERNAL TABLE IF NOT EXISTS `default`.`hue__tmp_avro_table` +( + `acct_client` string , + `tran_amount` double , + `tran_country_cd` string , + `vrfcn_city` string , + `vrfcn_city_lat` double , + `vrfcn_city_lon` double ) ROW FORMAT DELIMITED + FIELDS TERMINATED BY ',' + COLLECTION ITEMS TERMINATED BY '\\002' + MAP KEYS TERMINATED BY '\\003' + STORED AS TextFile LOCATION '/user/hue/data' +TBLPROPERTIES('skip.header.line.count'='1', 'transactional'='false') +;''' + assert statement in sql, sql + + assert '''CREATE TABLE `default`.`avro_table` + STORED AS avro + AS SELECT * + FROM `default`.`hue__tmp_avro_table`; +''' in sql, sql + + assert '''DROP TABLE IF EXISTS `default`.`hue__tmp_avro_table`;''' in sql, sql + + destination['useDefaultLocation'] = False + sql = SQLIndexer(user=request.user, fs=request.fs).create_table_from_a_file(source, destination).get_str() + + assert '''USE default;''' in sql, sql + + statement = '''CREATE EXTERNAL TABLE IF NOT EXISTS `default`.`hue__tmp_avro_table` +( + `acct_client` string , + `tran_amount` double , + `tran_country_cd` string , + `vrfcn_city` string , + `vrfcn_city_lat` double , + `vrfcn_city_lon` double ) ROW FORMAT DELIMITED + FIELDS TERMINATED BY ',' + COLLECTION ITEMS TERMINATED BY '\\002' + MAP KEYS TERMINATED BY '\\003' + STORED AS TextFile LOCATION '/user/hue/data' +TBLPROPERTIES('skip.header.line.count'='1', 'transactional'='false') +;''' + assert statement in sql, sql + + assert '''CREATE EXTERNAL TABLE `default`.`avro_table` + STORED AS avro + AS SELECT * + FROM `default`.`hue__tmp_avro_table`; +''' in sql, sql + + assert '''DROP TABLE IF EXISTS `default`.`hue__tmp_avro_table`;''' in sql, sql + @pytest.mark.django_db def test_generate_create_iceberg_table():