From 58ef182951c1ad425bb718184709acbca62f7b64 Mon Sep 17 00:00:00 2001 From: Joe Ayoub <45374896+joe-ayoub-segment@users.noreply.github.com> Date: Tue, 1 Oct 2024 14:47:45 +0100 Subject: [PATCH] [S3] - Bug fix and tests (#2472) * bug fix in progress * adding test to s3 --- .../src/destinations/s3/functions.ts | 46 +++--- .../s3/syncToS3/__tests__/index.test.ts | 138 +++++++++++++----- .../src/destinations/s3/types.ts | 5 + 3 files changed, 123 insertions(+), 66 deletions(-) diff --git a/packages/destination-actions/src/destinations/s3/functions.ts b/packages/destination-actions/src/destinations/s3/functions.ts index bf9e713584..535e28a05d 100644 --- a/packages/destination-actions/src/destinations/s3/functions.ts +++ b/packages/destination-actions/src/destinations/s3/functions.ts @@ -1,31 +1,30 @@ import { Payload } from './syncToS3/generated-types' import { Settings } from './generated-types' import { Client } from './client' -import { RawMapping } from './types' +import { RawMapping, ColumnHeader } from './types' import { IntegrationError } from '@segment/actions-core' export async function send(payloads: Payload[], settings: Settings, rawMapping: RawMapping) { const batchSize = payloads[0] && typeof payloads[0].batch_size === 'number' ? payloads[0].batch_size : 0 + const delimiter = payloads[0]?.delimiter + const actionColName = payloads[0]?.audience_action_column_name if (batchSize > 25000) { throw new IntegrationError('Batch size cannot exceed 25000', 'Invalid Payload', 400) } - const headers = Object.keys(rawMapping.columns).map((column) => { - return snakeCase(column) - }) - - const actionColName = payloads[0]?.audience_action_column_name - const actionColNameSnakeCase = snakeCase(actionColName) + const headers: ColumnHeader[] = Object.entries(rawMapping.columns) + .filter(([_, value]) => value !== '') + .map(([column]) => { + return { cleanName: clean(delimiter, column), originalName: column }; + }); - if (actionColNameSnakeCase) { - headers.push(actionColNameSnakeCase) + if (actionColName) { + headers.push({cleanName: clean(delimiter, actionColName), originalName: actionColName} ) } - const delimiter = payloads[0]?.delimiter - - const fileContent = generateFile(payloads, headers, delimiter, actionColNameSnakeCase) - + const fileContent = generateFile(payloads, headers, delimiter, actionColName) + const s3Client = new Client(settings.s3_aws_region, settings.iam_role_arn, settings.iam_external_id) await s3Client.uploadS3( @@ -37,14 +36,11 @@ export async function send(payloads: Payload[], settings: Settings, rawMapping: ) } -export function snakeCase(str?: string) { +export function clean(delimiter: string, str?: string) { if (!str) { return '' } - // Replace each uppercase letter with an underscore followed by the letter (except at the start) - return str - .replace(/([a-z])([A-Z])/g, '$1_$2') // Add underscore between lowercase and uppercase letters - .toLowerCase() + return delimiter === 'tab' ? str : str.replace(delimiter, '') } function processField(row: string[], value: unknown | undefined) { @@ -59,19 +55,19 @@ function processField(row: string[], value: unknown | undefined) { ) } -function generateFile(payloads: Payload[], headers: string[], delimiter: string, actionColName?: string): string { - const rows: string[] = [] - rows.push(`${headers.join(delimiter === 'tab' ? '\t' : delimiter)}\n`) +export function generateFile(payloads: Payload[], headers: ColumnHeader[], delimiter: string, actionColName?: string): string { + const rows: string[] = []; + rows.push(`${headers.map(header => header.cleanName).join(delimiter === 'tab' ? '\t' : delimiter)}\n`) payloads.forEach((payload, index) => { const isLastRow = index === payloads.length - 1 const row: string[] = [] headers.forEach((header) => { - if (header === actionColName) { + if (header.originalName === actionColName) { processField(row, getAudienceAction(payload)) } else { - processField(row, payload.columns[header]) + processField(row, payload.columns[header.originalName]) } }) @@ -90,6 +86,4 @@ export function getAudienceAction(payload: Payload): boolean | undefined { } return (payload?.traits_or_props as Record | undefined)?.[payload.computation_key] ?? undefined -} - -export { generateFile } +} \ No newline at end of file diff --git a/packages/destination-actions/src/destinations/s3/syncToS3/__tests__/index.test.ts b/packages/destination-actions/src/destinations/s3/syncToS3/__tests__/index.test.ts index 1c3cc24da3..044913188a 100644 --- a/packages/destination-actions/src/destinations/s3/syncToS3/__tests__/index.test.ts +++ b/packages/destination-actions/src/destinations/s3/syncToS3/__tests__/index.test.ts @@ -1,40 +1,22 @@ import { generateFile } from '../../functions' // Adjust the import path import { Payload } from '../generated-types' -import { snakeCase, encodeString, getAudienceAction } from '../../functions' +import { clean, encodeString, getAudienceAction } from '../../functions' +import { ColumnHeader } from '../../types' -// Mock Client class -jest.mock('../../client', () => { - return { - Client: jest.fn().mockImplementation(() => ({ - assumeRole: jest.fn().mockResolvedValue({ - accessKeyId: 'mockAccessKeyId', - secretAccessKey: 'mockSecretAccessKey', - sessionToken: 'mockSessionToken' - }), - uploadS3: jest.fn().mockResolvedValue({ - statusCode: 200, - message: 'Upload successful' - }) - })) - } -}) - -// Test snakeCase function -describe('snakeCase', () => { - it('should convert camelCase to snake_case', () => { - expect(snakeCase('abcdEfg')).toEqual('abcd_efg') +describe('clean', () => { + it('should remove delimiter from string', () => { + expect(clean(',', 'abcd,Efg')).toEqual('abcdEfg') }) it('should handle undefined input', () => { - expect(snakeCase(undefined)).toBe('') + expect(clean(',', '')).toBe('') }) it('should handle empty string', () => { - expect(snakeCase('')).toBe('') + expect(clean('')).toBe('') }) }) -// Test encodeString function describe('encodeString', () => { it('should return a string enclosed in double quotes and escaped', () => { expect(encodeString('value')).toBe('"value"') @@ -42,7 +24,6 @@ describe('encodeString', () => { }) }) -// Test getAudienceAction function describe('getAudienceAction', () => { it('should return undefined if traits_or_props or computation_key are not defined', () => { const payload: Payload = { @@ -69,21 +50,98 @@ describe('getAudienceAction', () => { }) }) -// Test generateFile function describe('generateFile', () => { - it('should generate a CSV file with correct content', () => { - const payloads: Payload[] = [ - { - columns: { - event_name: 'Test Event' + const payloads: Payload[] = [ + { + columns: { + event_name: 'Custom Event 1', + event_type: 'track', + user_id: 'user_id_1', + anonymous_id: 'anonymous_id_1', + email: 'test@test.com', + properties: { + prop_str: 'Hello String!', + prop_num: 123.45, + prop_bool: true, + prop_datetime: '2024-01-08T13:52:50.212Z', + prop_date: '2024-01-08', + prop_obj: { key1: 'value1', key2: 'value2' }, + prop_arr: ['value1', 'value2'], + custom_field_1: 'Custom Field Value 1', + custom_field_2: 'Custom Field Value 2' }, - delimiter: ',', - enable_batching: false, - file_extension: 'csv' - } - ] - const headers = ['event_name'] - const result = generateFile(payloads, headers, ',', 'action_column') - expect(result).toContain('Test Event') + traits: { + first_name: 'John', + last_name: 'Doe', + email: 'test@test.com' + }, + context: { + traits: { + first_name: 'John', + last_name: 'Doe', + email: 'test@test.com' + }, + personas: { + computation_key: 'audience_name_1', + computation_id: 'audience_id_1', + space_id: 'space_id_1' + } + }, + timestamp: '2024-01-08T13:52:50.212Z', + message_id: 'aaa-bbb-ccc', + integrations: {}, + audience_name: 'audience_name_1', + audience_id: 'audience_id_1', + audience_space_id: 'space_id_1', + 'Custom Field 1': 'Custom Field Value 1', + 'Custom Field 2': 'Custom Field Value 2' + }, + audience_action_column_name: 'audience_action', + traits_or_props: { + audience_name_1: true, + prop_str: 'Hello String!', + prop_num: 123.45, + prop_bool: true, + prop_datetime: '2024-01-08T13:52:50.212Z', + prop_date: '2024-01-08', + prop_obj: { key1: 'value1', key2: 'value2' }, + prop_arr: ['value1', 'value2'], + custom_field_1: 'Custom Field Value 1', + custom_field_2: 'Custom Field Value 2' + }, + computation_key: 'audience_name_1', + enable_batching: true, + batch_size: 5000, + delimiter: ',', + file_extension: 'csv', + s3_aws_folder_name: 'foldername1' + } + ] + + const headers: ColumnHeader[] = [ + { cleanName: 'event_name', originalName: 'event_name' }, + { cleanName: 'event_type', originalName: 'event_type' }, + { cleanName: 'user_id', originalName: 'user_id' }, + { cleanName: 'anonymous_id', originalName: 'anonymous_id' }, + { cleanName: 'email', originalName: 'email' }, + { cleanName: 'properties', originalName: 'properties' }, + { cleanName: 'traits', originalName: 'traits' }, + { cleanName: 'context', originalName: 'context' }, + { cleanName: 'timestamp', originalName: 'timestamp' }, + { cleanName: 'message_id', originalName: 'message_id' }, + { cleanName: 'integrations', originalName: 'integrations' }, + { cleanName: 'audience_name', originalName: 'audience_name' }, + { cleanName: 'audience_id', originalName: 'audience_id' }, + { cleanName: 'audience_space_id', originalName: 'audience_space_id' }, + { cleanName: 'Custom Field 1', originalName: 'Custom Field 1' }, + { cleanName: 'Custom Field 2', originalName: 'Custom Field 2' }, + { cleanName: 'audience_action', originalName: 'audience_action' } + ] + + const output = `event_name,event_type,user_id,anonymous_id,email,properties,traits,context,timestamp,message_id,integrations,audience_name,audience_id,audience_space_id,Custom Field 1,Custom Field 2,audience_action\n"Custom Event 1","track","user_id_1","anonymous_id_1","test@test.com","{""prop_str"":""Hello String!"",""prop_num"":123.45,""prop_bool"":true,""prop_datetime"":""2024-01-08T13:52:50.212Z"",""prop_date"":""2024-01-08"",""prop_obj"":{""key1"":""value1"",""key2"":""value2""},""prop_arr"":[""value1"",""value2""],""custom_field_1"":""Custom Field Value 1"",""custom_field_2"":""Custom Field Value 2""}","{""first_name"":""John"",""last_name"":""Doe"",""email"":""test@test.com""}","{""traits"":{""first_name"":""John"",""last_name"":""Doe"",""email"":""test@test.com""},""personas"":{""computation_key"":""audience_name_1"",""computation_id"":""audience_id_1"",""space_id"":""space_id_1""}}","2024-01-08T13:52:50.212Z","aaa-bbb-ccc","{}","audience_name_1","audience_id_1","space_id_1","Custom Field Value 1","Custom Field Value 2","true"` + + it('should generate a CSV file with correct content', () => { + const result = generateFile(payloads, headers, ',', 'audience_action') + expect(result).toEqual(output) }) }) diff --git a/packages/destination-actions/src/destinations/s3/types.ts b/packages/destination-actions/src/destinations/s3/types.ts index 145603218f..5e934cf14b 100644 --- a/packages/destination-actions/src/destinations/s3/types.ts +++ b/packages/destination-actions/src/destinations/s3/types.ts @@ -13,3 +13,8 @@ export interface RawMapping { [k: string]: unknown } } + +export interface ColumnHeader { + cleanName: string + originalName: string +}