diff --git a/src/record/delete/index.ts b/src/record/delete/index.ts
index 7a01f04ee6..3261d039b4 100644
--- a/src/record/delete/index.ts
+++ b/src/record/delete/index.ts
@@ -8,6 +8,7 @@ import { deleteByRecordNumber } from "./usecases/deleteByRecordNumber";
 import { logger } from "../../utils/log";
 import type { SupportedImportEncoding } from "../../utils/file";
 import { readFile } from "../../utils/file";
+import { validateEncoding } from "../../utils/encoding";
 import { parseRecords } from "./parsers";
 import { RunError } from "../error";
 
@@ -41,6 +42,10 @@ const deleteRecordsByFile = async (
   filePath: string,
   encoding?: SupportedImportEncoding,
 ): Promise<void> => {
+  if (encoding) {
+    await validateEncoding(filePath, encoding);
+  }
+
   const recordNumbers = await getRecordNumbersFromFile(
     apiClient,
     app,
diff --git a/src/record/import/index.ts b/src/record/import/index.ts
index 35cfb5b1ee..b4cadfce05 100644
--- a/src/record/import/index.ts
+++ b/src/record/import/index.ts
@@ -10,6 +10,7 @@ import { userSelected } from "./schema/transformers/userSelected";
 import { logger } from "../../utils/log";
 import { LocalRecordRepositoryFromStream } from "./repositories/localRecordRepositoryFromStream";
 import { RunError } from "../error";
+import { validateEncoding } from "../../utils/encoding";
 
 export type Options = {
   app: string;
@@ -34,8 +35,10 @@ export const run: (
       ...restApiClientOptions
     } = argv;
 
+    if (encoding) {
+      await validateEncoding(filePath, encoding);
+    }
     const apiClient = buildRestAPIClient(restApiClientOptions);
-
     const fieldsJson = await apiClient.app.getFormFields({ app });
     const schema = createSchema(
       fieldsJson,
diff --git a/src/utils/__tests__/encoding.test.ts b/src/utils/__tests__/encoding.test.ts
new file mode 100644
index 0000000000..7c01de9767
--- /dev/null
+++ b/src/utils/__tests__/encoding.test.ts
@@ -0,0 +1,44 @@
+import { isMismatchEncoding, validateEncoding } from "../encoding";
+import fs from "fs";
+import os from "os";
+import path from "path";
+
+describe("isMismatchEncoding", () => {
+  it("should detect the mismatch encoding correctly", async () => {
+    const inputSJISFile = path.join(__dirname, "./fixtures/input_sjis.csv");
+    expect(await isMismatchEncoding(inputSJISFile, "sjis")).toBe(false);
+    expect(await isMismatchEncoding(inputSJISFile, "utf8")).toBe(true);
+
+    const inputUTF8File = path.join(__dirname, "./fixtures/input_utf8.csv");
+    expect(await isMismatchEncoding(inputUTF8File, "sjis")).toBe(true);
+    expect(await isMismatchEncoding(inputUTF8File, "utf8")).toBe(false);
+  });
+
+  it("should accept a question mark and an empty file", async () => {
+    const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), "encoding-test-"));
+    try {
+      const questionMarkFile = path.join(tempDir, "question_mark.csv");
+      fs.writeFileSync(questionMarkFile, '"Is active?"\n"yes"\n');
+      expect(await isMismatchEncoding(questionMarkFile, "utf8")).toBe(false);
+      expect(await isMismatchEncoding(questionMarkFile, "sjis")).toBe(false);
+
+      const emptyFile = path.join(tempDir, "empty.csv");
+      fs.writeFileSync(emptyFile, "");
+      expect(await isMismatchEncoding(emptyFile, "utf8")).toBe(false);
+    } finally {
+      fs.rmSync(tempDir, { recursive: true, force: true });
+    }
+  });
+});
+
+describe("validateEncoding", () => {
+  it("should reject only when the encoding mismatches", async () => {
+    const inputSJISFile = path.join(__dirname, "./fixtures/input_sjis.csv");
+    await expect(
+      validateEncoding(inputSJISFile, "sjis"),
+    ).resolves.toBeUndefined();
+    await expect(validateEncoding(inputSJISFile, "utf8")).rejects.toThrow(
+      "Failed to decode the specified CSV file.",
+    );
+  });
+});
diff --git a/src/utils/encoding.ts b/src/utils/encoding.ts
new file mode 100644
index 0000000000..c75077df21
--- /dev/null
+++ b/src/utils/encoding.ts
@@ -0,0 +1,94 @@
+import fs from "fs";
+import iconv from "iconv-lite";
+import readline from "readline";
+import { extractFileFormat } from "./file";
+import type { SupportedImportEncoding } from "./file";
+import { Transform } from "stream";
+
+/**
+ * U+FFFD REPLACEMENT CHARACTER, which iconv-lite emits in place of every byte
+ * sequence it cannot decode. A literal "?" is deliberately not treated as a
+ * failure marker: it is an ordinary character in CSV content, and iconv-lite
+ * substitutes it only when encoding, never when decoding.
+ */
+const REPLACEMENT_CHARACTER = "\uFFFD";
+
+/**
+ * Reports whether the file at `filePath` cannot be decoded with `encoding`.
+ * Only CSV files are inspected; every other format is reported as matching.
+ */
+export const isMismatchEncoding = async (
+  filePath: string,
+  encoding: SupportedImportEncoding,
+): Promise<boolean> => {
+  const format = extractFileFormat(filePath);
+  switch (format) {
+    case "csv":
+      return isMismatchEncodingOfCsvFile(filePath, encoding);
+  }
+
+  return false;
+};
+
+/**
+ * Rejects when the file at `filePath` cannot be decoded with `encoding`.
+ *
+ * @throws Error when the decoded first line contains undecodable characters.
+ */
+export const validateEncoding = async (
+  filePath: string,
+  encoding: SupportedImportEncoding,
+): Promise<void> => {
+  if (await isMismatchEncoding(filePath, encoding)) {
+    throw new Error(
+      `Failed to decode the specified CSV file.\nThe specified encoding (${encoding}) might mismatch the actual encoding of the CSV file.`,
+    );
+  }
+};
+
+/** Checks the decoded first line of a CSV file for undecodable characters. */
+const isMismatchEncodingOfCsvFile: (
+  filePath: string,
+  encoding: SupportedImportEncoding,
+) => Promise<boolean> = async (filePath, encoding) => {
+  const decodedFirstLine = await getDecodedFirstLine(filePath, encoding);
+  return containsUntranslatableChars(decodedFirstLine);
+};
+
+/**
+ * Decodes the file with `encoding` and returns its first line, or an empty
+ * string when the file has no content.
+ */
+const getDecodedFirstLine: (
+  filePath: string,
+  encoding: SupportedImportEncoding,
+) => Promise<string> = async (filePath, encoding) => {
+  const stream = fs.createReadStream(filePath);
+  const decodedStream = stream.pipe(
+    Transform.from(iconv.decodeStream(encoding)),
+  );
+  // pipe() does not forward errors, so propagate read failures by hand.
+  stream.on("error", (e) => {
+    decodedStream.destroy(e);
+  });
+
+  const reader = readline.createInterface({
+    input: decodedStream,
+  });
+
+  try {
+    const { done, value } = await reader[Symbol.asyncIterator]().next();
+    // An empty file finishes the iterator without yielding any line.
+    return done ? "" : value;
+  } finally {
+    // Only the first line is needed: release the file descriptor right away
+    // instead of leaving the rest of the file pending in the pipeline.
+    reader.close();
+    decodedStream.destroy();
+    stream.destroy();
+  }
+};
+
+/** Tells whether `content` holds a character that decoding had to replace. */
+const containsUntranslatableChars: (content: string) => boolean = (content) =>
+  content.includes(REPLACEMENT_CHARACTER);