From ed33cd904decad440c38d6d4d5bbf596c1c552f7 Mon Sep 17 00:00:00 2001 From: Evan Moon Date: Mon, 22 Apr 2024 11:06:37 +0900 Subject: [PATCH] =?UTF-8?q?feat:=20=ED=95=9C=EA=B8=80=20=EB=AC=B8=EC=9E=A5?= =?UTF-8?q?=EA=B3=BC=20=EB=AC=B8=EC=9E=90=EA=B0=80=20=EB=8B=B4=EA=B8=B4=20?= =?UTF-8?q?=EB=B0=B0=EC=97=B4=EC=9D=84=20=EC=9D=B8=EC=9E=90=EB=A1=9C=20?= =?UTF-8?q?=EB=B0=9B=EC=95=84=20=EA=B7=9C=EC=B9=99=EC=97=90=20=EB=A7=9E?= =?UTF-8?q?=EA=B2=8C=20=ED=95=A9=EC=84=B1=ED=95=98=EB=8A=94=20`assemble`?= =?UTF-8?q?=20=ED=95=A8=EC=88=98=20=EC=B6=94=EA=B0=80=20(#64)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: 인자로 받은 문자의 초성, 중성, 종성 위치 가능 여부를 파악하는 함수 추가 * feat: combineHangulCharacter 함수 추가 * feat: assembleHangul 함수 추가 * refactor: assembleHangul내 의미없는 익명 함수 제거 * chore: update test * refactor: assert 추가 * refactor: export하지않을 함수들은 _internal 모듈로 이동 * chore: binaryAssembleHangul의 jsDoc 추가 * chore: assembleHangul jsDoc 추가 * fix: jsDoc 오타 수정 * refactor: 분해된 자모를 나타내는 변수를 복수형으로 변경 * Create great-yaks-deny.md * refactor: hasSingleBatchim 내 early return 패턴 추가 --- .changeset/great-yaks-deny.md | 5 ++ src/_internal.ts | 4 - src/_internal/hangul.spec.ts | 110 ++++++++++++++++++++++++++ src/_internal/hangul.ts | 143 ++++++++++++++++++++++++++++++++++ src/_internal/index.ts | 18 +++++ src/assemble.spec.ts | 14 ++++ src/assemble.ts | 22 ++++++ src/index.ts | 3 + src/utils.spec.ts | 29 ++++++- src/utils.ts | 27 ++++++- 10 files changed, 369 insertions(+), 6 deletions(-) create mode 100644 .changeset/great-yaks-deny.md delete mode 100644 src/_internal.ts create mode 100644 src/_internal/hangul.spec.ts create mode 100644 src/_internal/hangul.ts create mode 100644 src/_internal/index.ts create mode 100644 src/assemble.spec.ts create mode 100644 src/assemble.ts diff --git a/.changeset/great-yaks-deny.md b/.changeset/great-yaks-deny.md new file mode 100644 index 00000000..eac98791 --- /dev/null +++ b/.changeset/great-yaks-deny.md @@ -0,0 +1,5 @@ +--- +"es-hangul": minor +--- + +feat: 한글 문장과 문자가 담긴 배열을 인자로 받아 규칙에 맞게 합성하는 `assemble` 함수 추가 diff --git a/src/_internal.ts b/src/_internal.ts deleted file mode 100644 index 12e63b34..00000000 --- a/src/_internal.ts +++ /dev/null @@ -1,4 +0,0 @@ -export function excludeLastElement(array: string[]): [string[], string] { - const lastElement = array.at(-1); - return [array.slice(0, -1), lastElement ?? '']; -} diff --git a/src/_internal/hangul.spec.ts b/src/_internal/hangul.spec.ts new file mode 100644 index 00000000..00300b89 --- /dev/null +++ b/src/_internal/hangul.spec.ts @@ -0,0 +1,110 @@ +import { describe, it, expect, assert } from 'vitest'; +import { binaryAssembleHangulCharacters, binaryAssembleHangul, isHangulAlphabet, isHangulCharacter } from './hangul'; + +describe('isHangul*', () => { + it('isHangulCharacter는 완성된 한글 문자를 받으면 true를 반환한다', () => { + expect(isHangulCharacter('가')).toBe(true); + expect(isHangulCharacter('값')).toBe(true); + expect(isHangulCharacter('ㄱ')).toBe(false); + expect(isHangulCharacter('ㅏ')).toBe(false); + expect(isHangulCharacter('a')).toBe(false); + }); + it('isHangulAlphabet은 조합되지않은 한글 문자를 받으면 true를 반환한다', () => { + expect(isHangulAlphabet('가')).toBe(false); + expect(isHangulAlphabet('값')).toBe(false); + expect(isHangulAlphabet('ㄱ')).toBe(true); + expect(isHangulAlphabet('ㅏ')).toBe(true); + expect(isHangulAlphabet('a')).toBe(false); + }); +}); + +describe('binaryAssembleHangulCharacters', () => { + it('초성과 중성만 조합', () => { + expect(binaryAssembleHangulCharacters('ㄱ', 'ㅏ')).toEqual('가'); + }); + + it('초성과 중성이 합쳐진 문자와 종성을 조합', () => { + expect(binaryAssembleHangulCharacters('가', 'ㅇ')).toEqual('강'); + }); + + it('초성과 중성과 종성이 합쳐진 문자와 자음을 조합하여 겹받침 만들기', () => { + expect(binaryAssembleHangulCharacters('갑', 'ㅅ')).toEqual('값'); + }); + + it('초성과 중성이 합쳐진 문자와 모음을 조립하여 겹모음 만들기', () => { + expect(binaryAssembleHangulCharacters('고', 'ㅏ')).toEqual('과'); + }); + + it('모음만 있는 문자와 모음을 조합하여 겹모음 만들기', () => { + expect(binaryAssembleHangulCharacters('ㅗ', 'ㅏ')).toEqual('ㅘ'); + }); + + it('초성과 중성과 종성이 합쳐진 문자의 연음 법칙', () => { + expect(binaryAssembleHangulCharacters('톳', 'ㅡ')).toEqual('토스'); + }); + + it('초성과 중성과 종성(겹받침)이 합쳐진 문자의 연음 법칙', () => { + expect(binaryAssembleHangulCharacters('닭', 'ㅏ')).toEqual('달가'); + expect(binaryAssembleHangulCharacters('깎', 'ㅏ')).toEqual('까까'); + }); + + it('문법에 맞지 않는 문자를 조합하면 단순 Join 한다 (문법 순서 틀림)', () => { + expect(binaryAssembleHangulCharacters('ㅏ', 'ㄱ')).toEqual('ㅏㄱ'); + expect(binaryAssembleHangulCharacters('까', 'ㅃ')).toEqual('까ㅃ'); + expect(binaryAssembleHangulCharacters('ㅘ', 'ㅏ')).toEqual('ㅘㅏ'); + }); + + it('순서대로 입력했을 때 조합이 불가능한 문자라면 단순 Join 한다', () => { + expect(binaryAssembleHangulCharacters('뼈', 'ㅣ')).toEqual('뼈ㅣ'); + }); + + it('소스가 두 글자 이상이라면 Invalid source 에러를 발생시킨다.', () => { + assert.throws( + () => binaryAssembleHangulCharacters('가나', 'ㄴ'), + Error, + 'Invalid source character: 가나. Source must be one character.' + ); + assert.throws( + () => binaryAssembleHangulCharacters('ㄱㄴ', 'ㅏ'), + Error, + 'Invalid source character: ㄱㄴ. Source must be one character.' + ); + }); + + it('다음 문자가 한글 문자 한 글자가 아니라면 Invalid next character 에러를 발생시킨다.', () => { + assert.throws( + () => binaryAssembleHangulCharacters('ㄱ', 'a'), + Error, + 'Invalid next character: a. Next character must be one of the chosung, jungsung, or jongsung.' + ); + assert.throws( + () => binaryAssembleHangulCharacters('ㄱ', 'ㅡㅏ'), + Error, + 'Invalid next character: ㅡㅏ. Next character must be one of the chosung, jungsung, or jongsung.' + ); + }); +}); + +describe('binaryAssembleHangul', () => { + it('문장과 모음을 조합하여 다음 글자를 생성한다', () => { + expect(binaryAssembleHangul('저는 고양이를 좋아합닏', 'ㅏ')).toEqual('저는 고양이를 좋아합니다'); + }); + + it('문장과 자음을 조합하여 홑받침을 생성한다', () => { + expect(binaryAssembleHangul('저는 고양이를 좋아하', 'ㅂ')).toEqual('저는 고양이를 좋아합'); + }); + + it('문장과 자음을 조합하여 겹받침을 생성한다', () => { + expect(binaryAssembleHangul('저는 고양이를 좋아합', 'ㅅ')).toEqual('저는 고양이를 좋아핪'); + }); + + it('조합이 불가능한 자음이 입력되면 단순 Join 한다', () => { + expect(binaryAssembleHangul('저는 고양이를 좋아합', 'ㄲ')).toEqual('저는 고양이를 좋아합ㄲ'); + expect(binaryAssembleHangul('저는 고양이를 좋아합', 'ㅂ')).toEqual('저는 고양이를 좋아합ㅂ'); + }); + + it('조합이 불가능한 모음이 입력되면 단순 Join 한다', () => { + expect(binaryAssembleHangul('저는 고양이를 좋아하', 'ㅏ')).toEqual('저는 고양이를 좋아하ㅏ'); + expect(binaryAssembleHangul('저는 고양이를 좋아합니다', 'ㅜ')).toEqual('저는 고양이를 좋아합니다ㅜ'); + }); +}); diff --git a/src/_internal/hangul.ts b/src/_internal/hangul.ts new file mode 100644 index 00000000..6a1479ca --- /dev/null +++ b/src/_internal/hangul.ts @@ -0,0 +1,143 @@ +import assert, { excludeLastElement, isBlank, joinString } from '.'; +import { combineHangulCharacter, combineVowels, curriedCombineHangulCharacter } from '../combineHangulCharacter'; +import { disassembleHangulToGroups } from '../disassemble'; +import { removeLastHangulCharacter } from '../removeLastHangulCharacter'; +import { canBeChosung, canBeJongsung, canBeJungsung, hasSingleBatchim } from '../utils'; + +export function isHangulCharacter(character: string) { + return /^[가-힣]$/.test(character); +} + +export function isHangulAlphabet(character: string) { + return /^[ㄱ-ㅣ]$/.test(character); +} + +/** + * @name binaryAssembleHangulAlphabets + * @description + * 두 개의 한글 자모를 합칩니다. 완성된 한글 문자는 취급하지 않습니다. + * @example + * ``` + * binaryAssembleHangulAlphabets('ㄱ', 'ㅏ') // 가 + * binaryAssembleHangulAlphabets('ㅗ', 'ㅏ') // ㅘ + * ``` + */ +export function binaryAssembleHangulAlphabets(source: string, nextCharacter: string) { + if (canBeJungsung(`${source}${nextCharacter}`)) { + return combineVowels(source, nextCharacter); + } + + const isConsonantSource = canBeJungsung(source) === false; + if (isConsonantSource && canBeJungsung(nextCharacter)) { + return combineHangulCharacter(source, nextCharacter); + } + + return joinString(source, nextCharacter); +} + +/** + * @name linkHangulCharacters + * @description + * 연음 법칙을 적용하여 두 개의 한글 문자를 연결합니다. + */ +export function linkHangulCharacters(source: string, nextCharacter: string) { + const sourceJamo = disassembleHangulToGroups(source)[0]; + const [, lastJamo] = excludeLastElement(sourceJamo); + + return joinString(removeLastHangulCharacter(source), combineHangulCharacter(lastJamo, nextCharacter)); +} + +/** + * @name binaryAssembleHangulCharacters + * @description + * 인자로 받은 한글 문자 2개를 합성합니다. + * ```typescript + * binaryAssembleHangulCharacters( + * // 소스 문자 + * source: string + * // 다음 문자 + * nextCharacter: string + * ): string + * ``` + * @example + * binaryAssembleHangulCharacters('ㄱ', 'ㅏ') // 가 + * binaryAssembleHangulCharacters('가', 'ㅇ') // 강 + * binaryAssembleHangulCharacters('갑', 'ㅅ') // 값 + * binaryAssembleHangulCharacters('깎', 'ㅏ') // 까까 + */ +export function binaryAssembleHangulCharacters(source: string, nextCharacter: string) { + assert( + isHangulCharacter(source) || isHangulAlphabet(source), + `Invalid source character: ${source}. Source must be one character.` + ); + assert( + isHangulAlphabet(nextCharacter), + `Invalid next character: ${nextCharacter}. Next character must be one of the chosung, jungsung, or jongsung.` + ); + + const sourceJamos = disassembleHangulToGroups(source)[0]; + + const isSingleCharacter = sourceJamos.length === 1; + if (isSingleCharacter) { + const sourceCharacter = sourceJamos[0]; + return binaryAssembleHangulAlphabets(sourceCharacter, nextCharacter); + } + + const [restJamos, lastJamo] = excludeLastElement(sourceJamos); + + const needLinking = canBeChosung(lastJamo) && canBeJungsung(nextCharacter); + if (needLinking) { + return linkHangulCharacters(source, nextCharacter); + } + + const fixConsonant = curriedCombineHangulCharacter; + const combineJungsung = fixConsonant(restJamos[0]); + + if (canBeJungsung(`${lastJamo}${nextCharacter}`)) { + return combineJungsung(`${lastJamo}${nextCharacter}`)(); + } + + if (canBeJungsung(lastJamo) && canBeJongsung(nextCharacter)) { + return combineJungsung(lastJamo)(nextCharacter); + } + + const fixVowel = combineJungsung; + const combineJongsung = fixVowel(restJamos[1]); + + const lastConsonant = lastJamo; + + if (hasSingleBatchim(source) && canBeJongsung(`${lastConsonant}${nextCharacter}`)) { + return combineJongsung(`${lastConsonant}${nextCharacter}`); + } + + return joinString(source, nextCharacter); +} + +/** + * @name binaryAssembleHangul + * @description + * 인자로 받은 한글 문장과 한글 문자 하나를 합성합니다. + * ```typescript + * binaryAssembleHangul( + * // 한글 문장 + * source: string + * // 한글 문자 + * nextCharacter: string + * ): string + * ``` + * @example + * binaryAssembleHangul('저는 고양이를 좋아합닏', 'ㅏ') // 저는 고양이를 좋아합니다 + * binaryAssembleHangul('저는 고양이를 좋아합', 'ㅅ') // 저는 고양이를 좋아핪 + * binaryAssembleHangul('저는 고양이를 좋아하', 'ㅏ') // 저는 고양이를 좋아하ㅏ + */ +export function binaryAssembleHangul(source: string, nextCharacter: string) { + const [rest, lastCharacter] = excludeLastElement(source.split('')); + const needJoinString = isBlank(lastCharacter) || isBlank(nextCharacter); + + return joinString( + ...rest, + needJoinString + ? joinString(lastCharacter, nextCharacter) + : binaryAssembleHangulCharacters(lastCharacter, nextCharacter) + ); +} diff --git a/src/_internal/index.ts b/src/_internal/index.ts new file mode 100644 index 00000000..48aa3bb0 --- /dev/null +++ b/src/_internal/index.ts @@ -0,0 +1,18 @@ +export function excludeLastElement(array: string[]): [string[], string] { + const lastElement = array.at(-1); + return [array.slice(0, -1), lastElement ?? '']; +} + +export function joinString(...args: string[]) { + return args.join(''); +} + +export function isBlank(character: string) { + return /^\s$/.test(character); +} + +export default function assert(condition: boolean, errorMessage?: string): asserts condition { + if (condition === false) { + throw new Error(errorMessage ?? 'Invalid condition'); + } +} diff --git a/src/assemble.spec.ts b/src/assemble.spec.ts new file mode 100644 index 00000000..d871685c --- /dev/null +++ b/src/assemble.spec.ts @@ -0,0 +1,14 @@ +import { describe, expect, it } from 'vitest'; +import { assembleHangul } from './assemble'; + +describe('assembleHangul', () => { + it('온전한 한글과 한글 문자 조합', () => { + expect(assembleHangul(['아버지가', ' ', '방ㅇ', 'ㅔ ', '들ㅇ', 'ㅓ갑니다'])).toEqual('아버지가 방에 들어갑니다'); + }); + it('온전한 한글만 조합', () => { + expect(assembleHangul(['아버지가', ' ', '방에 ', '들어갑니다'])).toEqual('아버지가 방에 들어갑니다'); + }); + it('온전하지 않은 한글만 조합', () => { + expect(assembleHangul(['ㅇ', 'ㅏ', 'ㅂ', 'ㅓ', 'ㅈ', 'ㅣ'])).toEqual('아버지'); + }); +}); diff --git a/src/assemble.ts b/src/assemble.ts new file mode 100644 index 00000000..ced978aa --- /dev/null +++ b/src/assemble.ts @@ -0,0 +1,22 @@ +import { disassembleHangul } from './disassemble'; +import { binaryAssembleHangul } from './_internal/hangul'; + +/** + * @name assembleHangul + * @description + * 인자로 받은 배열에 담긴 한글 문장과 문자를 한글 규칙에 맞게 합성합니다. + * ```typescript + * assembleHangul( + * // 한글 문자와 문장을 담고 있는 배열 + * words: string[] + * ): string + * ``` + * @example + * assembleHangul(['아버지가', ' ', '방ㅇ', 'ㅔ ', '들ㅇ', 'ㅓ갑니다']) // 아버지가 방에 들어갑니다 + * assembleHangul(['아버지가', ' ', '방에 ', '들어갑니다']) // 아버지가 방에 들어갑니다 + * assembleHangul(['ㅇ', 'ㅏ', 'ㅂ', 'ㅓ', 'ㅈ', 'ㅣ']) // 아버지 + */ +export function assembleHangul(words: string[]) { + const disassembled = disassembleHangul(words.join('')).split(''); + return disassembled.reduce(binaryAssembleHangul); +} diff --git a/src/index.ts b/src/index.ts index 3dc62e14..4df89c2c 100644 --- a/src/index.ts +++ b/src/index.ts @@ -3,3 +3,6 @@ export * from './disassemble'; export * from './hangulIncludes'; export * from './josa'; export * from './utils'; +export * from './assemble'; +export * from './combineHangulCharacter'; +export * from './removeLastHangulCharacter'; diff --git a/src/utils.spec.ts b/src/utils.spec.ts index e724051c..1e5f7dca 100644 --- a/src/utils.spec.ts +++ b/src/utils.spec.ts @@ -1,5 +1,14 @@ import { describe, expect, expectTypeOf, it } from 'vitest'; -import { canBeChosung, canBeJongsung, canBeJungsung, getFirstConsonants, hasBatchim, hasProperty, hasValueInReadOnlyStringList } from './utils'; +import { + canBeChosung, + canBeJongsung, + canBeJungsung, + getFirstConsonants, + hasBatchim, + hasProperty, + hasSingleBatchim, + hasValueInReadOnlyStringList, +} from './utils'; describe('hasBatchim', () => { it('should return true for the character "값"', () => { @@ -19,6 +28,24 @@ describe('hasBatchim', () => { }); }); +describe('hasSingleBatchim', () => { + it('홑받침을 받으면 true를 반환한다.', () => { + expect(hasSingleBatchim('공')).toBe(true); + expect(hasSingleBatchim('핫')).toBe(true); + expect(hasSingleBatchim('양')).toBe(true); + expect(hasSingleBatchim('신')).toBe(true); + }); + it('겹받침을 받으면 false를 반환한다.', () => { + expect(hasSingleBatchim('값')).toBe(false); + expect(hasSingleBatchim('읊')).toBe(false); + }); + + it('받침이 없는 문자를 받으면 false를 반환한다.', () => { + expect(hasSingleBatchim('토')).toBe(false); + expect(hasSingleBatchim('서')).toBe(false); + }); +}); + describe('getFirstConsonants', () => { it('should extract the initial consonants "ㅅㄱ" from the word "사과"', () => { expect(getFirstConsonants('사과')).toBe('ㅅㄱ'); diff --git a/src/utils.ts b/src/utils.ts index 332ab77a..ddab75d7 100644 --- a/src/utils.ts +++ b/src/utils.ts @@ -3,7 +3,7 @@ import { HANGUL_CHARACTERS_BY_LAST_INDEX, HANGUL_CHARACTERS_BY_MIDDLE_INDEX, } from './constants'; -import { disassembleHangulToGroups } from './disassemble'; +import { disassembleHangul, disassembleHangulToGroups } from './disassemble'; import { disassembleCompleteHangulCharacter } from './disassembleCompleteHangulCharacter'; /** @@ -26,6 +26,31 @@ export function hasBatchim(str: string) { return disassembled != null && disassembled.last !== ''; } +/** + * @name hasSingleBatchim + * @description + * 한글 문자열의 마지막 글자가 홑받침이 있는지 확인합니다. + * ```typescript + * hasSingleBatchim( + * // 글자에 받침이 있는지 확인하고 싶은 문자열 + * str: string + * ): boolean + * ``` + * @example + * hasSingleBatchim('갑') // true + * hasSingleBatchim('값') // false + * hasSingleBatchim('토') // false + */ +export function hasSingleBatchim(str: string) { + const lastChar = str[str.length - 1]!; + if (hasBatchim(lastChar) === false) { + return false; + } + + const disassembled = disassembleHangul(lastChar); + return disassembled.length === 3; +} + /** * @name getFirstConsonants * @description