From b48e23c94abd0b01a41907fff427b28d8513dc2e Mon Sep 17 00:00:00 2001 From: Karel Wintersky Date: Fri, 19 Nov 2021 01:21:54 +0300 Subject: [PATCH] Implement LitEmoji::removeEmoji() (#28) --- .gitattributes | 13 +++++++++++++ README.md | 4 ++++ composer.json | 3 ++- src/LitEmoji.php | 36 +++++++++++++++++++++++++----------- tests/LitEmojiTest.php | 6 ++++++ 5 files changed, 50 insertions(+), 12 deletions(-) create mode 100644 .gitattributes diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..f727334 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,13 @@ +* text=auto +* eol=lf + +.git export-ignore +.gitattributes export-ignore +.gitignore export-ignore +.github export-ignore +tests export-ignore +vendor export-ignore +phpunit.xml export-ignore +bin export-ignore + + diff --git a/README.md b/README.md index bbba685..b91dc94 100644 --- a/README.md +++ b/README.md @@ -25,6 +25,10 @@ echo LitEmoji::encodeHtml('Baby you light my :fire:! :smiley:'); echo LitEmoji::encodeUnicode('Baby you light my :fire:! :smiley:'); // 'Baby you light my 🔥! 😃' + +echo LitEmoji::removeEmoji('Baby you light my 🔥! 😃!!!'); +// 'Baby you light my ! !!!' + ``` # Configuration diff --git a/composer.json b/composer.json index fa96c22..db0f742 100644 --- a/composer.json +++ b/composer.json @@ -5,7 +5,8 @@ "keywords": ["php-emoji", "emoji"], "license": "MIT", "require": { - "php": ">=7.3" + "php": ">=7.3", + "ext-mbstring": "*" }, "require-dev": { "phpunit/phpunit": "9.4.*", diff --git a/src/LitEmoji.php b/src/LitEmoji.php index 300c7d8..56c60f3 100644 --- a/src/LitEmoji.php +++ b/src/LitEmoji.php @@ -4,7 +4,7 @@ class LitEmoji { - const MB_REGEX = '/( + public const MB_REGEX = '/( \x23\xE2\x83\xA3 # Digits [\x30-\x39]\xE2\x83\xA3 | \xE2[\x9C-\x9E][\x80-\xBF] # Dingbats @@ -28,7 +28,7 @@ class LitEmoji * @param string $content * @return string */ - public static function encodeShortcode($content) + public static function encodeShortcode(string $content): string { $content = self::entitiesToUnicode($content); $content = self::unicodeToShortcode($content); @@ -42,7 +42,7 @@ public static function encodeShortcode($content) * @param string $content * @return string */ - public static function encodeHtml($content) + public static function encodeHtml(string $content): string { $content = self::unicodeToShortcode($content); $content = self::shortcodeToEntities($content); @@ -56,7 +56,7 @@ public static function encodeHtml($content) * @param string $content * @return string */ - public static function encodeUnicode($content) + public static function encodeUnicode(string $content): string { $content = self::shortcodeToUnicode($content); $content = self::entitiesToUnicode($content); @@ -70,7 +70,7 @@ public static function encodeUnicode($content) * @param string $content * @return string */ - public static function shortcodeToUnicode($content) + public static function shortcodeToUnicode(string $content): string { $replacements = self::getShortcodeCodepoints(); return str_replace(array_keys($replacements), $replacements, $content); @@ -82,10 +82,10 @@ public static function shortcodeToUnicode($content) * @param string $content * @return string */ - public static function entitiesToUnicode($content) + public static function entitiesToUnicode(string $content): string { /* Convert HTML entities to uppercase hexadecimal */ - $content = preg_replace_callback('/\&\#(x?[a-zA-Z0-9]*?)\;/', function($matches) { + $content = preg_replace_callback('/\&\#(x?[a-zA-Z0-9]*?)\;/', static function($matches) { $code = $matches[1]; if ($code[0] == 'x') { @@ -105,7 +105,7 @@ public static function entitiesToUnicode($content) * @param string $content * @return string */ - public static function unicodeToShortcode($content) + public static function unicodeToShortcode(string $content): string { $replacement = ''; $encoding = mb_detect_encoding($content); @@ -155,7 +155,8 @@ public static function unicodeToShortcode($content) * @param string $content * @return string */ - public static function shortcodeToEntities($content) { + public static function shortcodeToEntities(string $content): string + { $replacements = self::getShortcodeEntities(); return str_replace(array_keys($replacements), $replacements, $content); } @@ -166,7 +167,7 @@ public static function shortcodeToEntities($content) { * @param string $property * @param mixed $value */ - public static function config($property, $value) + public static function config(string $property, $value): void { switch ($property) { case 'excludeShortcodes': @@ -187,6 +188,19 @@ public static function config($property, $value) break; } } + + /** + * Removes all emoji-sequences from string. + * + * @param string $source + * @return string + */ + public static function removeEmoji(string $source): string + { + $content = self::encodeShortcode($source); + $content = preg_replace('/\:\w+\:/', '', $content); + return $content; + } private static function getShortcodes() { @@ -195,7 +209,7 @@ private static function getShortcodes() } // Skip excluded shortcodes - self::$shortcodes = array_filter(require(__DIR__ . '/shortcodes-array.php'), function($code) { + self::$shortcodes = array_filter(require(__DIR__ . '/shortcodes-array.php'), static function($code) { return !in_array($code, self::$excludedShortcodes); }, ARRAY_FILTER_USE_KEY); diff --git a/tests/LitEmojiTest.php b/tests/LitEmojiTest.php index f31b0ee..e883e46 100644 --- a/tests/LitEmojiTest.php +++ b/tests/LitEmojiTest.php @@ -47,6 +47,12 @@ public function testUnicodeToShortcodeTiming() $text = LitEmoji::encodeShortcode(file_get_contents(__DIR__ . '/UnicodeIpsum')); $this->assertEquals(file_get_contents(__DIR__ . '/ShortcodeIpsum'), $text); } + + public function testRemoveEmoji() + { + $text = LitEmoji::removeEmoji('Some text 😊 including emoji 🚀'); + $this->assertEquals('Some text including emoji ', $text); // NB: smile emoji surrounded with TWO spaces. + } public function testConfigExcludeShortcodes() {