Skip to content

Commit

Permalink
small refactoring, add tests
Browse files Browse the repository at this point in the history
  • Loading branch information
forrest79 committed Feb 1, 2023
1 parent 1740e60 commit 64c99a9
Show file tree
Hide file tree
Showing 14 changed files with 47,570 additions and 47,393 deletions.
38 changes: 38 additions & 0 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
---
# CI workflow: runs the test suite once per PostgreSQL version listed in the matrix.
name: build

on:  # yamllint disable-line rule:truthy
  push:
  pull_request:
  schedule:
    # Weekly run: Mondays at 10:00 (GitHub evaluates cron schedules in UTC).
    - cron: '0 10 * * 1'

jobs:
  run:
    runs-on: ubuntu-20.04

    strategy:
      # Let the remaining matrix jobs finish even when one version fails.
      fail-fast: false
      matrix:
        # Quoted so the versions stay strings and are never re-typed as numbers.
        postgresql-versions:
          - '11'
          - '12'
          - '13'
          - '14'
          - '15'

    services:
      # Database container started next to the job, one image tag per matrix entry.
      postgres:
        image: postgres:${{ matrix.postgresql-versions }}
        env:
          POSTGRES_PASSWORD: postgres
        ports:
          - '5432:5432'
        # Folded scalar: the lines below are joined with single spaces into one option string.
        options: >-
          --health-cmd pg_isready
          --health-interval 10s
          --health-timeout 5s
          --health-retries 3

    steps:
      - name: Checkout
        uses: actions/checkout@v3

      - name: Setup PHP
        uses: shivammathur/setup-php@v2
        with:
          # Quoted: a bare version number is parsed as a float by YAML.
          php-version: '8.2'
          coverage: none

      - name: Run tests
        run: tests/run-tests
22 changes: 22 additions & 0 deletions .github/workflows/check-new-version.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
---
# Scheduled workflow: runs the tests/check-new-version script once a week.
# Named after its own file; it was previously called "build", which collided
# with the CI workflow's name in the Actions UI.
name: check-new-version

on:  # yamllint disable-line rule:truthy
  schedule:
    # Weekly run: Mondays at 10:00 (GitHub evaluates cron schedules in UTC).
    - cron: '0 10 * * 1'

jobs:
  run:
    runs-on: ubuntu-20.04

    steps:
      - name: Checkout
        uses: actions/checkout@v3

      - name: Setup PHP
        uses: shivammathur/setup-php@v2
        with:
          # Quoted: a bare version number is parsed as a float by YAML.
          php-version: '8.2'
          coverage: none

      - name: Check new version
        run: tests/check-new-version
76 changes: 76 additions & 0 deletions bin/build-sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
#!/usr/bin/php
<?php declare(strict_types=1);

// Directory layout, resolved relative to the directory of this script:
//   ../data            working data directory
//   ../data/Unidecode  input: the Text::Unidecode 'x??.pm' Perl modules
//   ../dist            output directory for the generated SQL
$dataDir = __DIR__ . DIRECTORY_SEPARATOR . '..' . DIRECTORY_SEPARATOR . 'data';
$unidecodeDir = $dataDir . DIRECTORY_SEPARATOR . 'Unidecode';
$distDir = __DIR__ . DIRECTORY_SEPARATOR . '..' . DIRECTORY_SEPARATOR . 'dist';

// Nothing can be built without the input tables, so stop early with a non-zero exit code.
if (!is_dir($unidecodeDir)) {
	// Diagnostics belong on STDERR; the trailing newline keeps the shell prompt off the message line.
	fwrite(STDERR, sprintf('Text::Unidecode files \'x??.pm\' are expected in \'%s\' directory.', $unidecodeDir) . PHP_EOL);
	exit(1);
}

// Filled by the converted tables: after the rewrite below, every generated PHP file
// assigns into $transliterate (the Perl variable $Text::Unidecode::Char is renamed to it).
$transliterate = [];

// glob() returns false on error; treat that the same as "no matching files".
$unidecodeFiles = glob($unidecodeDir . DIRECTORY_SEPARATOR . '*.pm') ?: [];

foreach ($unidecodeFiles as $filename) {
	$phpFilename = $filename . '.php';

	$data = file_get_contents($filename);
	if ($data === false) {
		fwrite(STDERR, sprintf('Can\'t read file \'%s\'.', $filename) . PHP_EOL);
		exit(1);
	}

	// Perl qq{...} (interpolating quotes) -> PHP double-quoted string.
	$data = preg_replace_callback('/qq\{(.*)\},/U', function (array $matches): string {
		$item = $matches[1];

		// These three entries are a single backslash-escaped character; PHP needs no escape for them.
		if (in_array($item, ['\@', '\{', '\}'], true)) {
			$item = ltrim($item, '\\');
		}

		// Escape only when the value contains the delimiter itself.
		return '"' . (!str_contains($item, '"') ? $item : addslashes($item)) . '",';
	}, $data);

	// Perl q{...} (literal quotes) -> PHP single-quoted string.
	$data = preg_replace_callback('/q\{(.*)\},/U', function (array $matches): string {
		$item = $matches[1];
		return "'" . (!str_contains($item, "'") ? $item : addslashes($item)) . "',";
	}, $data);

	// Placeholder blocks carry no transliteration data, so they become empty arrays.
	$data = str_replace('Text::Unidecode::make_placeholder_map()', '[]', $data);

	// Rename the Perl variable to ours and cut the last two characters of the trimmed module
	// (presumably the closing '1;' of a Perl module - verify against the x??.pm files).
	$data = substr(trim(str_replace('$Text::Unidecode::Char', '$transliterate', $data)), 0, -2);

	if (file_put_contents($phpFilename, '<?php' . PHP_EOL . $data) === false) {
		fwrite(STDERR, sprintf('Can\'t write file \'%s\'.', $phpFilename) . PHP_EOL);
		exit(1);
	}

	// Always remove the temporary PHP file, even when loading it throws (e.g. a ParseError).
	try {
		require($phpFilename);
	} finally {
		unlink($phpFilename);
	}
}

// Escapes a value for embedding in the generated SQL string literals:
// each single quote is doubled first, then each backslash is doubled.
// Array-form str_replace() applies the pairs in this order, one after another.
$pgEscape = static fn (string $text): string => str_replace(
	['\'', '\\'],
	['\'\'', '\\\\'],
	$text,
);

// Human-readable trace of every generated rule, and the SQL value tuples themselves.
$log = '';
$rows = [];

foreach ($transliterate as $x => $items) {
	foreach ($items as $y => $item) {
		// The outer key is the high part and the inner key the low byte of the code point.
		$codePoint = ($x * 256) + $y;

		// keep first 127 characters as is, ignore last 2 characters
		if (($codePoint < 127) || ($codePoint >= 65534)) {
			continue;
		}

		// Create the UTF-8 character from its code point. mb_chr() replaces the
		// 'HTML-ENTITIES' conversion, which is deprecated as of PHP 8.2.
		$text = mb_chr($codePoint, 'UTF-8');
		if ($text === false) {
			// Not encodable as a character (e.g. a surrogate), so there is nothing to map.
			continue;
		}

		// (instead of [?] we want '') || (we don't want to tolerate Win-1252 input, see x00.pm)
		if (($item === '[?]') || ($codePoint <= 159)) {
			$item = '';
		}

		$log .= sprintf('(%s) %s -> %s', $codePoint, $text, $item) . PHP_EOL;

		$rows[] = "\t" . '(\'' . $pgEscape($text) . '\', \'' . $pgEscape($item) . '\')';
	}
}

// Without rows the statement would end in a bare 'VALUES;', which is not valid SQL.
if ($rows === []) {
	fwrite(STDERR, 'No transliteration rules were generated, nothing to write.' . PHP_EOL);
	exit(1);
}

file_put_contents($dataDir . DIRECTORY_SEPARATOR . 'transliterate.log', $log);

// One tuple per line, comma separated, terminated by a single semicolon.
file_put_contents(
	$distDir . DIRECTORY_SEPARATOR . 'transliterate-data.sql',
	'INSERT INTO system.transliterate_to_ascii_rules(chr, trans) VALUES' . PHP_EOL . implode(',' . PHP_EOL, $rows) . ';' . PHP_EOL,
);
1 change: 1 addition & 0 deletions data/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
*.log
File renamed without changes.
Loading

0 comments on commit 64c99a9

Please sign in to comment.