Skip to content

Commit

Permalink
WIP - wait until postgres/postgres@59f47fb is in some production…
Browse files Browse the repository at this point in the history
… version

TODO: benchmark unaccent with the current solution
TODO: update documentation
  • Loading branch information
forrest79 committed Apr 9, 2024
1 parent 5fa7bb7 commit ebaca86
Show file tree
Hide file tree
Showing 3 changed files with 47,256 additions and 0 deletions.
9 changes: 9 additions & 0 deletions bin/build-sql
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ $pgEscape = fn (string $text): string => str_replace('\\', '\\\\', str_replace('

// Output accumulators, each starting as an empty string: the log text, the SQL
// value tuples and the unaccent rules text that are appended to further below.
$log = $sql = $unaccentRules = '';

foreach ($transliterate as $x => $items) {
foreach ($items as $y => $item) {
Expand All @@ -69,8 +70,16 @@ foreach ($transliterate as $x => $items) {
$log .= sprintf('(%s) %s -> %s', $utf8Char, $text, $item) . PHP_EOL;

$sql .= PHP_EOL . "\t" . '(\'' . $pgEscape($text) . '\', \'' . $pgEscape($item) . '\'),';

// Append one rule line: "<source>" alone when the replacement is empty,
// otherwise "<source><TAB><replacement>".
$unaccentRules .= $text;
if ($item !== '') {
	// In the PostgreSQL unaccent rules format a replacement that starts with '"'
	// is parsed as a quoted string, and '""' is only un-doubled inside quotes.
	// So the replacement must be quoted whenever it contains whitespace OR a
	// double quote, and embedded quotes must be doubled only when quoting.
	// A separate local is used so the loop variable $item is not modified.
	$needsQuoting = strpbrk($item, " \t\"") !== false;
	$target = $needsQuoting
		? '"' . str_replace('"', '""', $item) . '"'
		: $item;
	$unaccentRules .= "\t" . $target;
}
$unaccentRules .= PHP_EOL;
}
}

// Write the generated artifacts, in this order: the log into the data directory,
// then the SQL INSERT statement and the unaccent rules file into the dist directory.
// The trailing comma left after the last VALUES tuple is cut off before the
// statement terminator is appended.
$insertStatement = 'INSERT INTO system.transliterate_to_ascii_rules(chr, trans) VALUES' . mb_substr($sql, 0, -1) . ';' . PHP_EOL;

$artifacts = [
	[$dataDir, 'transliterate.log', $log],
	[$distDir, 'transliterate-data.sql', $insertStatement],
	[$distDir, 'transliterate_utf8_to_ascii.rules', $unaccentRules],
];

foreach ($artifacts as [$targetDir, $fileName, $contents]) {
	file_put_contents($targetDir . DIRECTORY_SEPARATOR . $fileName, $contents);
}
Loading

0 comments on commit ebaca86

Please sign in to comment.