Skip to content

Commit 9995672

Browse files
committed
WIP - wait until postgres/postgres@59f47fb is included in a production release of PostgreSQL
TODO: benchmark unaccent against the current solution; TODO: update documentation
1 parent b87e04b commit 9995672

File tree

3 files changed

+47256
-0
lines changed

3 files changed

+47256
-0
lines changed

bin/build-sql

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ $pgEscape = fn (string $text): string => str_replace('\\', '\\\\', str_replace('
4848

4949
$log = '';
5050
$sql = '';
51+
$unaccentRules = '';
5152

5253
foreach ($transliterate as $x => $items) {
5354
foreach ($items as $y => $item) {
@@ -69,8 +70,16 @@ foreach ($transliterate as $x => $items) {
6970
$log .= sprintf('(%s) %s -> %s', $utf8Char, $text, $item) . PHP_EOL;
7071

7172
$sql .= PHP_EOL . "\t" . '(\'' . $pgEscape($text) . '\', \'' . $pgEscape($item) . '\'),';
73+
74+
$unaccentRules .= $text;
75+
// Append the translation target for the unaccent rules file.
// An empty target is omitted: the line then holds only the source character.
if ($item !== '') {
    // The rules format separates source and target by whitespace and treats a
    // leading double quote as the start of a quoted target. A target must
    // therefore be quoted whenever it contains whitespace OR a double quote;
    // otherwise a doubled quote would be written bare and misread.
    $needsQuoting = str_contains($item, ' ')
        || str_contains($item, "\t")
        || str_contains($item, '"');

    // Inside a quoted target a literal double quote is escaped by doubling it.
    $escaped = str_replace('"', '""', $item);

    $unaccentRules .= "\t" . ($needsQuoting ? '"' . $escaped . '"' : $escaped);
}
79+
$unaccentRules .= PHP_EOL;
7280
}
7381
}
7482

7583
file_put_contents($dataDir . DIRECTORY_SEPARATOR . 'transliterate.log', $log);
7684
file_put_contents($distDir . DIRECTORY_SEPARATOR . 'transliterate-data.sql', 'INSERT INTO system.transliterate_to_ascii_rules(chr, trans) VALUES' . mb_substr($sql, 0, -1) . ';' . PHP_EOL);
85+
file_put_contents($distDir . DIRECTORY_SEPARATOR . 'transliterate_utf8_to_ascii.rules', $unaccentRules);

0 commit comments

Comments
 (0)