From 37c157a86b2dd969720ec1bbfe21f4642f3790af Mon Sep 17 00:00:00 2001
From: adlifarizi <adlifarizi04@gmail.com>
Date: Thu, 4 Jun 2026 14:14:56 +0700
Subject: [PATCH] fix: restrict Japanese Kanji detection and remove ineffective
 Chinese tonal sandhi logic

---
 .../theveloper/pixelplay/utils/LyricsUtils.kt | 34 ++++---------------
 1 file changed, 7 insertions(+), 27 deletions(-)

diff --git a/app/src/main/java/com/theveloper/pixelplay/utils/LyricsUtils.kt b/app/src/main/java/com/theveloper/pixelplay/utils/LyricsUtils.kt
index 23cff07c9..ca7dcc12c 100644
--- a/app/src/main/java/com/theveloper/pixelplay/utils/LyricsUtils.kt
+++ b/app/src/main/java/com/theveloper/pixelplay/utils/LyricsUtils.kt
@@ -107,12 +107,12 @@ object MultiLangRomanizer {
     private val KYRGYZ_SPECIFIC_CYRILLIC_LETTERS = setOf("Ң", "ң", "Ө", "ө", "Ү", "ү")
     private val MACEDONIAN_SPECIFIC_CYRILLIC_LETTERS = setOf("Ѓ", "ѓ", "Ѕ", "ѕ", "Ќ", "ќ")
 
-    fun isJapanese(text: String): Boolean {
+    fun isJapanese(text: String, entireLyricsHasKana: Boolean = false): Boolean {
         // Detect Hiragana or Katakana
         if (text.any { it in '\u3040'..'\u309F' || it in '\u30A0'..'\u30FF' }) return true
         
-        // Treat Kanji-only lines as Japanese (prioritized over Chinese in the main parsing loop)
-        return text.any { it in '\u4E00'..'\u9FFF' }
+        // Treat Kanji-only lines as Japanese only if the entire song has Japanese kana
+        return entireLyricsHasKana && text.any { it in '\u4E00'..'\u9FFF' }
     }
     fun isKorean(text: String) = text.any { it in '\uAC00'..'\uD7A3' }
     fun isHindi(text: String) = text.any { it in '\u0900'..'\u097F' }
@@ -382,28 +382,6 @@ object MultiLangRomanizer {
                         continue
                     }
 
-                    // ── Tonal sandhi: 一 (yī) ──────────────────────────────────
-                    // Before 4th-tone syllable → yí; before 1st/2nd/3rd → yì
-                    c == '一' -> {
-                        val nextPinyin = next?.let { getPinyin(it, format) } ?: ""
-                        // Pinyin4j without-tone loses tone info; we approximate:
-                        // characters that are typically 4th tone starters
-                        val fourthToneInitials = setOf("是","不","去","做","看","用","要","过","太","对","大","会","在","上","下","个","万","次","号")
-                        val tone = if (next != null && fourthToneInitials.contains(next.toString())) "yi2" else "yi4"
-                        sb.append(tone.dropLast(1)).append(" ")
-                        idx++
-                        continue
-                    }
-
-                    // ── Tonal sandhi: 不 (bù) → bú before 4th-tone syllable ──
-                    c == '不' -> {
-                        val fourthToneChars = setOf("是","去","做","看","用","要","过","太","对","大","会","在","上","下","个","万","次","号","不")
-                        val isBefore4th = next != null && fourthToneChars.contains(next.toString())
-                        sb.append(if (isBefore4th) "bu2" else "bu4").append(" ")
-                        idx++
-                        continue
-                    }
-
                     // ── Regular Hanzi ─────────────────────────────────────────
                     c.toString().matches(Regex("[\u4E00-\u9FA5]")) -> {
                         sb.append(getPinyin(c, format, prevChar = chars.getOrNull(idx - 1))).append(" ")
@@ -752,6 +730,8 @@ object LyricsUtils {
             return Lyrics(plain = emptyList(), synced = emptyList())
         }
 
+        val entireLyricsHasKana = lyricsText.any { it in '\u3040'..'\u309F' || it in '\u30A0'..'\u30FF' }
+
         val normalizedInput = stripLeadingLyricsDocumentNoise(lyricsText)
         if (looksLikeTtmlDocument(normalizedInput)) {
             val converted = TtmlLyricsParser.parseToEnhancedLrc(normalizedInput)
@@ -888,7 +868,7 @@ object LyricsUtils {
             val pairedLines = pairTranslationLines(sortedSyncedLines).map { line ->
 
                 val romanized = when {
-                    MultiLangRomanizer.isJapanese(line.line) -> MultiLangRomanizer.romanizeJapanese(line.line)
+                    MultiLangRomanizer.isJapanese(line.line, entireLyricsHasKana) -> MultiLangRomanizer.romanizeJapanese(line.line)
                     MultiLangRomanizer.isChinese(line.line) -> MultiLangRomanizer.romanizeChinese(line.line)
                     MultiLangRomanizer.isKorean(line.line) -> MultiLangRomanizer.romanizeKorean(line.line)
                     MultiLangRomanizer.isHindi(line.line) -> MultiLangRomanizer.romanizeHindi(line.line)
@@ -910,7 +890,7 @@ object LyricsUtils {
         } else {
             val processedPlain = plainLines.map { line ->
                 val romanized = when {
-                    MultiLangRomanizer.isJapanese(line) -> MultiLangRomanizer.romanizeJapanese(line)
+                    MultiLangRomanizer.isJapanese(line, entireLyricsHasKana) -> MultiLangRomanizer.romanizeJapanese(line)
                     MultiLangRomanizer.isChinese(line) -> MultiLangRomanizer.romanizeChinese(line)
                     MultiLangRomanizer.isKorean(line) -> MultiLangRomanizer.romanizeKorean(line)
                     MultiLangRomanizer.isHindi(line) -> MultiLangRomanizer.romanizeHindi(line)