Improve Android read aloud TTS handling
This commit is contained in:
parent
df90b4c36f
commit
8bb920caf8
@ -9,6 +9,12 @@
|
||||
<uses-permission android:name="android.permission.FOREGROUND_SERVICE_MEDIA_PLAYBACK"/>
|
||||
<uses-permission android:name="android.permission.POST_NOTIFICATIONS"/>
|
||||
|
||||
<queries>
|
||||
<intent>
|
||||
<action android:name="android.intent.action.TTS_SERVICE"/>
|
||||
</intent>
|
||||
</queries>
|
||||
|
||||
<application
|
||||
android:allowBackup="true"
|
||||
android:icon="@mipmap/ic_launcher"
|
||||
|
||||
@ -18,6 +18,7 @@ import android.speech.tts.UtteranceProgressListener
|
||||
import androidx.core.app.NotificationCompat
|
||||
import androidx.core.app.NotificationManagerCompat
|
||||
import androidx.core.content.ContextCompat
|
||||
import java.util.Locale
|
||||
import kotlinx.coroutines.CoroutineScope
|
||||
import kotlinx.coroutines.Dispatchers
|
||||
import kotlinx.coroutines.SupervisorJob
|
||||
@ -29,6 +30,7 @@ import kotlinx.coroutines.launch
|
||||
actual object ReadAloudPlatform {
|
||||
actual val isSupported: Boolean = true
|
||||
actual val state: StateFlow<ReadAloudState> = AndroidReadAloudEngine.state
|
||||
actual val settingsState: StateFlow<ReadAloudSettingsState> = AndroidReadAloudEngine.settingsState
|
||||
|
||||
actual fun prepare(bookTitle: String, sentences: List<ReadAloudSentence>, startIndex: Int) {
|
||||
val context = androidAppContext() ?: return
|
||||
@ -49,6 +51,21 @@ actual object ReadAloudPlatform {
|
||||
actual fun skip(delta: Int) {
|
||||
AndroidReadAloudEngine.skip(delta)
|
||||
}
|
||||
|
||||
/** Forwards a settings refresh to [AndroidReadAloudEngine]; a no-op while no application context is available. */
actual fun refreshSettings() {
    androidAppContext()?.let { appContext ->
        AndroidReadAloudEngine.refreshSettings(appContext)
    }
}
|
||||
|
||||
/** Forwards the engine choice ([engineId], or null) to [AndroidReadAloudEngine]; a no-op without an application context. */
actual fun selectEngine(engineId: String?) {
    val appContext = androidAppContext()
    if (appContext != null) {
        AndroidReadAloudEngine.selectEngine(appContext, engineId)
    }
}
|
||||
|
||||
/** Forwards the voice choice ([voiceId], or null) to [AndroidReadAloudEngine]; a no-op without an application context. */
actual fun selectVoice(voiceId: String?) {
    androidAppContext()?.let { appContext ->
        AndroidReadAloudEngine.selectVoice(appContext, voiceId)
    }
}
|
||||
}
|
||||
|
||||
private object AndroidReadAloudEngine {
|
||||
@ -56,12 +73,22 @@ private object AndroidReadAloudEngine {
|
||||
private const val BeforePausePrefix = "read-aloud-before-"
|
||||
private const val SpeakPrefix = "read-aloud-speak-"
|
||||
private const val AfterPausePrefix = "read-aloud-after-"
|
||||
private const val SettingsPrefs = "read_aloud_tts"
|
||||
private const val SelectedEngineKey = "selected_engine"
|
||||
private const val EngineChoiceMadeKey = "engine_choice_made"
|
||||
private const val SelectedVoiceKey = "selected_voice"
|
||||
private val RussianLocale = Locale.forLanguageTag("ru-RU")
|
||||
|
||||
private val mutableState = MutableStateFlow(ReadAloudState())
|
||||
val state: StateFlow<ReadAloudState> = mutableState
|
||||
private val mutableSettingsState = MutableStateFlow(ReadAloudSettingsState())
|
||||
val settingsState: StateFlow<ReadAloudSettingsState> = mutableSettingsState
|
||||
|
||||
private var tts: TextToSpeech? = null
|
||||
private var ttsReady = false
|
||||
private var ttsEngineId: String? = null
|
||||
private var engineProbe: TextToSpeech? = null
|
||||
private var voiceProbe: TextToSpeech? = null
|
||||
private var shouldSpeakWhenReady = false
|
||||
private var bookTitle: String = ""
|
||||
private var sentences: List<ReadAloudSentence> = emptyList()
|
||||
@ -110,11 +137,100 @@ private object AndroidReadAloudEngine {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Reloads the list of installed TTS engines into the settings state and then
 * triggers a voice reload for the effective engine.
 *
 * A short-lived [TextToSpeech] "probe" instance is created only to enumerate
 * the engines; it is shut down as soon as its init callback has been handled.
 * If no engine is stored and the user has never made an explicit engine choice,
 * the first engine recognised by `isRhVoice()` is selected and persisted.
 */
fun refreshSettings(context: Context) {
    val appContext = context.applicationContext
    mutableSettingsState.value = mutableSettingsState.value.copy(loading = true, message = null)
    // Drop a probe left over from an earlier refresh before starting a new one.
    engineProbe?.shutdown()

    var probe: TextToSpeech? = null

    // Shuts the probe down and forgets it, unless a newer probe has already replaced it.
    fun releaseProbe() {
        probe?.shutdown()
        if (engineProbe === probe) engineProbe = null
    }

    probe = TextToSpeech(appContext) { status ->
        if (status == TextToSpeech.SUCCESS) {
            val installed = probe?.engines.orEmpty()
                .map { info -> ReadAloudEngineOption(id = info.name, label = info.label ?: info.name) }
                .sortedBy { option -> option.label.lowercase() }

            // Keep the stored engine only while it is still installed.
            val stored = selectedEngineId(appContext)
            var chosen: String? = if (stored != null && installed.any { it.id == stored }) stored else null

            if (chosen == null && !appContext.readAloudPrefs().getBoolean(EngineChoiceMadeKey, false)) {
                chosen = installed.firstOrNull { it.isRhVoice() }?.id
                chosen?.let { rhVoiceId ->
                    appContext.readAloudPrefs().edit().putString(SelectedEngineKey, rhVoiceId).apply()
                }
            }

            mutableSettingsState.value = mutableSettingsState.value.copy(
                engines = installed,
                selectedEngineId = chosen,
            )
            releaseProbe()
            // `loading` stays true here; the voice probe clears it when it finishes.
            refreshVoices(appContext, chosen)
        } else {
            mutableSettingsState.value = mutableSettingsState.value.copy(
                loading = false,
                message = "Could not load Android TTS engines.",
            )
            releaseProbe()
        }
    }
    engineProbe = probe
}
|
||||
|
||||
/**
 * Stores [engineId] as the user's engine choice (null removes the stored engine),
 * forgets the stored voice, tears down the current TTS instance and starts
 * loading the voices of the newly selected engine.
 */
fun selectEngine(context: Context, engineId: String?) {
    val appContext = context.applicationContext

    val editor = appContext.readAloudPrefs().edit()
    editor.putBoolean(EngineChoiceMadeKey, true)
    editor.putStringOrRemove(SelectedEngineKey, engineId)
    editor.remove(SelectedVoiceKey)
    editor.apply()

    resetTts()

    val previous = mutableSettingsState.value
    mutableSettingsState.value = previous.copy(
        selectedEngineId = engineId,
        selectedVoiceId = null,
        voices = emptyList(),
        loading = true,
        message = null,
    )
    refreshVoices(appContext, engineId)
}
|
||||
|
||||
/**
 * Stores [voiceId] as the selected voice (null removes the stored voice), publishes it
 * in the settings state, re-applies the TTS configuration and, when playback is active
 * on a ready engine, re-speaks the current sentence.
 */
fun selectVoice(context: Context, voiceId: String?) {
    val appContext = context.applicationContext

    val editor = appContext.readAloudPrefs().edit()
    editor.putStringOrRemove(SelectedVoiceKey, voiceId)
    editor.apply()

    mutableSettingsState.value = mutableSettingsState.value.copy(selectedVoiceId = voiceId)
    applyTtsConfiguration(appContext)

    val restartSpeech = mutableState.value.playing && ttsReady
    if (restartSpeech) speakCurrent()
}
|
||||
|
||||
private fun ensureTts(context: Context) {
|
||||
if (tts != null) return
|
||||
tts = TextToSpeech(context) { status ->
|
||||
val appContext = context.applicationContext
|
||||
val selectedEngine = selectedEngineId(appContext)
|
||||
if (tts != null && ttsEngineId == selectedEngine) return
|
||||
resetTts()
|
||||
ttsEngineId = selectedEngine
|
||||
tts = if (selectedEngine == null) {
|
||||
TextToSpeech(appContext) { status ->
|
||||
handleTtsInit(appContext, status)
|
||||
}
|
||||
} else {
|
||||
TextToSpeech(
|
||||
appContext,
|
||||
{ status -> handleTtsInit(appContext, status) },
|
||||
selectedEngine,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
private fun handleTtsInit(context: Context, status: Int) {
|
||||
ttsReady = status == TextToSpeech.SUCCESS
|
||||
if (ttsReady) {
|
||||
applyTtsConfiguration(context)
|
||||
tts?.setOnUtteranceProgressListener(object : UtteranceProgressListener() {
|
||||
override fun onStart(utteranceId: String?) = Unit
|
||||
|
||||
@ -139,7 +255,6 @@ private object AndroidReadAloudEngine {
|
||||
stop()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private fun speakCurrent() {
|
||||
val sentence = sentences.getOrNull(currentIndex) ?: run {
|
||||
@ -156,7 +271,7 @@ private object AndroidReadAloudEngine {
|
||||
} else {
|
||||
TextToSpeech.QUEUE_FLUSH
|
||||
}
|
||||
tts?.speak(sentence.text, queueMode, params, "$SpeakPrefix$currentIndex")
|
||||
tts?.speak(sentence.spokenText, queueMode, params, "$SpeakPrefix$currentIndex")
|
||||
if (sentence.pauseAfterMillis > 0) {
|
||||
tts?.playSilentUtterance(sentence.pauseAfterMillis, TextToSpeech.QUEUE_ADD, "$AfterPausePrefix$currentIndex")
|
||||
}
|
||||
@ -182,7 +297,106 @@ private object AndroidReadAloudEngine {
|
||||
stop()
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Starts a temporary [TextToSpeech] "voice probe" for [engineId] (or for the
 * two-argument constructor's engine when [engineId] is null). The result is
 * processed by [handleVoiceProbeInit]. Any previous voice probe is shut down first.
 */
private fun refreshVoices(context: Context, engineId: String?) {
    voiceProbe?.shutdown()

    var probe: TextToSpeech? = null
    // The listener reads `probe` when it fires, i.e. normally after the assignment below.
    val onInit = TextToSpeech.OnInitListener { status ->
        handleVoiceProbeInit(context, probe, status)
    }
    probe = when (engineId) {
        null -> TextToSpeech(context, onInit)
        else -> TextToSpeech(context, onInit, engineId)
    }
    voiceProbe = probe
}
|
||||
|
||||
/**
 * Init callback of the voice probe created by `refreshVoices`.
 *
 * On failure an error message is published. On success the probe's voices that
 * do not require a network connection are published (Russian locales first,
 * then by locale tag, then by lower-cased label), and a stored voice id that
 * the engine no longer reports is removed from the preferences. In both cases
 * loading ends and the probe is shut down.
 */
private fun handleVoiceProbeInit(context: Context, probe: TextToSpeech?, status: Int) {
    // Shuts the probe down and forgets it, unless a newer probe has already replaced it.
    fun releaseProbe() {
        probe?.shutdown()
        if (voiceProbe === probe) voiceProbe = null
    }

    if (status != TextToSpeech.SUCCESS) {
        mutableSettingsState.value = mutableSettingsState.value.copy(
            loading = false,
            message = "Could not load voices for the selected TTS engine.",
        )
        releaseProbe()
        return
    }

    val offlineVoices = probe?.voices.orEmpty()
        .mapNotNull { voice ->
            // Voices without a locale are not offered.
            val locale = voice.locale ?: return@mapNotNull null
            ReadAloudVoiceOption(
                id = voice.name,
                label = voice.name,
                localeTag = locale.toLanguageTag(),
                networkRequired = voice.isNetworkConnectionRequired,
            )
        }
        .filterNot { option -> option.networkRequired }
        .sortedWith(
            compareBy<ReadAloudVoiceOption>(
                // `false` sorts before `true`, so Russian voices come first.
                { !it.localeTag.startsWith("ru", ignoreCase = true) },
                { it.localeTag },
                { it.label.lowercase() },
            ),
        )

    val storedVoiceId = selectedVoiceId(context)
    val validVoiceId = storedVoiceId?.takeIf { id -> offlineVoices.any { it.id == id } }

    mutableSettingsState.value = mutableSettingsState.value.copy(
        voices = offlineVoices,
        selectedVoiceId = validVoiceId,
        loading = false,
        message = if (offlineVoices.isEmpty()) "No offline voices reported by the selected TTS engine." else null,
    )

    // A stored voice the engine no longer offers is dropped from the preferences.
    if (storedVoiceId != null && validVoiceId == null) {
        context.readAloudPrefs().edit().remove(SelectedVoiceKey).apply()
    }
    releaseProbe()
}
|
||||
|
||||
/**
 * Applies the stored voice to the active TTS instance. When no voice is stored, or the
 * stored one is not found among the engine's offline voices, the language is set to
 * [RussianLocale] instead.
 */
private fun applyTtsConfiguration(context: Context) {
    val wantedName = selectedVoiceId(context)
    val match = wantedName?.let { name ->
        tts?.voices?.firstOrNull { candidate ->
            candidate.name == name && !candidate.isNetworkConnectionRequired
        }
    }
    if (match == null) {
        tts?.setLanguage(RussianLocale)
    } else {
        tts?.voice = match
    }
}
|
||||
|
||||
/** Stops and shuts down the active TTS instance (if any) and clears all state tied to it. */
private fun resetTts() {
    ttsEngineId = null
    ttsReady = false
    shouldSpeakWhenReady = false
    tts?.let { engine ->
        engine.stop()
        engine.shutdown()
    }
    tts = null
}
|
||||
|
||||
/** Returns the engine id stored under [SelectedEngineKey], or null when none is stored. */
private fun selectedEngineId(context: Context): String? {
    val prefs = context.readAloudPrefs()
    return prefs.getString(SelectedEngineKey, null)
}
|
||||
|
||||
/** Returns the voice id stored under [SelectedVoiceKey], or null when none is stored. */
private fun selectedVoiceId(context: Context): String? {
    val prefs = context.readAloudPrefs()
    return prefs.getString(SelectedVoiceKey, null)
}
|
||||
|
||||
/** Opens the private preferences file named by [SettingsPrefs] that holds the read-aloud TTS settings. */
private fun Context.readAloudPrefs(): android.content.SharedPreferences {
    return getSharedPreferences(SettingsPrefs, Context.MODE_PRIVATE)
}
|
||||
|
||||
/** True when the engine id or its label contains "rhvoice", ignoring case. */
private fun ReadAloudEngineOption.isRhVoice(): Boolean =
    listOf(id to "rhvoice", label to "RHVoice").any { (text, marker) ->
        text.contains(marker, ignoreCase = true)
    }
|
||||
}
|
||||
|
||||
/**
 * Writes [value] under [key], or removes [key] when [value] is null.
 *
 * @return the editor call's result, so the call can be chained like the built-in editor methods.
 */
private fun android.content.SharedPreferences.Editor.putStringOrRemove(
    key: String,
    value: String?,
): android.content.SharedPreferences.Editor {
    return when (value) {
        null -> remove(key)
        else -> putString(key, value)
    }
}
|
||||
|
||||
class ReadAloudService : Service() {
|
||||
private val scope = CoroutineScope(SupervisorJob() + Dispatchers.Main.immediate)
|
||||
|
||||
@ -8,6 +8,7 @@ data class ReadAloudSentence(
|
||||
val start: Int,
|
||||
val endExclusive: Int,
|
||||
val text: String,
|
||||
val spokenText: String = text,
|
||||
val pauseBeforeMillis: Long = 0,
|
||||
val pauseAfterMillis: Long = 0,
|
||||
)
|
||||
@ -18,12 +19,37 @@ data class ReadAloudState(
|
||||
val sentenceIndex: Int? = null,
|
||||
)
|
||||
|
||||
/**
 * A TTS engine the user can pick in the read-aloud settings.
 *
 * @property id identifier passed back to `ReadAloudPlatform.selectEngine`.
 * @property label human-readable name shown in the engine menu.
 */
data class ReadAloudEngineOption(
    val id: String,
    val label: String,
)
|
||||
|
||||
/**
 * A voice offered by the selected TTS engine.
 *
 * @property id identifier passed back to `ReadAloudPlatform.selectVoice`.
 * @property label name shown in the voice menu.
 * @property localeTag language tag of the voice's locale (the settings dialog shows it next to the label).
 * @property networkRequired whether the voice needs a network connection.
 */
data class ReadAloudVoiceOption(
    val id: String,
    val label: String,
    val localeTag: String,
    val networkRequired: Boolean,
)
|
||||
|
||||
/**
 * Snapshot of the read-aloud settings shown by the settings dialog.
 *
 * @property engines engines available for selection.
 * @property selectedEngineId id of the chosen engine; null is displayed as "System default".
 * @property voices voices available for the selected engine.
 * @property selectedVoiceId id of the chosen voice; null is displayed as "Auto Russian".
 * @property loading true while engines or voices are still being loaded.
 * @property message optional message for the user; the dialog renders it in the error colour.
 */
data class ReadAloudSettingsState(
    val engines: List<ReadAloudEngineOption> = emptyList(),
    val selectedEngineId: String? = null,
    val voices: List<ReadAloudVoiceOption> = emptyList(),
    val selectedVoiceId: String? = null,
    val loading: Boolean = false,
    val message: String? = null,
)
|
||||
|
||||
/**
 * Platform entry point for the read-aloud feature. Each target supplies an
 * `actual` implementation; targets without TTS support report
 * [isSupported] = false and implement every function as a no-op.
 */
expect object ReadAloudPlatform {
    /** Whether read-aloud is available on this platform. */
    val isSupported: Boolean

    /** Current playback state. */
    val state: StateFlow<ReadAloudState>

    /** Current engine/voice settings, as shown by the settings dialog. */
    val settingsState: StateFlow<ReadAloudSettingsState>

    /** Hands the platform the book title, the sentences to read and the index to start from. */
    fun prepare(bookTitle: String, sentences: List<ReadAloudSentence>, startIndex: Int)

    fun play()

    fun stop()

    /** Moves by [delta] sentences; the UI calls it with -1 (previous) and 1 (next). */
    fun skip(delta: Int)

    /** Asks the platform to reload the engine and voice lists into [settingsState]. */
    fun refreshSettings()

    /** Selects the TTS engine with the given id; null means the system default. */
    fun selectEngine(engineId: String?)

    /** Selects the voice with the given id; null means automatic selection. */
    fun selectVoice(voiceId: String?)
}
|
||||
|
||||
@ -93,11 +93,7 @@ internal fun ContinuousBookReader(
|
||||
val hyphenation = remember { HyphenationRegistry() }
|
||||
val scope = rememberCoroutineScope()
|
||||
val textLineMetricsByItem = remember(contentPlan) { mutableStateMapOf<Int, TextLineMetrics>() }
|
||||
val contentPadding = if (isAndroidPlatform()) {
|
||||
PaddingValues(start = 0.dp, top = 6.dp, end = 0.dp, bottom = 6.dp)
|
||||
} else {
|
||||
PaddingValues(horizontal = 4.dp, vertical = 6.dp)
|
||||
}
|
||||
val contentPadding = PaddingValues(6.dp)
|
||||
|
||||
LazyColumn(
|
||||
state = listState,
|
||||
@ -174,10 +170,8 @@ internal fun ContinuousBookReader(
|
||||
hyphenation = hyphenation,
|
||||
style = readerParagraphTextStyle(book.language),
|
||||
highlightedRange = highlightedRange,
|
||||
/* Justify adds extra padding to the end, which hardly can be removed */
|
||||
textAlign = TextAlign.Justify,
|
||||
// so we add 6.dp to make it look symmetric
|
||||
modifier = Modifier.padding(start = (element.depth * 8).dp + 6.dp, end = 0.dp),
|
||||
modifier = Modifier.padding(start = (element.depth * 8).dp, end = 0.dp),
|
||||
onTextLayout = { textLineMetricsByItem[itemIndex] = it.toTextLineMetrics() },
|
||||
)
|
||||
is ReaderElement.Subtitle -> ReaderText(
|
||||
@ -444,7 +438,7 @@ private fun ReaderPane(book: Fb2Book, section: Fb2Section?, modifier: Modifier =
|
||||
language = book.language,
|
||||
hyphenation = hyphenation,
|
||||
style = readerParagraphTextStyle(book.language),
|
||||
textAlign = TextAlign.Justify,
|
||||
textAlign = TextAlign.Unspecified,
|
||||
)
|
||||
is Fb2Block.Subtitle -> ReaderText(
|
||||
text = block.content,
|
||||
@ -473,7 +467,7 @@ private fun ReaderText(
|
||||
) {
|
||||
val highlightColor = MaterialTheme.colorScheme.secondaryContainer
|
||||
val annotatedText = text.toAnnotatedString(language, hyphenation, highlightedRange, highlightColor)
|
||||
val needsSoftHyphenPaintWorkaround = isDesktopPlatform()
|
||||
val needsSoftHyphenPaintWorkaround = isDesktopPlatform
|
||||
var textLayout by remember(annotatedText) { mutableStateOf<TextLayoutResult?>(null) }
|
||||
val desktopHyphenColor = MaterialTheme.colorScheme.onSurface
|
||||
val desktopHyphenGutter = 8.dp
|
||||
@ -525,19 +519,22 @@ private fun ReaderText(
|
||||
@Composable
|
||||
private fun readerParagraphTextStyle(language: String?): TextStyle =
|
||||
MaterialTheme.typography.bodyLarge.copy(
|
||||
fontWeight = if( isAndroidPlatform()) FontWeight(350) else FontWeight.Normal,
|
||||
fontSize = if( isAndroidPlatform()) 21.sp else 18.sp,
|
||||
lineHeight = 28.sp,
|
||||
hyphens = if (isAndroidPlatform()) Hyphens.Auto else Hyphens.Unspecified,
|
||||
lineBreak = if (isAndroidPlatform()) LineBreak.Paragraph else LineBreak.Unspecified,
|
||||
fontWeight = if( isAndroidPlatform) FontWeight(350) else FontWeight.Normal,
|
||||
fontSize = if( isAndroidPlatform) 19.sp else 18.sp,
|
||||
lineHeight = 26.sp,
|
||||
letterSpacing = if (isAndroidPlatform) 0.sp else MaterialTheme.typography.bodyLarge.letterSpacing,
|
||||
hyphens = if (isAndroidPlatform) Hyphens.Auto else Hyphens.Unspecified,
|
||||
lineBreak = if (isAndroidPlatform) LineBreak.Paragraph else LineBreak.Unspecified,
|
||||
localeList = language?.takeIf(String::isNotBlank)?.let { LocaleList(Locale(it)) },
|
||||
)
|
||||
|
||||
private fun isAndroidPlatform(): Boolean =
|
||||
private val isAndroidPlatform: Boolean by lazy {
|
||||
getPlatform().name.startsWith("Android")
|
||||
}
|
||||
|
||||
private fun isDesktopPlatform(): Boolean =
|
||||
private val isDesktopPlatform: Boolean by lazy {
|
||||
getPlatform().name.startsWith("Java")
|
||||
}
|
||||
|
||||
private fun TextLayoutResult.endsAtSoftHyphen(text: String, line: Int): Boolean {
|
||||
val end = getLineEnd(line, visibleEnd = false)
|
||||
@ -663,6 +660,7 @@ private fun AnnotatedString.Builder.appendWithHighlight(
|
||||
internal fun buildReaderContentPlan(book: Fb2Book): ReaderContentPlan {
|
||||
val elements = mutableListOf<ReaderElement>()
|
||||
val sentences = mutableListOf<ReadAloudSentence>()
|
||||
var pendingPauseBeforeMillis = 0L
|
||||
|
||||
fun addTextSentences(
|
||||
itemIndex: Int,
|
||||
@ -670,17 +668,27 @@ internal fun buildReaderContentPlan(book: Fb2Book): ReaderContentPlan {
|
||||
pauseBeforeMillis: Long = 0,
|
||||
pauseAfterMillis: Long = 0,
|
||||
) {
|
||||
text.plainText().sentenceRanges().forEach { range ->
|
||||
val sentenceText = text.plainText().substring(range.start, range.endExclusive).trim()
|
||||
if (sentenceText.isNotEmpty()) {
|
||||
val plainText = text.plainText()
|
||||
if (plainText.isReadAloudPauseBreak()) {
|
||||
pendingPauseBeforeMillis = max(pendingPauseBeforeMillis, StarBreakPauseMillis)
|
||||
return
|
||||
}
|
||||
|
||||
plainText.sentenceRanges().forEach { range ->
|
||||
val sentenceText = plainText.substring(range.start, range.endExclusive).trim()
|
||||
val spokenText = sentenceText.toReadAloudSpokenText()
|
||||
if (spokenText.isNotEmpty()) {
|
||||
val effectivePauseBefore = pauseBeforeMillis + pendingPauseBeforeMillis
|
||||
pendingPauseBeforeMillis = 0L
|
||||
sentences += ReadAloudSentence(
|
||||
index = sentences.size,
|
||||
itemIndex = itemIndex,
|
||||
start = range.start,
|
||||
endExclusive = range.endExclusive,
|
||||
text = sentenceText,
|
||||
pauseBeforeMillis = pauseBeforeMillis,
|
||||
pauseAfterMillis = pauseAfterMillis,
|
||||
spokenText = spokenText,
|
||||
pauseBeforeMillis = effectivePauseBefore,
|
||||
pauseAfterMillis = pauseAfterMillis + range.pauseAfterMillis,
|
||||
)
|
||||
}
|
||||
}
|
||||
@ -698,9 +706,10 @@ internal fun buildReaderContentPlan(book: Fb2Book): ReaderContentPlan {
|
||||
start = 0,
|
||||
endExclusive = section.title!!.length,
|
||||
text = section.title!!,
|
||||
pauseBeforeMillis = HeadingPauseBeforeMillis,
|
||||
pauseBeforeMillis = HeadingPauseBeforeMillis + pendingPauseBeforeMillis,
|
||||
pauseAfterMillis = HeadingPauseAfterMillis,
|
||||
)
|
||||
pendingPauseBeforeMillis = 0L
|
||||
}
|
||||
section.readableBlocks().forEach { block ->
|
||||
val itemIndex = elements.size
|
||||
@ -756,11 +765,22 @@ internal sealed interface ReaderElement {
|
||||
/**
 * Character range of one sentence inside a paragraph's plain text.
 *
 * @property start index of the first character of the sentence.
 * @property endExclusive index just past the last character of the sentence.
 * @property pauseAfterMillis extra silence to insert after the sentence, in milliseconds.
 */
private data class ReaderSentenceRange(
    val start: Int,
    val endExclusive: Int,
    val pauseAfterMillis: Long = 0L,
)
|
||||
|
||||
/** Concatenates the text of all spans, without any separator, into one plain string. */
private fun Fb2Text.plainText(): String =
    buildString {
        for (span in spans) append(span.text)
    }
|
||||
|
||||
/**
 * True for a scene-break line such as `* * *`: after trimming, the text is at least
 * three characters long and consists only of asterisks and whitespace.
 */
private fun String.isReadAloudPauseBreak(): Boolean {
    val trimmed = trim()
    if (trimmed.length < 3) return false
    return trimmed.none { ch -> ch != '*' && !ch.isWhitespace() }
}
|
||||
|
||||
/** Matches a run of two or more ASCII dots. Compiled once, because the function below runs for every sentence. */
private val RepeatedDotsRegex = Regex("\\.{2,}")

/**
 * Produces the text handed to the speech engine for a sentence: every ellipsis,
 * whether the single character `…` or a run of dots, becomes one plain full stop,
 * and the result is trimmed.
 *
 * The `…` character is converted first so that mixed runs such as `..…` or `….`
 * also collapse to a single dot; collapsing dots before the conversion would
 * leave `..` behind for those inputs.
 */
private fun String.toReadAloudSpokenText(): String =
    replace('…', '.')
        .replace(RepeatedDotsRegex, ".")
        .trim()
|
||||
|
||||
private fun String.sentenceRanges(): List<ReaderSentenceRange> {
|
||||
val ranges = mutableListOf<ReaderSentenceRange>()
|
||||
var start = 0
|
||||
@ -773,9 +793,15 @@ private fun String.sentenceRanges(): List<ReaderSentenceRange> {
|
||||
var index = start
|
||||
while (index < length) {
|
||||
if (this[index].isSentenceTerminator()) {
|
||||
var end = index + 1
|
||||
val terminatorEnd = sentenceTerminatorEnd(index)
|
||||
val pauseAfterMillis = if (hasReadAloudEllipsisAt(index, terminatorEnd)) {
|
||||
EllipsisPauseAfterMillis
|
||||
} else {
|
||||
0L
|
||||
}
|
||||
var end = terminatorEnd
|
||||
while (end < length && this[end] in "\"'»”’)]}") end += 1
|
||||
ranges += ReaderSentenceRange(start, end)
|
||||
ranges += ReaderSentenceRange(start, end, pauseAfterMillis)
|
||||
start = end
|
||||
skipLeadingWhitespace()
|
||||
index = start
|
||||
@ -789,11 +815,23 @@ private fun String.sentenceRanges(): List<ReaderSentenceRange> {
|
||||
return ranges
|
||||
}
|
||||
|
||||
/**
 * Returns the index just past the sentence terminator that starts at [index].
 * A `.` absorbs every directly following `.`; any other terminator is one character long.
 */
private fun String.sentenceTerminatorEnd(index: Int): Int {
    val next = index + 1
    if (this[index] != '.') return next
    for (position in next until length) {
        if (this[position] != '.') return position
    }
    return length
}
|
||||
|
||||
/**
 * True when the terminator starting at [index] is an ellipsis: either the `…`
 * character, or a run of at least two dots ending at [terminatorEnd].
 */
private fun String.hasReadAloudEllipsisAt(index: Int, terminatorEnd: Int): Boolean =
    when (this[index]) {
        '…' -> true
        '.' -> terminatorEnd - index >= 2
        else -> false
    }
|
||||
|
||||
/** True for the characters that end a sentence: `.`, `!`, `?` and `…`. */
private fun Char.isSentenceTerminator(): Boolean =
    this in ".!?…"
|
||||
|
||||
private const val HeadingPauseBeforeMillis = 1_000L
|
||||
private const val HeadingPauseAfterMillis = 600L
|
||||
private const val StarBreakPauseMillis = 1_200L
|
||||
private const val EllipsisPauseAfterMillis = 350L
|
||||
|
||||
private fun List<Fb2Section>.flattenSections(depth: Int = 0): List<ChapterEntry> =
|
||||
flatMapIndexed { index, section ->
|
||||
|
||||
@ -21,6 +21,8 @@ import androidx.compose.material.icons.filled.PlayArrow
|
||||
import androidx.compose.material.icons.filled.Replay
|
||||
import androidx.compose.material.icons.filled.Settings
|
||||
import androidx.compose.material.icons.filled.Stop
|
||||
import androidx.compose.material3.AlertDialog
|
||||
import androidx.compose.material3.CircularProgressIndicator
|
||||
import androidx.compose.material3.DropdownMenu
|
||||
import androidx.compose.material3.DropdownMenuItem
|
||||
import androidx.compose.material3.ExperimentalMaterial3Api
|
||||
@ -34,6 +36,7 @@ import androidx.compose.material3.SnackbarHost
|
||||
import androidx.compose.material3.SnackbarHostState
|
||||
import androidx.compose.material3.Surface
|
||||
import androidx.compose.material3.Text
|
||||
import androidx.compose.material3.TextButton
|
||||
import androidx.compose.runtime.Composable
|
||||
import androidx.compose.runtime.DisposableEffect
|
||||
import androidx.compose.runtime.LaunchedEffect
|
||||
@ -74,7 +77,9 @@ internal fun BookView(
|
||||
var restored by remember(fileId) { mutableStateOf(false) }
|
||||
var markedRead by remember(fileId) { mutableStateOf(false) }
|
||||
var readAloudPanelVisible by remember(fileId) { mutableStateOf(false) }
|
||||
var readAloudSettingsVisible by remember(fileId) { mutableStateOf(false) }
|
||||
val readAloudState by ReadAloudPlatform.state.collectAsState()
|
||||
val readAloudSettings by ReadAloudPlatform.settingsState.collectAsState()
|
||||
val platformName = getPlatform().name
|
||||
val showShareAction = platformName.startsWith("Android")
|
||||
val showViewFileAction = platformName.startsWith("Java")
|
||||
@ -249,11 +254,24 @@ internal fun BookView(
|
||||
},
|
||||
onBack = { ReadAloudPlatform.skip(-1) },
|
||||
onForward = { ReadAloudPlatform.skip(1) },
|
||||
onSettings = {
|
||||
ReadAloudPlatform.refreshSettings()
|
||||
readAloudSettingsVisible = true
|
||||
},
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (readAloudSettingsVisible) {
|
||||
ReadAloudSettingsDialog(
|
||||
state = readAloudSettings,
|
||||
onDismiss = { readAloudSettingsVisible = false },
|
||||
onEngineSelected = { ReadAloudPlatform.selectEngine(it) },
|
||||
onVoiceSelected = { ReadAloudPlatform.selectVoice(it) },
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
@Composable
|
||||
@ -372,6 +390,7 @@ private fun ReadAloudPanel(
|
||||
onPlayStop: () -> Unit,
|
||||
onBack: () -> Unit,
|
||||
onForward: () -> Unit,
|
||||
onSettings: () -> Unit,
|
||||
) {
|
||||
Surface(
|
||||
tonalElevation = 3.dp,
|
||||
@ -396,9 +415,95 @@ private fun ReadAloudPanel(
|
||||
IconButton(onClick = onForward) {
|
||||
Icon(Icons.Filled.FastForward, contentDescription = "Next sentence")
|
||||
}
|
||||
IconButton(onClick = {}, enabled = false) {
|
||||
IconButton(onClick = onSettings) {
|
||||
Icon(Icons.Filled.Settings, contentDescription = "Read aloud settings")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Dialog for choosing the TTS engine and the offline voice used by read-aloud.
 *
 * @param state settings snapshot to render.
 * @param onDismiss invoked when the dialog is dismissed or "Done" is pressed.
 * @param onEngineSelected invoked with the chosen engine id, or null for "System default".
 * @param onVoiceSelected invoked with the chosen voice id, or null for "Auto Russian".
 */
@Composable
private fun ReadAloudSettingsDialog(
    state: ReadAloudSettingsState,
    onDismiss: () -> Unit,
    onEngineSelected: (String?) -> Unit,
    onVoiceSelected: (String?) -> Unit,
) {
    var engineMenuExpanded by remember { mutableStateOf(false) }
    var voiceMenuExpanded by remember { mutableStateOf(false) }

    val currentEngine = state.engines.find { it.id == state.selectedEngineId }
    val engineButtonText = currentEngine?.label ?: "System default"
    val currentVoice = state.voices.find { it.id == state.selectedVoiceId }
    val voiceButtonText = if (currentVoice != null) {
        "${currentVoice.label} (${currentVoice.localeTag})"
    } else {
        "Auto Russian"
    }
    // The voice menu cannot be opened while loading or when the engine reports no voices.
    val voiceButtonEnabled = !state.loading && state.voices.isNotEmpty()

    // Each pick closes its menu before the choice is reported.
    val pickEngine: (String?) -> Unit = { engineId ->
        engineMenuExpanded = false
        onEngineSelected(engineId)
    }
    val pickVoice: (String?) -> Unit = { voiceId ->
        voiceMenuExpanded = false
        onVoiceSelected(voiceId)
    }

    AlertDialog(
        onDismissRequest = onDismiss,
        title = { Text("Read aloud") },
        confirmButton = {
            TextButton(onClick = onDismiss) {
                Text("Done")
            }
        },
        text = {
            Column {
                Text("TTS engine", style = MaterialTheme.typography.labelMedium)
                Box {
                    TextButton(onClick = { engineMenuExpanded = true }) {
                        Text(engineButtonText)
                    }
                    DropdownMenu(
                        expanded = engineMenuExpanded,
                        onDismissRequest = { engineMenuExpanded = false },
                    ) {
                        DropdownMenuItem(
                            text = { Text("System default") },
                            onClick = { pickEngine(null) },
                        )
                        for (engine in state.engines) {
                            DropdownMenuItem(
                                text = { Text(engine.label) },
                                onClick = { pickEngine(engine.id) },
                            )
                        }
                    }
                }

                Text("Offline voice", style = MaterialTheme.typography.labelMedium)
                Box {
                    TextButton(onClick = { voiceMenuExpanded = true }, enabled = voiceButtonEnabled) {
                        Text(voiceButtonText)
                    }
                    DropdownMenu(
                        expanded = voiceMenuExpanded,
                        onDismissRequest = { voiceMenuExpanded = false },
                    ) {
                        DropdownMenuItem(
                            text = { Text("Auto Russian") },
                            onClick = { pickVoice(null) },
                        )
                        for (voice in state.voices) {
                            DropdownMenuItem(
                                text = { Text("${voice.label} (${voice.localeTag})") },
                                onClick = { pickVoice(voice.id) },
                            )
                        }
                    }
                }

                if (state.loading) {
                    Row(verticalAlignment = Alignment.CenterVertically) {
                        CircularProgressIndicator()
                        Text("Loading voices...", modifier = Modifier.padding(start = 12.dp))
                    }
                }
                val message = state.message
                if (message != null) {
                    Text(message, color = MaterialTheme.colorScheme.error)
                }
            }
        },
    )
}
|
||||
|
||||
@ -0,0 +1,58 @@
|
||||
package net.sergeych.toread
|
||||
|
||||
import kotlin.test.Test
|
||||
import kotlin.test.assertEquals
|
||||
import net.sergeych.toread.fb2.Fb2Block
|
||||
import net.sergeych.toread.fb2.Fb2Book
|
||||
import net.sergeych.toread.fb2.Fb2Section
|
||||
import net.sergeych.toread.fb2.Fb2Text
|
||||
import net.sergeych.toread.fb2.Fb2TextSpan
|
||||
|
||||
/** Checks how `buildReaderContentPlan` derives read-aloud sentences, spoken text and pauses. */
class ReadAloudContentPlanTest {
    @Test
    fun starBreakAddsPauseBeforeNextSpokenSentence() {
        val sentences = planFor("Before.", "* * *", "After.").sentences

        // The star line itself is not read; it only delays the sentence that follows it.
        assertEquals(listOf("Before.", "After."), sentences.map { it.text })
        assertEquals(0L, sentences[0].pauseBeforeMillis)
        assertEquals(1_200L, sentences[1].pauseBeforeMillis)
    }

    @Test
    fun ellipsisIsCollapsedForSpeechAndAddsShortPause() {
        val sentences = planFor("Wait... Go.. Stop… Done.").sentences

        // Displayed text keeps the ellipsis, spoken text collapses it to a single dot.
        assertEquals(
            listOf("Wait...", "Go..", "Stop…", "Done."),
            sentences.map { it.text },
        )
        assertEquals(
            listOf("Wait.", "Go.", "Stop.", "Done."),
            sentences.map { it.spokenText },
        )
        assertEquals(listOf(350L, 350L, 350L, 0L), sentences.map { it.pauseAfterMillis })
    }

    /** Builds the content plan of a one-section book whose blocks are the given paragraphs. */
    private fun planFor(vararg paragraphs: String) =
        buildReaderContentPlan(
            Fb2Book(
                title = "Book",
                sections = listOf(
                    Fb2Section(blocks = paragraphs.map { paragraph(it) }),
                ),
            ),
        )

    private fun paragraph(text: String): Fb2Block.Paragraph {
        val span = Fb2TextSpan(text)
        return Fb2Block.Paragraph(Fb2Text(listOf(span)))
    }
}
|
||||
@ -7,9 +7,14 @@ actual object ReadAloudPlatform {
|
||||
actual val isSupported: Boolean = false
|
||||
private val mutableState = MutableStateFlow(ReadAloudState())
|
||||
actual val state: StateFlow<ReadAloudState> = mutableState
|
||||
private val mutableSettingsState = MutableStateFlow(ReadAloudSettingsState())
|
||||
actual val settingsState: StateFlow<ReadAloudSettingsState> = mutableSettingsState
|
||||
|
||||
actual fun prepare(bookTitle: String, sentences: List<ReadAloudSentence>, startIndex: Int) = Unit
|
||||
actual fun play() = Unit
|
||||
actual fun stop() = Unit
|
||||
actual fun skip(delta: Int) = Unit
|
||||
actual fun refreshSettings() = Unit
|
||||
actual fun selectEngine(engineId: String?) = Unit
|
||||
actual fun selectVoice(voiceId: String?) = Unit
|
||||
}
|
||||
|
||||
@ -7,9 +7,14 @@ actual object ReadAloudPlatform {
|
||||
actual val isSupported: Boolean = false
|
||||
private val mutableState = MutableStateFlow(ReadAloudState())
|
||||
actual val state: StateFlow<ReadAloudState> = mutableState
|
||||
private val mutableSettingsState = MutableStateFlow(ReadAloudSettingsState())
|
||||
actual val settingsState: StateFlow<ReadAloudSettingsState> = mutableSettingsState
|
||||
|
||||
actual fun prepare(bookTitle: String, sentences: List<ReadAloudSentence>, startIndex: Int) = Unit
|
||||
actual fun play() = Unit
|
||||
actual fun stop() = Unit
|
||||
actual fun skip(delta: Int) = Unit
|
||||
actual fun refreshSettings() = Unit
|
||||
actual fun selectEngine(engineId: String?) = Unit
|
||||
actual fun selectVoice(voiceId: String?) = Unit
|
||||
}
|
||||
|
||||
181
docs/russian-hyphenation-rules.md
Normal file
181
docs/russian-hyphenation-rules.md
Normal file
@ -0,0 +1,181 @@
|
||||
# Russian Hyphenation Rules
|
||||
|
||||
This document defines the rule set for the Russian hyphenation plugin. The
|
||||
implementation should return the original word with soft hyphens inserted at
|
||||
preferred transfer points. Tests may render soft hyphens as `-` for readability.
|
||||
|
||||
The goal is not perfect dictionary hyphenation. The goal is a deterministic,
|
||||
readable rule set that avoids invalid transfers and produces better break points
|
||||
than the current vowel/consonant heuristic.
|
||||
|
||||
## Character Classes
|
||||
|
||||
Use lowercase character checks for classification.
|
||||
|
||||
- Vowels: `а е ё и о у ы э ю я`
|
||||
- Consonants: Russian letters except vowels, `ь`, `ъ`, and `й`
|
||||
- Non-syllabic letters: `ь ъ`
|
||||
- Semivowel: `й`
|
||||
|
||||
Only hyphenate words made of Russian letters. Leave mixed words, words with
|
||||
digits, existing soft hyphens, and abbreviations unchanged. Short words are
|
||||
hyphenated only when they pass the same legal side-length and vowel filters as
|
||||
longer words.
|
||||
|
||||
## Hard Legal Filters
|
||||
|
||||
A candidate break is illegal if any of these is true:
|
||||
|
||||
- Either side would have fewer than 2 letters.
|
||||
- Either side would contain no vowel.
|
||||
- The right side starts with `ь`, `ъ`, `й`, or `ы`.
|
||||
- The break falls immediately before `ь` or `ъ`; that is, do not split `под-ъезд` or
|
||||
`бол-ьшой`. Prefer `подъ-езд`, `боль-шой`.
|
||||
- The break falls immediately before `й`; that is, do not split `ма-йор` or `во-йна`.
|
||||
Prefer `май-ор`, `вой-на`.
|
||||
- The break separates a consonant from a following vowel: reject `люб-овь`,
|
||||
`паст-ух`, `реб-ята`. Prefer `лю-бовь`, `па-стух` or `пас-тух`, `ре-бята`
|
||||
or `ребя-та`.
|
||||
|
||||
## Candidate Generation
|
||||
|
||||
Work between adjacent vowel nuclei. For each span from one vowel to the next,
|
||||
choose preferred break candidates from the consonant cluster between them.
|
||||
|
||||
### Adjacent Vowels
|
||||
|
||||
If two vowels are adjacent, allow a break between them when both resulting parts
|
||||
pass the legal filters.
|
||||
|
||||
Examples:
|
||||
|
||||
- `поэт` -> `по-эт`
|
||||
- `академия` -> `ака-де-мия` and not `а-кадемия` or `академи-я` (each would leave a one-letter part)
|
||||
|
||||
### One Consonant Between Vowels
|
||||
|
||||
For `V C V`, break before the consonant.
|
||||
|
||||
Examples:
|
||||
|
||||
- `молоко` -> `мо-ло-ко`
|
||||
- `корова` -> `ко-ро-ва`
|
||||
- `переход` -> `пе-ре-ход`
|
||||
|
||||
### Two Consonants Between Vowels
|
||||
|
||||
For `V C C V`, prefer a break between the consonants.
|
||||
|
||||
Examples:
|
||||
|
||||
- `лампа` -> `лам-па`
|
||||
- `гордый` -> `гор-дый`
|
||||
- `письмо` -> `пись-мо`
|
||||
|
||||
If the letters between two vowels include `й`, `ь`, or `ъ`, keep that letter on the left and break
|
||||
after it when legal. This holds for any cluster length, as `майор` and `большой` below show.
|
||||
|
||||
Examples:
|
||||
|
||||
- `майор` -> `май-ор`
|
||||
- `подъезд` -> `подъ-езд`
|
||||
- `большой` -> `боль-шой`
|
||||
|
||||
### Three Or More Consonants Between Vowels
|
||||
|
||||
For longer clusters, prefer the latest break that still leaves a pronounceable
|
||||
left part, but keep common inseparable starts on the right:
|
||||
|
||||
- Keep `ст`, `ск`, `сп`, `сн`, `сл`, `см`, `св` together on the right when
|
||||
possible.
|
||||
- Keep stop/liquid pairs together on the right when possible: `бр`, `бл`,
|
||||
`вр`, `вл`, `гр`, `гл`, `др`, `тр`, `кр`, `кл`, `пр`, `пл`, `фр`, `фл`.
|
||||
- Otherwise prefer splitting before the last consonant in the cluster.
|
||||
|
||||
Examples:
|
||||
|
||||
- `сестра` -> `се-стра`
|
||||
- `острый` stays unbroken: `о-стрый` is rejected because the left side would have only one letter
|
||||
- `родство` -> `род-ство`
|
||||
- `чувство` -> `чув-ство`
|
||||
- `предложение` -> `пред-ло-же-ние`
|
||||
|
||||
## Double Consonants
|
||||
|
||||
When two identical consonants stand between vowels, prefer splitting between
|
||||
them.
|
||||
|
||||
Examples:
|
||||
|
||||
- `масса` -> `мас-са`
|
||||
- `длинный` -> `длин-ный`
|
||||
- `касса` -> `кас-са`
|
||||
|
||||
Do not force this rule when the double consonant starts a root after a prefix.
|
||||
Without a dictionary, this exception is hard to detect, so implementation may
|
||||
leave such words to the general cluster logic.
|
||||
|
||||
## Prefix-Like Boundaries
|
||||
|
||||
Without a morphology dictionary, treat these as preferred heuristics only.
|
||||
|
||||
If a word starts with a common prefix and the following part is legal, prefer a
|
||||
break after the prefix:
|
||||
|
||||
- `без`, `бес`, `воз`, `вос`, `вз`, `вс`, `из`, `ис`, `низ`, `нис`, `раз`,
|
||||
`рас`, `роз`, `рос`, `от`, `об`, `объ`, `под`, `подъ`, `пред`, `пере`,
|
||||
`при`, `про`, `над`, `сверх`, `меж`
|
||||
|
||||
Examples:
|
||||
|
||||
- `подбить` -> `под-бить`
|
||||
- `размах` -> `раз-мах`
|
||||
- `предложение` -> `пред-ло-же-ние`
|
||||
- `подъезд` -> `подъ-езд`
|
||||
|
||||
Do not create a right side starting with `ы`; prefer later legal breaks.
|
||||
|
||||
Examples:
|
||||
|
||||
- `разыскать` -> `ра-зыскать` or `разыс-кать`, not `раз-ыскать`
|
||||
- `розыгрыш` -> `ро-зыгрыш` or `розыг-рыш`, not `роз-ыгрыш`
|
||||
|
||||
## Ranking
|
||||
|
||||
A word may have several legal break points. The plugin should insert all good
|
||||
breaks, but it should avoid noisy low-quality breaks. Use this ranking:
|
||||
|
||||
1. Prefix boundary, if legal.
|
||||
2. Double consonant split between vowels.
|
||||
3. Syllable breaks from adjacent vowel and consonant-cluster rules.
|
||||
4. Longer-cluster fallback break before the last consonant.
|
||||
|
||||
Reject candidates that are legal but awkward when a better candidate is within
|
||||
one character and both candidates divide the same vowel-to-vowel span.
|
||||
|
||||
## Example Expectations
|
||||
|
||||
These strings use `-` where the implementation will insert `SoftHyphen`.
|
||||
|
||||
```text
|
||||
молоко -> мо-ло-ко
|
||||
корова -> ко-ро-ва
|
||||
яблоко -> яб-ло-ко
|
||||
повествование -> по-вест-во-ва-ние
|
||||
предложение -> пред-ло-же-ние
|
||||
компьютер -> компью-тер
|
||||
подъезд -> подъ-езд
|
||||
большой -> боль-шой
|
||||
майор -> май-ор
|
||||
масса -> мас-са
|
||||
длинный -> длин-ный
|
||||
разыскать -> ра-зыс-кать
|
||||
розыгрыш -> ро-зыг-рыш
|
||||
```
|
||||
|
||||
## Non-Goals
|
||||
|
||||
- Full dictionary-level hyphenation.
|
||||
- Stress-aware syllabification.
|
||||
- Exact morpheme detection for every prefix/root boundary.
|
||||
- Hyphenating proper abbreviations and mixed-script technical identifiers.
|
||||
@ -29,22 +29,23 @@ class HyphenationRegistry(
|
||||
/**
 * Returns a copy of [text] in which word tokens have been handed to [plugin] and every other
 * character is copied through unchanged. The builder is pre-sized to the text length plus one
 * twelfth of it.
 *
 * NOTE(review): this span shows two variants of several statements side by side
 * (`flushWord`/`flushToken`, `isLetter`/`isLetterOrDigit`); they look like the removed and added
 * lines of a diff with the markers lost. Only one variant of each can be live code. Confirm which.
 */
private fun hyphenate(text: String, plugin: HyphenationPlugin): String = buildString(text.length + text.length / 12) {
|
||||
// Index where the token currently being collected starts; -1 while no token is open.
var wordStart = -1
|
||||
|
||||
fun flushWord(end: Int) {
|
||||
fun flushToken(end: Int) {
|
||||
if (wordStart >= 0) {
|
||||
append(plugin.hyphenateWord(text.substring(wordStart, end)))
|
||||
val token = text.substring(wordStart, end)
|
||||
// Only tokens made purely of letters and free of soft hyphens go to the plugin; the rest is appended verbatim.
append(if (SoftHyphen !in token && token.all(Char::isLetter)) plugin.hyphenateWord(token) else token)
|
||||
wordStart = -1
|
||||
}
|
||||
}
|
||||
|
||||
text.forEachIndexed { index, char ->
|
||||
if (char.isLetter()) {
|
||||
if (char.isLetterOrDigit() || char == SoftHyphen) {
|
||||
if (wordStart < 0) wordStart = index
|
||||
} else {
|
||||
flushWord(index)
|
||||
flushToken(index)
|
||||
append(char)
|
||||
}
|
||||
}
|
||||
// Flush a token that runs to the very end of the text.
flushWord(text.length)
|
||||
flushToken(text.length)
|
||||
}
|
||||
}
|
||||
|
||||
@ -71,18 +72,123 @@ object RussianHyphenationPlugin : HyphenationPlugin {
|
||||
/** Language tags declared by this plugin: `ru` and `rus`. */
override val languageTags: Set<String> = setOf("ru", "rus")
|
||||
|
||||
/**
 * Returns [word] with break points passed to `insertBreaks`, or [word] itself when it is skipped.
 *
 * NOTE(review): this span contains two bodies interleaved: an index loop that fills `breaks`
 * and a candidate-set version that calls `addRussianPrefixBreak` / `addRussianSyllableBreaks`.
 * They look like the removed and added lines of a diff with the markers lost. Confirm which
 * one is live.
 */
override fun hyphenateWord(word: String): String {
|
||||
if (word.length < 6 || SoftHyphen in word) return word
|
||||
val breaks = mutableListOf<Int>()
|
||||
for (index in 2 until word.lastIndex) {
|
||||
val prev = word[index - 1]
|
||||
val current = word[index]
|
||||
val next = word[index + 1]
|
||||
if (current.isRussianVowel() && next.isRussianConsonant()) breaks += index + 1
|
||||
if (prev.isRussianVowel() && current.isRussianConsonant() && next.isRussianVowel()) breaks += index
|
||||
// Skip words that already carry a soft hyphen, contain non-Russian characters, or are all upper case.
if (SoftHyphen in word || !word.isRussianWord() || word.isLikelyAbbreviation()) return word
|
||||
|
||||
// Classification runs on the lowercased copy; breaks are inserted into the original word.
val lower = word.lowercase()
|
||||
val candidates = mutableSetOf<Int>()
|
||||
addRussianPrefixBreak(lower, candidates)
|
||||
addRussianSyllableBreaks(lower, candidates)
|
||||
|
||||
// NOTE(review): `candidates` keeps insertion order and the prefix break is added first, so the
// filtered list can be unsorted (e.g. [4, 2] for "переход"). Confirm `insertBreaks` tolerates that.
return insertBreaks(word, candidates.filter { lower.isLegalRussianBreak(it) }, minPrefix = 2, minSuffix = 2)
|
||||
}
|
||||
return insertBreaks(word, breaks, minPrefix = 2, minSuffix = 2)
|
||||
|
||||
/**
 * Adds one break candidate directly after the first entry of [RussianPrefixes] that [word]
 * starts with. Does nothing when no entry matches. The candidate is not validated here.
 */
private fun addRussianPrefixBreak(word: String, candidates: MutableSet<Int>) {
    val matchedPrefix = RussianPrefixes.find { word.startsWith(it) } ?: return
    candidates += matchedPrefix.length
}
|
||||
|
||||
/**
 * Adds one break candidate for each pair of consecutive vowels in [word], chosen by the number
 * of letters that separate them:
 *
 * - none: between the two vowels;
 * - one: before that letter, or after it when it is `й`, `ь` or `ъ`;
 * - two: after the first `й`/`ь`/`ъ` if there is one, otherwise between the two letters;
 * - three or more: decided by [addRussianClusterBreak].
 *
 * Candidates are not validated here.
 */
private fun addRussianSyllableBreaks(word: String, candidates: MutableSet<Int>) {
    var previousVowel = -1
    for (position in word.indices) {
        if (!word[position].isRussianVowel()) continue
        if (previousVowel >= 0) {
            val gapStart = previousVowel + 1
            when (position - gapStart) {
                0 -> candidates += position
                1 -> candidates += if (word[gapStart].isRussianJoiner()) gapStart + 1 else gapStart
                2 -> {
                    val joinerAt = (gapStart until position).firstOrNull { word[it].isRussianJoiner() }
                    candidates += (joinerAt ?: gapStart) + 1
                }
                else -> addRussianClusterBreak(word, gapStart, position, candidates)
            }
        }
        previousVowel = position
    }
}
|
||||
|
||||
/**
 * Adds at most one break candidate for the letters of [word] in `clusterStart until clusterEnd`.
 *
 * - If the range contains `й`, `ь` or `ъ`, the break goes right after the first such letter,
 *   unless it is the last letter of the range, in which case nothing is added.
 * - A cluster equal to `ств` is split after `ст`; a longer cluster ending in `ств` is split
 *   before that `ств`.
 * - Otherwise the break goes before the first position where an entry of [RussianRightOnsets]
 *   starts, falling back to the position before the last letter of the cluster.
 */
private fun addRussianClusterBreak(
    word: String,
    clusterStart: Int,
    clusterEnd: Int,
    candidates: MutableSet<Int>,
) {
    val span = clusterStart until clusterEnd
    val joinerAt = span.firstOrNull { word[it].isRussianJoiner() }
    if (joinerAt != null) {
        if (joinerAt < clusterEnd - 1) candidates += joinerAt + 1
        return
    }

    val cluster = word.substring(clusterStart, clusterEnd)
    val breakAt = when {
        cluster == "ств" -> clusterStart + 2
        cluster.endsWith("ств") -> clusterEnd - 3
        else -> span.firstOrNull { at -> RussianRightOnsets.any { onset -> word.startsWith(onset, at) } }
            ?: (clusterEnd - 1)
    }
    candidates += breakAt
}
|
||||
}
|
||||
|
||||
/**
 * Prefixes after which a break is preferred.
 *
 * Order matters: `addRussianPrefixBreak` uses the first entry the word starts with, so a longer
 * prefix must come before any shorter prefix it begins with (`подъ` before `под`, `объ` before `об`).
 */
private val RussianPrefixes = listOf(
    "сверх",
    "подъ",
    "пере",
    "пред",
    "объ",
    "без",
    "бес",
    "воз",
    "вос",
    "низ",
    "нис",
    "раз",
    "рас",
    "роз",
    "рос",
    "под",
    "при",
    "про",
    "над",
    "меж",
    "вз",
    "вс",
    "из",
    "ис",
    "от",
    "об",
)
|
||||
|
||||
/** Two-letter consonant pairs that `addRussianClusterBreak` keeps together on the right of a break. */
private val RussianRightOnsets = listOf(
    // "с" followed by another consonant
    "ст", "ск", "сп", "сн", "сл", "см", "св",
    // consonant followed by "р" or "л"
    "бр", "бл", "вр", "вл", "гр", "гл", "др", "тр", "кр", "кл", "пр", "пл", "фр", "фл",
)
|
||||
|
||||
/** The Unicode soft hyphen character (U+00AD). */
const val SoftHyphen: Char = '\u00AD'
|
||||
|
||||
@ -107,4 +213,24 @@ private fun Char.isConsonant(): Boolean = isLetter() && !isVowel()
|
||||
/** True when this character, ignoring case, is one of the ten Russian vowel letters. */
private fun Char.isRussianVowel(): Boolean = "аеёиоуыэюя".contains(lowercaseChar())
|
||||
|
||||
/**
 * True when this character, ignoring case, appears in the consonant literal below.
 *
 * NOTE(review): two literals follow the `=`: one that includes `й` and one that does not.
 * They look like the removed and added lines of a diff with the markers lost. Only one can be
 * the real body; confirm which.
 */
private fun Char.isRussianConsonant(): Boolean =
|
||||
lowercaseChar() in "бвгджзйклмнпрстфхцчшщ"
|
||||
lowercaseChar() in "бвгджзклмнпрстфхцчшщ"
|
||||
|
||||
/** True when this character, ignoring case, is `й`, `ь` or `ъ`. */
private fun Char.isRussianJoiner(): Boolean = when (lowercaseChar()) {
    'й', 'ь', 'ъ' -> true
    else -> false
}
|
||||
|
||||
/**
 * True when this character, ignoring case, is one of the 33 letters of the Russian alphabet.
 * `а`..`я` is a contiguous code-point range holding every letter except `ё`, which is tested
 * separately.
 */
private fun Char.isRussianLetter(): Boolean {
    val lower = lowercaseChar()
    return lower in 'а'..'я' || lower == 'ё'
}
|
||||
|
||||
/** True when every character of this string is a Russian letter; an empty string also yields true. */
private fun String.isRussianWord(): Boolean = none { !it.isRussianLetter() }
|
||||
|
||||
/** True when this string has at least two characters and all of them are upper case. */
private fun String.isLikelyAbbreviation(): Boolean {
    if (length < 2) return false
    return none { !it.isUpperCase() }
}
|
||||
|
||||
/**
 * Tells whether a break may be placed in front of the character at [index].
 *
 * A break is rejected when either side would have fewer than two characters, when either side
 * would contain no vowel, when the right side would start with `й`, `ь`, `ъ` or `ы`, or when it
 * would separate a consonant from the vowel that follows it.
 *
 * @param index position of the first character of the right-hand part.
 * @return `true` when none of the rejection rules applies.
 */
private fun String.isLegalRussianBreak(index: Int): Boolean {
    // Each side keeps at least two characters; this also keeps the indexing below in range.
    if (index < 2 || length - index < 2) return false
    // Each side must contain a vowel.
    if (take(index).none(Char::isRussianVowel) || drop(index).none(Char::isRussianVowel)) return false

    val left = this[index - 1]
    val right = this[index]
    // й, ь, ъ and ы never open the right-hand part (previously checked twice for й/ь/ъ).
    if (right.isRussianJoiner() || right.lowercaseChar() == 'ы') return false
    // A consonant stays with the vowel that follows it.
    return !(left.isRussianConsonant() && right.isRussianVowel())
}
|
||||
|
||||
@ -1,13 +1,16 @@
|
||||
package net.sergeych.toread.text
|
||||
|
||||
import kotlin.test.Test
|
||||
import kotlin.test.assertEquals
|
||||
import kotlin.test.assertNotEquals
|
||||
import kotlin.test.assertTrue
|
||||
|
||||
class HyphenationTest {
|
||||
// Registry under test; the test methods and hyphenatedRu() below call it.
private val hyphenation = HyphenationRegistry()
|
||||
|
||||
@Test
|
||||
fun selectsEnglishPluginByLanguage() {
|
||||
val hyphenated = HyphenationRegistry().hyphenate("composition", "en")
|
||||
val hyphenated = hyphenation.hyphenate("composition", "en")
|
||||
|
||||
assertNotEquals("composition", hyphenated)
|
||||
assertTrue(SoftHyphen in hyphenated)
|
||||
@ -15,9 +18,41 @@ class HyphenationTest {
|
||||
|
||||
// Checks that hyphenating a Russian word with language "ru" changes it and inserts a soft hyphen.
// NOTE(review): `hyphenated` is declared twice below, once via a fresh HyphenationRegistry() and
// once via the shared `hyphenation` property. These look like the removed and added lines of a
// diff with the markers lost; only one can be live. Confirm which.
@Test
|
||||
fun selectsRussianPluginByLanguage() {
|
||||
val hyphenated = HyphenationRegistry().hyphenate("повествование", "ru")
|
||||
val hyphenated = hyphenation.hyphenate("повествование", "ru")
|
||||
|
||||
assertNotEquals("повествование", hyphenated)
|
||||
assertTrue(SoftHyphen in hyphenated)
|
||||
}
|
||||
|
||||
// Each word must come back with soft hyphens (rendered as '-') at exactly the listed positions.
@Test
fun suggestsRussianHyphensAtReadableBreaks() {
    val expectations = listOf(
        "молоко" to "мо-ло-ко",
        "корова" to "ко-ро-ва",
        "яблоко" to "яб-ло-ко",
        "повествование" to "по-вест-во-ва-ние",
        "предложение" to "пред-ло-же-ние",
        "компьютер" to "компью-тер",
        "подъезд" to "подъ-езд",
        "большой" to "боль-шой",
        "майор" to "май-ор",
        "масса" to "мас-са",
        "длинный" to "длин-ный",
        "разыскать" to "ра-зыс-кать",
        "розыгрыш" to "ро-зыг-рыш",
    )
    for ((word, expected) in expectations) {
        assertEquals(expected, word.hyphenatedRu())
    }
}
|
||||
|
||||
// These inputs must come back without any added break: short words, an all-caps word,
// a mixed-script word, a word with digits, and a word that already contains a soft hyphen.
@Test
fun keepsRussianWordsWithoutLegalReadableBreaksUnchanged() {
    val untouched = listOf("дом", "мир", "стол", "семья", "СССР", "testовый", "книга123")
    for (word in untouched) {
        assertEquals(word, word.hyphenatedRu())
    }

    val alreadyHyphenated = "ко${SoftHyphen}рова"
    assertEquals("ко-рова", alreadyHyphenated.hyphenatedRu())
}
|
||||
|
||||
/** Hyphenates this string with language "ru" and renders every soft hyphen as a visible `-`. */
private fun String.hyphenatedRu(): String {
    val withSoftHyphens = hyphenation.hyphenate(this, "ru")
    return withSoftHyphens.replace(SoftHyphen, '-')
}
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user