-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
f872866
commit 354d3f9
Showing
4 changed files
with
97 additions
and
27 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,19 +1,50 @@ | ||
package pro.dionea.service | ||
|
||
class IdentifyLang(val lex: Set<String>) { | ||
class IdentifyLang(private val lex: Set<String>) { | ||
private val englishRange = 'a' .. 'z' | ||
private val russianRange = 'а'.. 'я' | ||
|
||
enum class Lang { | ||
RUS, ENG | ||
RUS, ENG, MIXED, UNDEFINED | ||
} | ||
|
||
private fun Set<String>.countBy(range: CharRange): Int = | ||
this.count { word -> word.any { it in range } } | ||
flatMap { it.asSequence() } | ||
.count { it in range } | ||
|
||
fun lang() : Lang = if (lex.countBy(englishRange) >= lex.countBy(russianRange)) { | ||
Lang.ENG | ||
} else { | ||
Lang.RUS | ||
fun sizeByLang() : Map<Lang, Int> | ||
= mapOf( | ||
Lang.RUS to lex.countBy(russianRange), | ||
Lang.ENG to lex.countBy(englishRange) | ||
) | ||
|
||
private fun String.lang() : Lang { | ||
val rus = count { it in russianRange } | ||
val eng = count { it in englishRange } | ||
return if (rus != 0 && eng != 0) { | ||
Lang.MIXED | ||
} else if (rus != 0) { | ||
Lang.RUS | ||
} else if (eng != 0) { | ||
Lang.ENG | ||
} else { | ||
Lang.UNDEFINED | ||
} | ||
} | ||
|
||
fun lang() : Lang { | ||
var lang = Lang.UNDEFINED | ||
for (word in lex) { | ||
val wordLang = word.lang() | ||
if (wordLang == Lang.UNDEFINED) { | ||
continue | ||
} | ||
if (lang == Lang.UNDEFINED) { | ||
lang = wordLang | ||
} else if (lang != wordLang) { | ||
return Lang.MIXED | ||
} | ||
} | ||
return lang | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters