@@ -33,16 +33,23 @@ public static Optional<LdLocale> getRecognisedLanguage(String text) throws IOExc
33
33
return languageDetector .detect (textObject );
34
34
}
35
35
36
+ public static Optional <LdLocale > getRecognisedLanguage (WebDriver driver ) throws IOException {
37
+ List <LanguageProfile > languageProfiles = new LanguageProfileReader ().readAllBuiltIn ();
38
+
39
+ LanguageDetector languageDetector = LanguageDetectorBuilder .create (NgramExtractors .standard ())
40
+ .withProfiles (languageProfiles )
41
+ .build ();
42
+
43
+ TextObjectFactory textObjectFactory = CommonTextObjectFactories .forDetectingOnLargeText ();
44
+
45
+ TextObject textObject = textObjectFactory .forText (getTextFromPage (driver ));
46
+
47
+ return languageDetector .detect (textObject );
48
+ }
49
+
36
50
public static boolean isCorrectLanguageOnThePage (WebDriver driver , String lang ) throws IOException {
37
51
boolean isCorrectLang = true ;
38
- JavascriptExecutor jse = (JavascriptExecutor ) driver ;
39
- String bodyText = jse .executeScript ("return document.body.innerHTML" , "" ).toString ();
40
- bodyText = bodyText .replaceAll ("<script\\ b[^<]*(?:(?!<\\ /script>)<[^<]*)*<\\ /script>" , " " );
41
- bodyText = bodyText .replaceAll ("<noscript\\ b[^<]*(?:(?!<\\ /noscript>)<[^<]*)*<\\ /noscript>" , " " );
42
- bodyText = bodyText .replaceAll ("<style\\ b[^<]*(?:(?!<\\ /style>)<[^<]*)*<\\ /style>" , " " );
43
- bodyText = bodyText .replaceAll ("<pre\\ b[^<]*(?:(?!<\\ /pre>)<[^<]*)*<\\ /pre>" , " " );
44
- bodyText = bodyText .replaceAll ("<[^>]*>" , " " );
45
- bodyText = bodyText .toLowerCase ().replaceAll ("[\\ t|\\ n|\\ r|\\ s]+" , " " ).replaceAll ("[\\ s]+" , " " );
52
+ String bodyText = getTextFromPage (driver );
46
53
47
54
int textBlockLength = 300 ;
48
55
int bodyTextLength = bodyText .length ();
@@ -53,7 +60,7 @@ public static boolean isCorrectLanguageOnThePage(WebDriver driver, String lang)
53
60
} else {
54
61
for (int i = 0 ; i < bodyTextLength ; i += textBlockLength ) {
55
62
String tempString ;
56
- if (bodyTextLength >= (i + textBlockLength ) ) {
63
+ if (bodyTextLength >= (i + textBlockLength )) {
57
64
tempString = bodyText .substring (i , i + textBlockLength );
58
65
try {
59
66
String detectedLanguage = getRecognisedLanguage (tempString ).get ().getLanguage ();
@@ -75,4 +82,17 @@ public static boolean isCorrectLanguageOnThePage(WebDriver driver, String lang)
75
82
}
76
83
return isCorrectLang ;
77
84
}
85
+
86
+ private static String getTextFromPage (WebDriver driver ) {
87
+ JavascriptExecutor jse = (JavascriptExecutor ) driver ;
88
+ String bodyText = jse .executeScript ("return document.body.innerHTML" , "" ).toString ();
89
+ bodyText = bodyText .replaceAll ("<script\\ b[^<]*(?:(?!</script>)<[^<]*)*</script>" , " " )
90
+ .replaceAll ("<noscript\\ b[^<]*(?:(?!</noscript>)<[^<]*)*</noscript>" , " " )
91
+ .replaceAll ("<style\\ b[^<]*(?:(?!</style>)<[^<]*)*</style>" , " " )
92
+ .replaceAll ("<pre\\ b[^<]*(?:(?!</pre>)<[^<]*)*</pre>" , " " )
93
+ .replaceAll ("<[^>]*>" , " " ).toLowerCase ()
94
+ .replaceAll ("[\\ t|\\ n|\\ r|\\ s]+" , " " ).replaceAll ("[\\ s]+" , " " );
95
+
96
+ return bodyText ;
97
+ }
78
98
}
0 commit comments