diff --git a/.github/check_chinese_character.py b/.github/check_chinese_character.py
new file mode 100644
index 000000000..51dc71af8
--- /dev/null
+++ b/.github/check_chinese_character.py
@@ -0,0 +1,167 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#    https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""Fail the build when scanned source files contain Chinese characters."""
+
+import os
+import re
+import sys
+from pathlib import Path
+from typing import List, Set
+
+
+class ChineseCharacterCheckTest:
+    """Scans source files under SCAN_ROOT for Chinese characters."""
+
+    # CJK ideographs in the range U+4E00..U+9FA5.
+    CHINESE_CHAR_PATTERN = re.compile(r'[\u4e00-\u9fa5]')
+    # Exclude directories or files by name. Entries are compared with whole
+    # path components, so "dist" does not also exclude "distribution".
+    EXCLUDED_DIRS_AND_FILES: Set[str] = {
+        "target",
+        "node_modules",
+        "dist",
+    }
+    # Supported file extensions
+    SUPPORTED_EXTENSIONS = {".java", ".kt", ".scala", ".js", ".ts", ".vue"}
+    # Repository root: the parent of the directory holding this script, so the
+    # scan does not depend on the directory the script is launched from.
+    SCAN_ROOT = Path(__file__).resolve().parent.parent
+    # DOTALL lets block comments span lines; MULTILINE anchors "$" per line.
+    COMMENT_FLAGS = re.DOTALL | re.MULTILINE
+    COMMENT_PATTERNS = [
+        re.compile(r'//.*?$', COMMENT_FLAGS),  # Single line comments
+        re.compile(r'/\*.*?\*/', COMMENT_FLAGS),  # Multi line comments
+        re.compile(r'<!--.*?-->', COMMENT_FLAGS),  # Vue/HTML comments
+    ]
+    STRING_PATTERNS = [
+        re.compile(r'"[^"]*"'),  # Double quoted strings
+        re.compile(r"'[^']*'"),  # Single quoted strings
+    ]
+
+    def should_not_contain_chinese_in_comments(self):
+        """Scan comments only and raise AssertionError on any violation."""
+        violations = self.scan_for_chinese_characters(ScanTarget.COMMENTS)
+        self.assert_no_chinese_characters(violations)
+
+    def scan_for_chinese_characters(self, target: 'ScanTarget') -> List[str]:
+        """Walk SCAN_ROOT once and collect violations from eligible files.
+
+        Excluded directories are pruned during the walk, so trees such as
+        node_modules are never traversed.
+
+        Returns:
+            Violation messages in a deterministic order.
+        """
+        violations: List[str] = []
+        for dirpath, dirnames, filenames in os.walk(self.SCAN_ROOT):
+            # Assigning in place tells os.walk which subdirectories to visit.
+            dirnames[:] = sorted(
+                name for name in dirnames if name not in self.EXCLUDED_DIRS_AND_FILES
+            )
+            for filename in sorted(filenames):
+                path = Path(dirpath, filename)
+                # Match exclusions below the root only, so the location of the
+                # checkout itself can never exclude every file.
+                relative = path.relative_to(self.SCAN_ROOT)
+                if self.is_valid_file(path) and not self.is_excluded(relative):
+                    self.process_file(path, target, violations)
+        return violations
+
+    def is_excluded(self, path: Path) -> bool:
+        """Return True when any component of *path* is an excluded name."""
+        return any(part in self.EXCLUDED_DIRS_AND_FILES for part in path.parts)
+
+    def is_valid_file(self, path: Path) -> bool:
+        """Return True when *path* ends with a supported extension."""
+        return str(path).endswith(tuple(self.SUPPORTED_EXTENSIONS))
+
+    def process_file(self, path: Path, target: 'ScanTarget', violations: List[str]):
+        """Read *path* as UTF-8 and append the violations found in it.
+
+        A file that cannot be read or decoded is reported on stderr and
+        skipped, so one bad file does not abort the whole scan.
+        """
+        try:
+            content = path.read_text(encoding='utf-8')
+        except (OSError, UnicodeDecodeError) as e:
+            print(f"Error processing file: {path}", file=sys.stderr)
+            print(e, file=sys.stderr)
+            return
+        if target.include_comments():
+            self.check_comments(content, path, violations)
+        if target.include_code():
+            self.check_code(content, path, violations)
+
+    def check_comments(self, content: str, path: Path, violations: List[str]):
+        """Append one violation per comment in *content* with Chinese text."""
+        for pattern in self.COMMENT_PATTERNS:
+            for comment in pattern.findall(content):
+                if self.CHINESE_CHAR_PATTERN.search(comment):
+                    violations.append(self.format_violation(path, "comment", comment.strip()))
+
+    def check_code(self, content: str, path: Path, violations: List[str]):
+        """Append one violation per quoted string in *content* with Chinese text."""
+        for pattern in self.STRING_PATTERNS:
+            for string_literal in pattern.findall(content):
+                if self.CHINESE_CHAR_PATTERN.search(string_literal):
+                    violations.append(self.format_violation(path, "code", string_literal))
+
+    def format_violation(self, path: Path, location: str, content: str) -> str:
+        """Build the message reported for a single violation."""
+        return f"Chinese characters found in {location} at {path.absolute()}: {content}"
+
+    def assert_no_chinese_characters(self, violations: List[str]):
+        """Raise AssertionError listing *violations* unless it is empty.
+
+        Raised explicitly rather than with ``assert`` so the check is not
+        stripped when Python runs with ``-O``.
+        """
+        if violations:
+            raise AssertionError(
+                f"Found Chinese characters in files:\n{os.linesep.join(violations)}"
+            )
+
+
+class ScanTarget:
+    """Selects which parts of a file are checked for Chinese characters."""
+
+    def __init__(self, check_comments: bool, check_code: bool):
+        self.check_comments = check_comments
+        self.check_code = check_code
+
+    def include_comments(self) -> bool:
+        """Return True when comments are checked."""
+        return self.check_comments
+
+    def include_code(self) -> bool:
+        """Return True when string literals are checked."""
+        return self.check_code
+
+
+# Predefined targets; assigned here because the class must exist first.
+ScanTarget.COMMENTS = ScanTarget(True, False)  # Check only comments
+ScanTarget.CODE = ScanTarget(False, True)  # Check only code (string literals)
+ScanTarget.ALL = ScanTarget(True, True)  # Check both comments and code
+
+
+if __name__ == "__main__":
+    try:
+        ChineseCharacterCheckTest().should_not_contain_chinese_in_comments()
+    except AssertionError as error:
+        # Exit non-zero with the report only; a traceback adds nothing here.
+        sys.exit(str(error))
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 5174d8080..bd91fad8f 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -43,19 +43,10 @@ jobs:
with:
submodules: true
- name: Set JDK
- uses: actions/setup-java@v4
+ uses: actions/setup-python@v4
with:
- distribution: 'temurin'
- java-version: '17'
- cache: 'maven'
- - name: Cache local Maven repository
- uses: actions/cache@v4
- with:
- path: ~/.m2/repository
- key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }}
- restore-keys: |
- ${{ runner.os }}-maven-
- - run: ./mvnw clean test -Dskip.pnpm -Dskip.installnodepnpm -Dskip.pnpm.test -DfailIfNoTests=false -Dtest=ChineseCharacterCheckTest
+ python-version: '3.13'
+ - run: python .github/check_chinese_character.py
unit-tests-java:
name: "Run unit test(Java)"
diff --git a/bigtop-manager-bom/pom.xml b/bigtop-manager-bom/pom.xml
index 85efae8a2..ce8f88b89 100644
--- a/bigtop-manager-bom/pom.xml
+++ b/bigtop-manager-bom/pom.xml
@@ -53,7 +53,6 @@
0.35.0
3.0.3
2.1.0
- 3.26.3
@@ -279,11 +278,6 @@
langchain4j-reactor
${langchain4j.version}
-
- com.github.javaparser
- javaparser-core
- ${javaparser.version}
-
diff --git a/bigtop-manager-common/pom.xml b/bigtop-manager-common/pom.xml
index f93338cdf..ad71ab9d8 100644
--- a/bigtop-manager-common/pom.xml
+++ b/bigtop-manager-common/pom.xml
@@ -91,10 +91,5 @@
jakarta.annotation-api
-
- com.github.javaparser
- javaparser-core
- test
-
diff --git a/bigtop-manager-common/src/test/java/org/apache/bigtop/manager/common/utils/ChineseCharacterCheckTest.java b/bigtop-manager-common/src/test/java/org/apache/bigtop/manager/common/utils/ChineseCharacterCheckTest.java
deleted file mode 100644
index dc8636ed5..000000000
--- a/bigtop-manager-common/src/test/java/org/apache/bigtop/manager/common/utils/ChineseCharacterCheckTest.java
+++ /dev/null
@@ -1,165 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * https://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.bigtop.manager.common.utils;
-
-import org.junit.jupiter.api.Assertions;
-import org.junit.jupiter.api.Test;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import com.github.javaparser.JavaParser;
-import com.github.javaparser.ParseResult;
-import com.github.javaparser.ast.CompilationUnit;
-import com.github.javaparser.ast.expr.StringLiteralExpr;
-
-import java.io.IOException;
-import java.nio.file.FileVisitOption;
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.nio.file.Paths;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Set;
-import java.util.regex.Pattern;
-import java.util.stream.Stream;
-
-/**
- * Test case for checking Chinese characters in Java files
- */
-public class ChineseCharacterCheckTest {
-
- private static final Logger log = LoggerFactory.getLogger(ChineseCharacterCheckTest.class);
-
- private static final Pattern CHINESE_CHAR_PATTERN = Pattern.compile("[\u4e00-\u9fa5]");
- private static final Set EXCLUDED_FILES = new HashSet<>(Collections.singletonList("Metrics"));
- private static final String MAIN_SOURCE_DIR = "src/main/java";
- private static final String TEST_SOURCE_DIR = "src/test/java";
-
- private final JavaParser javaParser = new JavaParser();
- private final String sourceDir;
- private final String testDir;
-
- public ChineseCharacterCheckTest() {
- boolean isWindowsOs = System.getProperty("os.name").toLowerCase().startsWith("win");
- String separator = isWindowsOs ? "\\" : "/";
- this.sourceDir = MAIN_SOURCE_DIR.replace("/", separator);
- this.testDir = TEST_SOURCE_DIR.replace("/", separator);
- }
-
- @Test
- void shouldNotContainChineseInComments() {
- List violations = scanForChineseCharacters(ScanTarget.COMMENTS);
- assertNoChineseCharacters(violations);
- }
-
- private List scanForChineseCharacters(ScanTarget target) {
- List violations = new ArrayList<>();
- try (Stream paths = Files.walk(Paths.get(".."), FileVisitOption.FOLLOW_LINKS)) {
- paths.filter(this::isValidJavaFile).forEach(path -> processFile(path, target, violations));
- } catch (IOException e) {
- throw new RuntimeException("Failed to scan Java files", e);
- }
- return violations;
- }
-
- private boolean isValidJavaFile(Path path) {
- String pathStr = path.toString();
- return pathStr.endsWith(".java")
- && (pathStr.contains(sourceDir) || pathStr.contains(testDir))
- && EXCLUDED_FILES.stream().noneMatch(pathStr::contains);
- }
-
- private void processFile(Path path, ScanTarget target, List violations) {
- try {
- ParseResult parseResult = javaParser.parse(Files.newInputStream(path));
- parseResult.getResult().ifPresent(cu -> {
- if (target.includeComments()) {
- checkComments(cu, path, violations);
- }
- if (target.includeCode()) {
- checkCode(cu, path, violations);
- }
- });
- } catch (Exception e) {
- log.error("Error processing file: {}", path, e);
- }
- }
-
- private void checkComments(CompilationUnit cu, Path path, List violations) {
- cu.getAllContainedComments().stream()
- .filter(comment ->
- CHINESE_CHAR_PATTERN.matcher(comment.getContent()).find())
- .forEach(comment -> violations.add(
- formatViolation(path, "comment", comment.getContent().trim())));
- }
-
- private void checkCode(CompilationUnit cu, Path path, List violations) {
- cu.findAll(StringLiteralExpr.class).stream()
- .filter(str -> CHINESE_CHAR_PATTERN.matcher(str.getValue()).find())
- .forEach(str -> violations.add(formatViolation(path, "code", str.getValue())));
- }
-
- private String formatViolation(Path path, String location, String content) {
- return String.format("Chinese characters found in %s at %s: %s", location, path.toAbsolutePath(), content);
- }
-
- private void assertNoChineseCharacters(List violations) {
- Assertions.assertEquals(
- 0,
- violations.size(),
- () -> String.format(
- "Found Chinese characters in files:%n%s", String.join(System.lineSeparator(), violations)));
- }
-
- /**
- * Defines what content should be checked for Chinese characters
- */
- private enum ScanTarget {
- /**
- * Check only comments
- */
- COMMENTS(true, false),
- /**
- * Check only code (string literals)
- */
- CODE(false, true),
- /**
- * Check both comments and code
- */
- ALL(true, true);
-
- private final boolean checkComments;
- private final boolean checkCode;
-
- ScanTarget(boolean checkComments, boolean checkCode) {
- this.checkComments = checkComments;
- this.checkCode = checkCode;
- }
-
- public boolean includeComments() {
- return checkComments;
- }
-
- public boolean includeCode() {
- return checkCode;
- }
- }
-}