Skip to content

Commit

Permalink
BIGTOP-4361: Add Chinese check (#176)
Browse files Browse the repository at this point in the history
  • Loading branch information
wuchunfu authored Feb 17, 2025
1 parent 9ff8193 commit 6c3119c
Show file tree
Hide file tree
Showing 5 changed files with 173 additions and 7 deletions.
124 changes: 124 additions & 0 deletions .github/check_chinese_character.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
#!/usr/bin/env python
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#

import os
import re
from pathlib import Path
from typing import List, Set

class chinese_character_check_test:
    """Scan source files for Chinese characters in comments and string literals.

    The checker walks a directory tree, reads every file whose name ends with
    one of ``SUPPORTED_EXTENSIONS`` and reports each comment (or string
    literal) that contains a character matched by ``CHINESE_CHAR_PATTERN``.
    """

    CHINESE_CHAR_PATTERN = re.compile(r'[\u4e00-\u9fa5]')
    # Names of directories or files to skip. Each entry is compared against
    # whole path components (a directory name or a file name), never against
    # a substring of the path.
    EXCLUDED_DIRS_AND_FILES = {
        "target",
        "node_modules",
        "dist",
    }
    # Supported file extensions
    SUPPORTED_EXTENSIONS = {
        ".java",
        ".kt",
        ".scala",
        ".js",
        ".jsx",
        ".ts",
        ".tsx",
        ".vue"
    }
    # Comment syntaxes, compiled once. DOTALL lets block comments span
    # several lines; MULTILINE makes ``$`` stop a ``//`` comment at its own
    # line end.
    COMMENT_PATTERNS = (
        re.compile(r'//.*?$', re.DOTALL | re.MULTILINE),      # Single line comments
        re.compile(r'/\*.*?\*/', re.DOTALL | re.MULTILINE),   # Multi line comments
        re.compile(r'<!--.*?-->', re.DOTALL | re.MULTILINE),  # Vue/HTML comments
    )
    # String literal syntaxes. These are deliberately simple: escaped quotes
    # inside a literal are not recognised.
    STRING_PATTERNS = (
        re.compile(r'"[^"]*"'),  # Double quoted strings
        re.compile(r"'[^']*'"),  # Single quoted strings
    )

    def should_not_contain_chinese_in_comments(self):
        """Scan comments under the default root and fail on any violation.

        Raises:
            AssertionError: if at least one comment contains Chinese characters.
        """
        violations = self.scan_for_chinese_characters(scan_target.COMMENTS)
        self.assert_no_chinese_characters(violations)

    def scan_for_chinese_characters(self, target: 'scan_target',
                                    root: 'Path | None' = None) -> List[str]:
        """Walk ``root`` and collect violations for every supported file.

        Args:
            target: object whose ``include_comments()`` / ``include_code()``
                decide which checks run.
            root: directory to scan. Defaults to ``Path("..")``, i.e. the
                parent of the current working directory.

        Returns:
            Formatted violation messages, in a deterministic (sorted walk) order.
        """
        base = Path("..") if root is None else Path(root)
        violations: List[str] = []
        for dirpath, dirnames, filenames in os.walk(base):
            # Prune excluded directories in place so os.walk never descends
            # into them; sorting keeps the report order stable between runs.
            dirnames[:] = sorted(
                name for name in dirnames if name not in self.EXCLUDED_DIRS_AND_FILES
            )
            for filename in sorted(filenames):
                path = Path(dirpath) / filename
                # Exclusions are evaluated relative to the scan root so that
                # the location of the checkout itself cannot exclude files.
                if self.is_valid_file(path) and not self.is_excluded(path.relative_to(base)):
                    self.process_file(path, target, violations)
        return violations

    def is_excluded(self, path: Path) -> bool:
        """Return True if any component of ``path`` is an excluded name."""
        return any(part in self.EXCLUDED_DIRS_AND_FILES for part in Path(path).parts)

    def is_valid_file(self, path: Path) -> bool:
        """Return True if ``path`` ends with one of the supported extensions."""
        return str(path).endswith(tuple(self.SUPPORTED_EXTENSIONS))

    def process_file(self, path: Path, target: 'scan_target', violations: List[str]):
        """Read ``path`` as UTF-8 and run the checks selected by ``target``.

        Files that cannot be read or decoded are reported on stdout and
        skipped; found violations are appended to ``violations``.
        """
        try:
            with open(path, 'r', encoding='utf-8') as file:
                content = file.read()
        except (OSError, UnicodeDecodeError) as e:
            # Best effort: an unreadable file must not abort the whole scan.
            print(f"Error processing file: {path}")
            print(e)
            return
        if target.include_comments():
            self.check_comments(content, path, violations)
        if target.include_code():
            self.check_code(content, path, violations)

    def check_comments(self, content: str, path: Path, violations: List[str]):
        """Append a violation for every comment in ``content`` with Chinese text."""
        for pattern in self.COMMENT_PATTERNS:
            for comment in pattern.findall(content):
                if self.CHINESE_CHAR_PATTERN.search(comment):
                    violations.append(self.format_violation(path, "comment", comment.strip()))

    def check_code(self, content: str, path: Path, violations: List[str]):
        """Append a violation for every string literal in ``content`` with Chinese text."""
        for pattern in self.STRING_PATTERNS:
            for string_literal in pattern.findall(content):
                if self.CHINESE_CHAR_PATTERN.search(string_literal):
                    violations.append(self.format_violation(path, "code", string_literal))

    def format_violation(self, path: Path, location: str, content: str) -> str:
        """Build the report line for one offending comment or literal."""
        return f"Chinese characters found in {location} at {path.absolute()}: {content}"

    def assert_no_chinese_characters(self, violations: List[str]):
        """Raise ``AssertionError`` listing ``violations`` unless it is empty.

        The error is raised explicitly rather than via ``assert`` so the check
        still fires when Python runs with ``-O``.
        """
        if violations:
            raise AssertionError(
                f"Found Chinese characters in files:\n{os.linesep.join(violations)}"
            )

class scan_target:
    """Pair of switches selecting which parts of a file get checked."""

    def __init__(self, check_comments: bool, check_code: bool):
        # Store both switches exactly as given.
        self.check_comments, self.check_code = check_comments, check_code

    def include_comments(self) -> bool:
        """Return the comment switch passed to the constructor."""
        return self.check_comments

    def include_code(self) -> bool:
        """Return the code switch passed to the constructor."""
        return self.check_code


# Predefined targets, attached to the class once it exists:
# comments only, code only, and both.
scan_target.COMMENTS = scan_target(check_comments=True, check_code=False)
scan_target.CODE = scan_target(check_comments=False, check_code=True)
scan_target.ALL = scan_target(check_comments=True, check_code=True)

if __name__ == "__main__":
    # Run the comment check; an AssertionError propagates if violations exist.
    chinese_character_check_test().should_not_contain_chinese_in_comments()
43 changes: 43 additions & 0 deletions .github/workflows/check_chinese_character.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#

# Workflow that runs the Chinese-character check script.
name: "Check"

# Triggered by pushes to main and by pull requests targeting main.
on:
push:
branches: [ "main" ]
pull_request:
branches: [ "main" ]

jobs:
check-chinese-character:
name: "Check Chinese Character"
runs-on: ubuntu-latest
steps:
# Check out the repository together with its submodules (recursively),
# with credential persistence turned off.
- name: Checkout repository
uses: actions/checkout@v4
with:
persist-credentials: false
submodules: recursive
# Install the Python interpreter version used to run the script.
- name: Set python
uses: actions/setup-python@v5
with:
python-version: '3.13'
# Run the checker; the path is given relative to the job's working directory.
- name: Check Chinese Character
run: python .github/check_chinese_character.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,6 @@ public static void writeProperties(String fileName, Map<String, Object> configMa
* @throws IOException
*/
public static void writeProperties(String fileName, List<Map<String, Object>> configList) {
// 创建Properties
Properties properties = new Properties();

for (Map<String, Object> map : configList) {
Expand Down
10 changes: 5 additions & 5 deletions bigtop-manager-ui/src/pages/login/index.vue
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@
<div class="login-container">
<div class="login-content">
<div class="login-main">
<!-- 登录框头部 -->
<!-- Login box header -->
<div class="login-header">
<div class="login-header-left">
<img class="login-logo" src="@/assets/logo.svg" alt="logo" />
Expand All @@ -78,14 +78,14 @@
<div class="login-header-right"><select-lang /></div>
</div>
<a-divider class="m-0" />
<!-- 登录框主体 -->
<!-- Login box body -->
<div class="login-body">
<!-- 登录框主体左侧 -->
<!-- On the left side of the login box -->
<div class="login-body-left">
<img class="login-body-left-img" src="@/assets/images/login.png" alt="login" />
</div>
<a-divider class="login-body-divider m-0" type="vertical" />
<!-- 登录框主体右侧 -->
<!-- Right side of the login box -->
<div class="login-body-right">
<div class="login-body-right-tips">{{ $t('login.tips') }}</div>
<a-form ref="formRef" class="login-body-right-form" :model="loginModel">
Expand Down Expand Up @@ -150,7 +150,7 @@
</div>
</div>
<div class="copyright">
Copyright ©2011–2023
Copyright ©2024–2025
<a href="https://www.apache.org">The Apache Software Foundation</a>. All rights reserved.
</div>
</div>
Expand Down
2 changes: 1 addition & 1 deletion bigtop-manager-ui/tests/__utils__/array.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
* under the License.
*/

import { arrayEquals } from '../../src/utils/array.ts' // 替换成实际的文件路径
import { arrayEquals } from '../../src/utils/array.ts' // Replace with the actual file path
import { describe, expect, test } from 'vitest'

describe('arrayEquals', () => {
Expand Down

0 comments on commit 6c3119c

Please sign in to comment.