Skip to content

Commit d58fce1

Browse files
[Feature]Check Chinese comments in the code (#8319)
1 parent 81de0a6 commit d58fce1

File tree

6 files changed

+194
-13
lines changed

6 files changed

+194
-13
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,180 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.seatunnel.api;
19+
20+
import org.junit.jupiter.api.Assertions;
21+
import org.junit.jupiter.api.Disabled;
22+
import org.junit.jupiter.api.Test;
23+
24+
import com.github.javaparser.JavaParser;
25+
import com.github.javaparser.ParseResult;
26+
import com.github.javaparser.ast.CompilationUnit;
27+
import com.github.javaparser.ast.comments.Comment;
28+
import com.github.javaparser.ast.visitor.VoidVisitorAdapter;
29+
import lombok.extern.slf4j.Slf4j;
30+
31+
import java.io.IOException;
32+
import java.nio.file.FileVisitOption;
33+
import java.nio.file.Files;
34+
import java.nio.file.Path;
35+
import java.nio.file.Paths;
36+
import java.util.ArrayList;
37+
import java.util.List;
38+
import java.util.regex.Pattern;
39+
import java.util.stream.Stream;
40+
41+
import static org.apache.seatunnel.api.ImportShadeClassCheckTest.isWindows;
42+
43+
@Slf4j
44+
public class ChineseCharacterCheckTest {
45+
46+
private final JavaParser JAVA_PARSER = new JavaParser();
47+
48+
private static final Pattern CHINESE_PATTERN = Pattern.compile("[\\u4e00-\\u9fa5]");
49+
50+
/** Defines what content should be checked for Chinese characters */
51+
public enum CheckScope {
52+
/** Check both comments and code */
53+
ALL,
54+
/** Check only comments */
55+
COMMENTS_ONLY,
56+
/** Check only code (string literals) */
57+
CODE_ONLY
58+
}
59+
60+
@Disabled("Currently only checking comments")
61+
@Test
62+
public void checkChineseCharactersInAll() {
63+
checkChineseCharacters(CheckScope.ALL);
64+
}
65+
66+
@Test
67+
public void checkChineseCharactersInCommentsOnly() {
68+
checkChineseCharacters(CheckScope.COMMENTS_ONLY);
69+
}
70+
71+
@Disabled("Currently only checking comments")
72+
@Test
73+
public void checkChineseCharactersInCodeOnly() {
74+
checkChineseCharacters(CheckScope.CODE_ONLY);
75+
}
76+
77+
private void checkChineseCharacters(CheckScope scope) {
78+
// Define path fragments for source and test Java files
79+
String mainPathFragment = isWindows ? "src\\main\\java" : "src/main/java";
80+
String testPathFragment2 = isWindows ? "src\\test\\java" : "src/test/java";
81+
82+
try (Stream<Path> paths = Files.walk(Paths.get(".."), FileVisitOption.FOLLOW_LINKS)) {
83+
List<String> filesWithChinese = new ArrayList<>();
84+
85+
// Filter Java files in the specified directories
86+
paths.filter(
87+
path -> {
88+
String pathString = path.toString();
89+
return pathString.endsWith(".java")
90+
&& (pathString.contains(mainPathFragment)
91+
|| pathString.contains(testPathFragment2));
92+
})
93+
.forEach(
94+
path -> {
95+
try {
96+
// Parse the Java file
97+
ParseResult<CompilationUnit> parseResult =
98+
JAVA_PARSER.parse(Files.newInputStream(path));
99+
100+
parseResult
101+
.getResult()
102+
.ifPresent(
103+
cu -> {
104+
// Check for Chinese characters in comments
105+
// if needed
106+
if (scope != CheckScope.CODE_ONLY) {
107+
List<Comment> comments =
108+
cu.getAllContainedComments();
109+
for (Comment comment : comments) {
110+
if (CHINESE_PATTERN
111+
.matcher(
112+
comment
113+
.getContent())
114+
.find()) {
115+
filesWithChinese.add(
116+
String.format(
117+
"Found Chinese characters in comment at %s: %s",
118+
path
119+
.toAbsolutePath(),
120+
comment.getContent()
121+
.trim()));
122+
}
123+
}
124+
}
125+
126+
// Check for Chinese characters in code if
127+
// needed
128+
if (scope != CheckScope.COMMENTS_ONLY) {
129+
ChineseCharacterVisitor visitor =
130+
new ChineseCharacterVisitor(
131+
path, filesWithChinese);
132+
visitor.visit(cu, null);
133+
}
134+
});
135+
136+
} catch (Exception e) {
137+
log.error("Error parsing file: {}", path, e);
138+
}
139+
});
140+
141+
// Assert that no files contain Chinese characters
142+
Assertions.assertEquals(
143+
0,
144+
filesWithChinese.size(),
145+
() ->
146+
String.format(
147+
"Found Chinese characters in following files (Scope: %s):\n%s",
148+
scope, String.join("\n", filesWithChinese)));
149+
150+
} catch (IOException e) {
151+
throw new RuntimeException(e);
152+
}
153+
}
154+
155+
private static class ChineseCharacterVisitor extends VoidVisitorAdapter<Void> {
156+
private final Path filePath;
157+
private final List<String> filesWithChinese;
158+
159+
public ChineseCharacterVisitor(Path filePath, List<String> filesWithChinese) {
160+
this.filePath = filePath;
161+
this.filesWithChinese = filesWithChinese;
162+
}
163+
164+
@Override
165+
public void visit(CompilationUnit cu, Void arg) {
166+
// Check for Chinese characters in string literals
167+
cu.findAll(com.github.javaparser.ast.expr.StringLiteralExpr.class)
168+
.forEach(
169+
str -> {
170+
if (CHINESE_PATTERN.matcher(str.getValue()).find()) {
171+
filesWithChinese.add(
172+
String.format(
173+
"Found Chinese characters in string literal at %s: %s",
174+
filePath.toAbsolutePath(), str.getValue()));
175+
}
176+
});
177+
super.visit(cu, arg);
178+
}
179+
}
180+
}

seatunnel-connectors-v2/connector-sls/src/main/java/org/apache/seatunnel/connectors/seatunnel/sls/serialization/FastLogDeserializationContent.java

+2-1
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,8 @@ private SeaTunnelRow convertFastLogContent(FastLog log) {
9090
.append("\":\"")
9191
.append(content.getValue())
9292
.append("\","));
93-
jsonStringBuilder.deleteCharAt(jsonStringBuilder.length() - 1); // 删除最后一个逗号
93+
// Remove the last comma
94+
jsonStringBuilder.deleteCharAt(jsonStringBuilder.length() - 1);
9495
jsonStringBuilder.append("}");
9596
// content field
9697
transformedRow.add(jsonStringBuilder.toString());

seatunnel-connectors-v2/connector-typesense/src/main/java/org/apache/seatunnel/connectors/seatunnel/typesense/serialize/source/DefaultSeaTunnelRowDeserializer.java

+3-3
Original file line numberDiff line numberDiff line change
@@ -115,10 +115,10 @@ SeaTunnelRow convert(TypesenseRecord rowRecord) {
115115
try {
116116
for (int i = 0; i < rowTypeInfo.getTotalFields(); i++) {
117117
fieldName = rowTypeInfo.getFieldName(i);
118-
value = doc.get(fieldName); // 字段值
118+
value = doc.get(fieldName);
119119
if (value != null) {
120-
seaTunnelDataType =
121-
rowTypeInfo.getFieldType(i); // seaTunnelDataType 为SeaTunnel类型
120+
// seaTunnelDataType is the SeaTunnel type
121+
seaTunnelDataType = rowTypeInfo.getFieldType(i);
122122
seaTunnelFields[i] = convertValue(seaTunnelDataType, value);
123123
}
124124
}

seatunnel-e2e/seatunnel-connector-v2-e2e/connector-doris-e2e/src/test/java/org/apache/seatunnel/e2e/connector/doris/DorisIT.java

+4-4
Original file line numberDiff line numberDiff line change
@@ -738,7 +738,7 @@ public void close() throws SQLException {
738738
}
739739

740740
public void getErrorUrl(String message) {
741-
// 使用正则表达式匹配URL
741+
// Using regular expressions to match URLs
742742
Pattern pattern = Pattern.compile("http://[\\w./?=&-_]+");
743743
Matcher matcher = pattern.matcher(message);
744744
String urlString = null;
@@ -754,12 +754,12 @@ public void getErrorUrl(String message) {
754754
URL url = new URL(urlString);
755755
HttpURLConnection connection = (HttpURLConnection) url.openConnection();
756756

757-
// 设置请求方法
757+
// Set the request method
758758
connection.setRequestMethod("GET");
759759

760-
// 设置连接超时时间
760+
// Set the connection timeout
761761
connection.setConnectTimeout(5000);
762-
// 设置读取超时时间
762+
// Set the read timeout
763763
connection.setReadTimeout(5000);
764764

765765
int responseCode = connection.getResponseCode();

seatunnel-e2e/seatunnel-connector-v2-e2e/connector-doris-e2e/src/test/java/org/apache/seatunnel/e2e/connector/doris/DorisMultiReadIT.java

+4-4
Original file line numberDiff line numberDiff line change
@@ -493,7 +493,7 @@ public void close() throws SQLException {
493493
}
494494

495495
public void getErrorUrl(String message) {
496-
// 使用正则表达式匹配URL
496+
// Using regular expressions to match URLs
497497
Pattern pattern = Pattern.compile("http://[\\w./?=&-_]+");
498498
Matcher matcher = pattern.matcher(message);
499499
String urlString = null;
@@ -509,12 +509,12 @@ public void getErrorUrl(String message) {
509509
URL url = new URL(urlString);
510510
HttpURLConnection connection = (HttpURLConnection) url.openConnection();
511511

512-
// 设置请求方法
512+
// Set the request method
513513
connection.setRequestMethod("GET");
514514

515-
// 设置连接超时时间
515+
// Set the connection timeout
516516
connection.setConnectTimeout(5000);
517-
// 设置读取超时时间
517+
// Set the read timeout
518518
connection.setReadTimeout(5000);
519519

520520
int responseCode = connection.getResponseCode();

seatunnel-e2e/seatunnel-connector-v2-e2e/connector-iceberg-e2e/src/test/java/org/apache/seatunnel/e2e/connector/iceberg/IcebergSinkIT.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ private void extractFiles() {
108108
"sh", "-c", "cd " + CATALOG_DIR + " && tar -zxvf " + NAMESPACE_TAR);
109109
try {
110110
Process process = processBuilder.start();
111-
// 等待命令执行完成
111+
// Wait for the command to complete
112112
int exitCode = process.waitFor();
113113
if (exitCode == 0) {
114114
log.info("Extract files successful.");

0 commit comments

Comments
 (0)