Skip to content

Commit 1015216

Browse files
committed
Codemod plus tests
1 parent 0fc55be commit 1015216

File tree

5 files changed

+51451
-11
lines changed

5 files changed

+51451
-11
lines changed
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
package io.codemodder.codemods.codeql;
2+
3+
import io.codemodder.testutils.CodemodTestMixin;
4+
import io.codemodder.testutils.Metadata;
5+
6+
/**
 * Test declaration for {@link CodeQLRegexDoSCodemod}.
 *
 * <p>The class body is empty; the whole test is described by the {@code @Metadata} annotation:
 * resources are read from the {@code codeql-regexdos} test resource directory, the test file is
 * renamed to {@code app/src/main/java/org/apache/roller/util/RegexUtil.java}, a fix is expected
 * at line 62, and no dependencies are declared.
 *
 * <p>NOTE(review): presumably {@code CodemodTestMixin} supplies the actual test logic via
 * default methods - confirm against the mixin.
 */
@Metadata(
7+
codemodType = CodeQLRegexDoSCodemod.class,
8+
testResourceDir = "codeql-regexdos",
9+
renameTestFile = "app/src/main/java/org/apache/roller/util/RegexUtil.java",
10+
expectingFixesAtLines = {62},
11+
dependencies = {})
12+
final class CodeQLRegexDoSCodemodTest implements CodemodTestMixin {}
Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. The ASF licenses this file to You
4+
* under the Apache License, Version 2.0 (the "License"); you may not
5+
* use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License. For additional information regarding
15+
* copyright in this work, please see the NOTICE file in the top level
16+
* directory of this distribution.
17+
*/
18+
19+
package org.apache.roller.util;
20+
21+
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.codec.binary.Hex;
29+
30+
31+
/**
32+
* Regular expressions utility class.
33+
*/
34+
public final class RegexUtil {
35+
36+
public static final Pattern MAILTO_PATTERN =
37+
Pattern.compile("mailto:([a-zA-Z0-9\\.\\-]+@[a-zA-Z0-9\\.\\-]+\\.[a-zA-Z0-9]+)");
38+
39+
public static final Pattern EMAIL_PATTERN =
40+
Pattern.compile("\\b[a-zA-Z0-9\\.\\-]+(@)([a-zA-Z0-9\\.\\-]+)(\\.)([a-zA-Z0-9]+)\\b");
41+
42+
43+
public static String encodeEmail(String str) {
44+
// obfuscate mailto's: turns them into hex encoded,
45+
// so that browsers can still understand the mailto link
46+
Matcher mailtoMatch = MAILTO_PATTERN.matcher(str);
47+
while (mailtoMatch.find()) {
48+
String email = mailtoMatch.group(1);
49+
//System.out.println("email=" + email);
50+
String hexed = encode(email);
51+
str = str.replaceFirst("mailto:"+email, "mailto:"+hexed);
52+
}
53+
54+
return obfuscateEmail(str);
55+
}
56+
57+
58+
/**
59+
* obfuscate plaintext emails: makes them
60+
* "human-readable" - still too easy for
61+
* machines to parse however.
62+
*/
63+
public static String obfuscateEmail(String str) {
64+
Matcher emailMatch = EMAIL_PATTERN.matcher(str);
65+
while (executeWithTimeout(() -> emailMatch.find(), 5000)) {
66+
String at = emailMatch.group(1);
67+
//System.out.println("at=" + at);
68+
str = str.replaceFirst(at, "-AT-");
69+
70+
String dot = emailMatch.group(2) + emailMatch.group(3) + emailMatch.group(4);
71+
String newDot = emailMatch.group(2) + "-DOT-" + emailMatch.group(4);
72+
//System.out.println("dot=" + dot);
73+
str = str.replaceFirst(dot, newDot);
74+
}
75+
return str;
76+
}
77+
78+
79+
/**
80+
* Return the specified match "groups" from the pattern.
81+
* For each group matched a String will be entered in the ArrayList.
82+
*
83+
* @param pattern The Pattern to use.
84+
* @param match The String to match against.
85+
* @param group The group number to return in case of a match.
86+
* @return List of matched groups from the pattern.
87+
*/
88+
public static List<String> getMatches(Pattern pattern, String match, int group) {
89+
List<String> matches = new ArrayList<>();
90+
Matcher matcher = pattern.matcher(match);
91+
while (matcher.find()) {
92+
matches.add( matcher.group(group) );
93+
}
94+
return matches;
95+
}
96+
97+
98+
/**
99+
* Thanks to the folks at Blojsom (http://sf.net/projects/blojsom)
100+
* for showing me what I was doing wrong with the Hex class.
101+
*
102+
* @param email
103+
* @return
104+
*/
105+
public static String encode(String email) {
106+
StringBuilder result = new StringBuilder(16);
107+
char[] hexString = Hex.encodeHex(email.getBytes(StandardCharsets.UTF_8));
108+
for (int i = 0; i < hexString.length; i++) {
109+
if (i % 2 == 0) {
110+
result.append('%');
111+
}
112+
result.append(hexString[i]);
113+
}
114+
115+
return result.toString();
116+
}
117+
118+
public <E> E executeWithTimeout(final Callable<E> action, final int timeout) {
119+
Future<E> maybeResult = Executors.newSingleThreadExecutor().submit(action);
120+
try {
121+
return maybeResult.get(timeout, TimeUnit.MILLISECONDS);
122+
} catch (Exception e) {
123+
throw new RuntimeException("Failed to execute within time limit.");
124+
}
125+
}
126+
127+
}
Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. The ASF licenses this file to You
4+
* under the Apache License, Version 2.0 (the "License"); you may not
5+
* use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License. For additional information regarding
15+
* copyright in this work, please see the NOTICE file in the top level
16+
* directory of this distribution.
17+
*/
18+
19+
package org.apache.roller.util;
20+
21+
import java.nio.charset.StandardCharsets;
22+
import java.util.ArrayList;
23+
import java.util.List;
24+
import java.util.regex.Matcher;
25+
import java.util.regex.Pattern;
26+
import org.apache.commons.codec.binary.Hex;
27+
28+
29+
/**
30+
* Regular expressions utility class.
31+
*/
32+
public final class RegexUtil {
33+
34+
public static final Pattern MAILTO_PATTERN =
35+
Pattern.compile("mailto:([a-zA-Z0-9\\.\\-]+@[a-zA-Z0-9\\.\\-]+\\.[a-zA-Z0-9]+)");
36+
37+
public static final Pattern EMAIL_PATTERN =
38+
Pattern.compile("\\b[a-zA-Z0-9\\.\\-]+(@)([a-zA-Z0-9\\.\\-]+)(\\.)([a-zA-Z0-9]+)\\b");
39+
40+
41+
public static String encodeEmail(String str) {
42+
// obfuscate mailto's: turns them into hex encoded,
43+
// so that browsers can still understand the mailto link
44+
Matcher mailtoMatch = MAILTO_PATTERN.matcher(str);
45+
while (mailtoMatch.find()) {
46+
String email = mailtoMatch.group(1);
47+
//System.out.println("email=" + email);
48+
String hexed = encode(email);
49+
str = str.replaceFirst("mailto:"+email, "mailto:"+hexed);
50+
}
51+
52+
return obfuscateEmail(str);
53+
}
54+
55+
56+
/**
57+
* obfuscate plaintext emails: makes them
58+
* "human-readable" - still too easy for
59+
* machines to parse however.
60+
*/
61+
public static String obfuscateEmail(String str) {
62+
Matcher emailMatch = EMAIL_PATTERN.matcher(str);
63+
while (emailMatch.find()) {
64+
String at = emailMatch.group(1);
65+
//System.out.println("at=" + at);
66+
str = str.replaceFirst(at, "-AT-");
67+
68+
String dot = emailMatch.group(2) + emailMatch.group(3) + emailMatch.group(4);
69+
String newDot = emailMatch.group(2) + "-DOT-" + emailMatch.group(4);
70+
//System.out.println("dot=" + dot);
71+
str = str.replaceFirst(dot, newDot);
72+
}
73+
return str;
74+
}
75+
76+
77+
/**
78+
* Return the specified match "groups" from the pattern.
79+
* For each group matched a String will be entered in the ArrayList.
80+
*
81+
* @param pattern The Pattern to use.
82+
* @param match The String to match against.
83+
* @param group The group number to return in case of a match.
84+
* @return List of matched groups from the pattern.
85+
*/
86+
public static List<String> getMatches(Pattern pattern, String match, int group) {
87+
List<String> matches = new ArrayList<>();
88+
Matcher matcher = pattern.matcher(match);
89+
while (matcher.find()) {
90+
matches.add( matcher.group(group) );
91+
}
92+
return matches;
93+
}
94+
95+
96+
/**
97+
* Thanks to the folks at Blojsom (http://sf.net/projects/blojsom)
98+
* for showing me what I was doing wrong with the Hex class.
99+
*
100+
* @param email
101+
* @return
102+
*/
103+
public static String encode(String email) {
104+
StringBuilder result = new StringBuilder(16);
105+
char[] hexString = Hex.encodeHex(email.getBytes(StandardCharsets.UTF_8));
106+
for (int i = 0; i < hexString.length; i++) {
107+
if (i % 2 == 0) {
108+
result.append('%');
109+
}
110+
result.append(hexString[i]);
111+
}
112+
113+
return result.toString();
114+
}
115+
116+
}

0 commit comments

Comments
 (0)