Skip to content

Commit 878f884

Browse files
hengyunabc and ban-xiu authored
feat:实现正则表达式编译缓存功能 #3130 (#3131)
Co-authored-by: lbs <2322701154@qq.com>
1 parent 8c413e2 commit 878f884

File tree

4 files changed

+223
-3
lines changed

4 files changed

+223
-3
lines changed
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
package com.taobao.arthas.core.util;
2+
3+
import com.taobao.arthas.core.shell.term.impl.http.session.LRUCache;
4+
import java.util.regex.Pattern;
5+
6+
/**
7+
* 正则表达式缓存管理器
8+
* 用于缓存编译后的正则表达式对象,避免重复编译的开销
9+
*/
10+
public class RegexCacheManager {
11+
private static final RegexCacheManager INSTANCE = new RegexCacheManager();
12+
13+
// 使用LRUCache缓存编译后的正则表达式
14+
private final LRUCache<String, Pattern> regexCache;
15+
16+
// 缓存大小限制
17+
private static final int MAX_CACHE_SIZE = 100;
18+
19+
private RegexCacheManager() {
20+
// 初始化LRUCache,设置最大缓存大小
21+
this.regexCache = new LRUCache<>(MAX_CACHE_SIZE);
22+
}
23+
24+
public static RegexCacheManager getInstance() {
25+
return INSTANCE;
26+
}
27+
28+
/**
29+
* 获取正则表达式Pattern对象,优先从缓存获取,缓存未命中则编译并缓存
30+
*/
31+
public Pattern getPattern(String regex) {
32+
if (regex == null) {
33+
return null;
34+
}
35+
36+
// 从LRUCache获取
37+
Pattern pattern = regexCache.get(regex);
38+
if (pattern != null) {
39+
return pattern;
40+
}
41+
42+
// 缓存未命中,编译正则表达式
43+
// 不捕获PatternSyntaxException,让异常向上抛出,以便及时发现无效的正则表达式
44+
pattern = Pattern.compile(regex);
45+
// 缓存编译结果
46+
regexCache.put(regex, pattern);
47+
48+
return pattern;
49+
}
50+
51+
/**
52+
* 清理缓存
53+
*/
54+
public void clearCache() {
55+
regexCache.clear();
56+
}
57+
58+
/**
59+
* 获取缓存大小
60+
*/
61+
public int getCacheSize() {
62+
return regexCache.usedEntries();
63+
}
64+
65+
}
Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,32 @@
11
package com.taobao.arthas.core.util.matcher;
22

3+
import com.taobao.arthas.core.util.RegexCacheManager;
4+
import java.util.regex.Pattern;
5+
36
/**
47
* regex matcher
58
* @author ralf0131 2017-01-06 13:16.
69
*/
710
public class RegexMatcher implements Matcher<String> {
811

912
private final String pattern;
13+
private volatile Pattern compiledPattern;
1014

1115
public RegexMatcher(String pattern) {
1216
this.pattern = pattern;
1317
}
1418

1519
@Override
1620
public boolean matching(String target) {
17-
return null != target
18-
&& null != pattern
19-
&& target.matches(pattern);
21+
if (null == target || null == pattern) {
22+
return false;
23+
}
24+
25+
// 在第一次matching时才编译正则表达式
26+
if (compiledPattern == null) {
27+
compiledPattern = RegexCacheManager.getInstance().getPattern(pattern);
28+
}
29+
30+
return compiledPattern != null && compiledPattern.matcher(target).matches();
2031
}
2132
}
Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
package com.taobao.arthas.core.util;
2+
3+
import org.junit.Assert;
4+
import org.junit.Before;
5+
import org.junit.Test;
6+
7+
import java.util.regex.Pattern;
8+
import java.util.regex.PatternSyntaxException;
9+
10+
/**
11+
* RegexCacheManager测试类
12+
*/
13+
public class RegexCacheManagerTest {
14+
private RegexCacheManager cacheManager;
15+
16+
@Before
17+
public void setUp() {
18+
// 获取单例实例
19+
cacheManager = RegexCacheManager.getInstance();
20+
// 清理缓存,确保测试环境干净
21+
cacheManager.clearCache();
22+
}
23+
24+
/**
25+
* 测试基本缓存功能
26+
*/
27+
@Test
28+
public void testBasicCacheFunctionality() {
29+
// 测试缓存未命中的情况
30+
String regex1 = ".*Test.*";
31+
Pattern pattern1 = cacheManager.getPattern(regex1);
32+
Assert.assertNotNull(pattern1);
33+
Assert.assertEquals(1, cacheManager.getCacheSize());
34+
35+
// 测试缓存命中的情况
36+
Pattern pattern1Cached = cacheManager.getPattern(regex1);
37+
Assert.assertNotNull(pattern1Cached);
38+
Assert.assertSame(pattern1, pattern1Cached); // 应该是同一个对象
39+
Assert.assertEquals(1, cacheManager.getCacheSize()); // 缓存大小应该保持不变
40+
41+
// 测试多个正则表达式
42+
String regex2 = "^Test.*";
43+
Pattern pattern2 = cacheManager.getPattern(regex2);
44+
Assert.assertNotNull(pattern2);
45+
Assert.assertEquals(2, cacheManager.getCacheSize());
46+
47+
// 测试空正则表达式
48+
Pattern nullPattern = cacheManager.getPattern(null);
49+
Assert.assertNull(nullPattern);
50+
51+
Pattern emptyPattern = cacheManager.getPattern("");
52+
Assert.assertNotNull(emptyPattern);
53+
Assert.assertTrue(emptyPattern.matcher("").matches());
54+
Assert.assertFalse(emptyPattern.matcher("non-empty").matches());
55+
Assert.assertEquals(3, cacheManager.getCacheSize());
56+
}
57+
58+
/**
59+
* 测试LRU淘汰策略
60+
*/
61+
@Test
62+
public void testLRUEvictionPolicy() {
63+
// 生成多个正则表达式,超过最大缓存大小
64+
int maxCacheSize = 100;
65+
for (int i = 0; i < maxCacheSize + 5; i++) {
66+
String regex = "TestRegex" + i;
67+
Pattern pattern = cacheManager.getPattern(regex);
68+
Assert.assertNotNull(pattern);
69+
}
70+
71+
// 缓存大小应该等于最大缓存大小
72+
Assert.assertEquals(maxCacheSize, cacheManager.getCacheSize()); // 100 是实际的最大缓存大小
73+
74+
// 测试访问顺序,确保LRU策略生效
75+
String firstRegex = "TestRegex0";
76+
77+
// 再次访问第一个正则表达式,使其成为最近使用的
78+
Pattern firstPattern = cacheManager.getPattern(firstRegex);
79+
Assert.assertNotNull(firstPattern);
80+
81+
// 再添加一个新的正则表达式,应该淘汰最久未使用的
82+
String newRegex = "NewTestRegex";
83+
Pattern newPattern = cacheManager.getPattern(newRegex);
84+
Assert.assertNotNull(newPattern);
85+
86+
// 第一个正则表达式应该仍然在缓存中(因为刚被访问过)
87+
Pattern firstPatternAgain = cacheManager.getPattern(firstRegex);
88+
Assert.assertNotNull(firstPatternAgain);
89+
}
90+
91+
/**
92+
* 测试缓存清理功能
93+
*/
94+
@Test
95+
public void testCacheClear() {
96+
// 添加一些缓存项
97+
cacheManager.getPattern(".*Test1");
98+
cacheManager.getPattern(".*Test2");
99+
Assert.assertTrue(cacheManager.getCacheSize() > 0);
100+
101+
// 清理缓存
102+
cacheManager.clearCache();
103+
Assert.assertEquals(0, cacheManager.getCacheSize());
104+
105+
// 清理后应该可以重新添加缓存项
106+
Pattern pattern = cacheManager.getPattern(".*Test3");
107+
Assert.assertNotNull(pattern);
108+
Assert.assertEquals(1, cacheManager.getCacheSize());
109+
}
110+
111+
/**
112+
* 测试无效正则表达式处理
113+
*/
114+
@Test
115+
public void testInvalidRegexHandling() {
116+
// 测试无效的正则表达式,应该抛出PatternSyntaxException
117+
String invalidRegex = "[a-z";
118+
try {
119+
cacheManager.getPattern(invalidRegex);
120+
} catch (Exception e) {
121+
// 验证抛出的是PatternSyntaxException
122+
Assert.assertTrue("Expected PatternSyntaxException but got " + e.getClass().getName(), e instanceof PatternSyntaxException);
123+
}
124+
125+
// 测试另一个无效的正则表达式,应该抛出PatternSyntaxException
126+
String anotherInvalidRegex = "(a-z";
127+
try {
128+
cacheManager.getPattern(anotherInvalidRegex);
129+
} catch (Exception e) {
130+
// 验证抛出的是PatternSyntaxException
131+
Assert.assertTrue("Expected PatternSyntaxException but got " + e.getClass().getName(), e instanceof PatternSyntaxException);
132+
}
133+
134+
// 确保缓存大小没有增加
135+
Assert.assertEquals("无效正则表达式不应该被缓存", 0, cacheManager.getCacheSize());
136+
}
137+
138+
}

core/src/test/java/com/taobao/arthas/core/util/matcher/RegexMatcherTest.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,12 @@ public void testMatchingWithNullInputs(){
1616
Assert.assertTrue(new RegexMatcher("foobar").matching("foobar"));
1717
}
1818

19+
@Test
20+
public void testMatchingWithEmptyPattern() {
21+
Assert.assertTrue(new RegexMatcher("").matching(""));
22+
Assert.assertFalse(new RegexMatcher("").matching("foobar"));
23+
}
24+
1925
/**
2026
* test regex with . | * + ? \s \S \w \W and so on...
2127
*/

0 commit comments

Comments
 (0)