Skip to content

Commit d5973db

Browse files
supervisorbiathlon3
supervisor
authored and committed
Implemented regex for locations and httptables
1 parent 2406021 commit d5973db

File tree

189 files changed

+58140
-4
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

189 files changed

+58140
-4
lines changed

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,7 @@ KERNEL = /lib/modules/$(shell uname -r)/build
146146

147147
export KERNEL TFW_CFLAGS AVX2 BMI2 ADX TFW_GCOV
148148

149-
obj-m += lib/ db/core/ fw/ tls/
149+
obj-m += lib/ db/core/ regex/ fw/ tls/
150150

151151
all: build
152152

fw/cfg.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,9 @@
109109
* them. Helpers below facilitate that.
110110
*/
111111

112+
unsigned short number_of_regex = 0;
113+
unsigned short number_of_db_regex = 0;
114+
112115
static const char *
113116
__alloc_and_copy_literal(const char *src, size_t len, bool keep_bs)
114117
{

fw/cfg.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -336,6 +336,9 @@ struct TfwCfgSpec {
336336
void (*cleanup)(TfwCfgSpec *self);
337337
};
338338

339+
extern unsigned short number_of_regex;
340+
extern unsigned short number_of_db_regex;
341+
339342
/**
340343
* Walks over a NULL-terminated array of TfwCfgSpec structures.
341344
*/

fw/http_match.c

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@
7171
#include "http_match.h"
7272
#include "http_msg.h"
7373
#include "cfg.h"
74+
#include "regex/kmod/rex.h"
7475

7576
/**
7677
* Map an operator to the flags passed to tfw_str_eq_*() functions.
@@ -88,6 +89,35 @@ map_op_to_str_eq_flags(tfw_http_match_op_t op)
8889
return flags_tbl[op];
8990
}
9091

92+
extern int bpf_scan_bytes(const void *, __u32, struct rex_scan_attr *);
93+
94+
bool
95+
tfw_match_regex(tfw_match_t op, const char *cstr, size_t len, const TfwStr *arg)
96+
{
97+
char sss[100];
98+
struct rex_scan_attr attr = {};
99+
memcpy(&attr.database_id, cstr, sizeof(unsigned short));
100+
101+
tfw_str_to_cstr(arg, sss, 100);
102+
103+
pr_notice("attr.database_id=%u", attr.database_id);
104+
pr_notice("arg=%s", sss);
105+
int r;
106+
bool result;
107+
pr_notice("test_regex\n");
108+
r = bpf_scan_bytes(arg->data, arg->len, &attr);
109+
110+
result = (!r && attr.nr_events && attr.last_event.expression);
111+
if (result)
112+
pr_notice("Expression found in pos=%llu\n number=%u",
113+
attr.last_event.to, attr.last_event.expression);
114+
else
115+
pr_notice("Expression not found!");
116+
117+
pr_notice("========================================================\n");
118+
return result;
119+
}
120+
91121
static bool
92122
tfw_rule_str_match(const TfwStr *str, const char *cstr,
93123
int cstr_len, tfw_str_eq_flags_t flags,
@@ -97,6 +127,9 @@ tfw_rule_str_match(const TfwStr *str, const char *cstr,
97127
return tfw_str_eq_cstr_off(str, str->len - cstr_len,
98128
cstr, cstr_len, flags);
99129

130+
if (op == TFW_HTTP_MATCH_O_REGEX)
131+
return tfw_match_regex(op, cstr, cstr_len, str);
132+
100133
return tfw_str_eq_cstr(str, cstr, cstr_len, flags);
101134
}
102135

@@ -706,6 +739,83 @@ tfw_http_escape_pre_post(char *out , const char *str)
706739
return len;
707740
}
708741

742+
/*
743+
* Here we create text file for every regex string which
744+
* can be readed by hscollider.
745+
* Next hscollider compile it and save to temporary DB.
746+
* After it will be loaded to regex module DB.
747+
* All operations after creating will be done in script start_regex.sh
748+
*
749+
* As it potentially possible situation then one DB conains several
750+
* expressions, here are two variables:
751+
* number_of_db_regex - nomber of databes which we will use to look for
752+
* expression;
753+
* number_of_regex - number of expression to know wich exactly expression
754+
* was matched (parsing for it has not not implemented yet)
755+
*
756+
* After this function, number_of_db_regex will be written to start of arg,
757+
* so the lenght of regex string must be longer then two bytes.
758+
*
759+
* Directory /tmp/tempesata is created from
760+
* tempesta.sh script.
761+
*/
762+
int
763+
write_regex(const char *arg)
764+
{
765+
struct file *fl;
766+
loff_t off = 0;
767+
int r;
768+
char file_name[25];
769+
char reg_number[6];
770+
int len = strlen(arg) - 1;
771+
int len1;
772+
773+
if (len < sizeof(unsigned short)) {
774+
T_ERR_NL("String of regex too short\n");
775+
return -EINVAL;
776+
}
777+
778+
++number_of_db_regex;
779+
sprintf(file_name, "/tmp/tempesta/%u.txt", number_of_db_regex);
780+
781+
fl = filp_open(file_name, O_CREAT | O_WRONLY, 0600);
782+
if (IS_ERR(fl)) {
783+
T_ERR_NL("Cannot create regex file %s\n",
784+
file_name);
785+
return -EINVAL;
786+
}
787+
BUG_ON(!fl || !fl->f_path.dentry);
788+
789+
if (!fl->f_op->fallocate) {
790+
T_ERR_NL("File requires filesystem with fallocate support\n");
791+
filp_close(fl, NULL);
792+
return -EINVAL;
793+
}
794+
795+
++number_of_regex;
796+
sprintf(reg_number, "%i:", number_of_regex);
797+
len1 = strlen(reg_number);
798+
r = kernel_write(fl, (void *)reg_number, len1, &off);
799+
if (r != len1)
800+
goto err;
801+
802+
r = kernel_write(fl, (void *)&arg[1], len, &off);
803+
if (r != len)
804+
goto err;
805+
806+
807+
r = kernel_write(fl, "\n", 1, &off);
808+
if (r != 1)
809+
goto err;
810+
811+
filp_close(fl, NULL);
812+
return 0;
813+
err:
814+
T_ERR_NL("Cannot write regex\n");
815+
filp_close(fl, NULL);
816+
return r;
817+
}
818+
709819
const char *
710820
tfw_http_arg_adjust(const char *arg, tfw_http_match_fld_t field,
711821
const char *raw_hdr_name, size_t *size_out,
@@ -751,6 +861,11 @@ tfw_http_arg_adjust(const char *arg, tfw_http_match_fld_t field,
751861
if (wc_arg || (len > 1 && arg[len - 1] == '*' && arg[len - 2] != '\\'))
752862
*op_out = TFW_HTTP_MATCH_O_PREFIX;
753863

864+
if (!wc_arg && arg[0] == '^') {
865+
*op_out = TFW_HTTP_MATCH_O_REGEX;
866+
write_regex(arg);
867+
}
868+
754869
/*
755870
* For argument started with wildcard, the suffix matching
756871
* pattern should be applied.
@@ -779,6 +894,12 @@ tfw_http_arg_adjust(const char *arg, tfw_http_match_fld_t field,
779894
len = tfw_http_escape_pre_post(pos, arg);
780895
*size_out += full_name_len + len + 1;
781896

897+
/*
898+
* Save number_of_db_regex to use it in tfw_match_regex
899+
*/
900+
if (*op_out == TFW_HTTP_MATCH_O_REGEX)
901+
memcpy(arg_out, &number_of_db_regex, sizeof(number_of_db_regex));
902+
782903
return arg_out;
783904
}
784905

fw/http_match.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ typedef enum {
5252
TFW_HTTP_MATCH_O_EQ,
5353
TFW_HTTP_MATCH_O_PREFIX,
5454
TFW_HTTP_MATCH_O_SUFFIX,
55+
TFW_HTTP_MATCH_O_REGEX,
5556
_TFW_HTTP_MATCH_O_COUNT
5657
} tfw_http_match_op_t;
5758

@@ -170,6 +171,11 @@ int tfw_http_search_cookie(const char *cstr, unsigned long clen,
170171
TfwStr **pos, TfwStr *end, TfwStr *val,
171172
tfw_http_match_op_t op, bool is_resp_hdr);
172173

174+
int write_regex(const char *arg);
175+
176+
bool tfw_match_regex(tfw_match_t op, const char *cstr, size_t len,
177+
const TfwStr *arg);
178+
173179
#define tfw_http_chain_rules_for_each(chain, func) \
174180
({ \
175181
int r = 0; \

fw/vhost.c

Lines changed: 30 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
#include "http_sess.h"
4040
#include "client.h"
4141
#include "tls_conf.h"
42+
#include "regex/kmod/rex.h"
4243

4344
/*
4445
* The hash table entry for mapping @sni to @vhost for SAN certificates handling.
@@ -75,6 +76,7 @@ static const TfwCfgEnum tfw_match_enum[] = {
7576
{ "eq", TFW_HTTP_MATCH_O_EQ },
7677
{ "prefix", TFW_HTTP_MATCH_O_PREFIX },
7778
{ "suffix", TFW_HTTP_MATCH_O_SUFFIX },
79+
{ "regex", TFW_HTTP_MATCH_O_REGEX },
7880
{ 0 }
7981
};
8082

@@ -177,6 +179,14 @@ __tfw_match_prefix(tfw_match_t op, const char *cstr, size_t len, TfwStr *arg)
177179
return tfw_str_eq_cstr(arg, cstr, len, flags);
178180
}
179181

182+
extern int bpf_scan_bytes(const void *, __u32, struct rex_scan_attr *);
183+
184+
static bool
185+
__tfw_match_regex(tfw_match_t op, const char *cstr, size_t len, TfwStr *arg)
186+
{
187+
return tfw_match_regex(op, cstr, len, arg);
188+
}
189+
180190
typedef bool (*__tfw_match_fn)(tfw_match_t, const char *, size_t, TfwStr *);
181191

182192
static const __tfw_match_fn __tfw_match_fn_tbl[] = {
@@ -185,6 +195,7 @@ static const __tfw_match_fn __tfw_match_fn_tbl[] = {
185195
[TFW_HTTP_MATCH_O_EQ] = __tfw_match_eq,
186196
[TFW_HTTP_MATCH_O_PREFIX] = __tfw_match_prefix,
187197
[TFW_HTTP_MATCH_O_SUFFIX] = __tfw_match_suffix,
198+
[TFW_HTTP_MATCH_O_REGEX] = __tfw_match_regex,
188199
};
189200

190201
/*
@@ -1290,8 +1301,15 @@ tfw_location_init(TfwLocation *loc, tfw_match_t op, const char *arg,
12901301
+ sizeof(TfwHdrModsDesc) * TFW_USRHDRS_ARRAY_SZ * 2
12911302
+ sizeof(TfwHdrModsDesc *) * TFW_HTTP_HDR_RAW * 2;
12921303

1293-
if ((argmem = kmalloc(len + 1, GFP_KERNEL)) == NULL)
1294-
return -ENOMEM;
1304+
if (op != TFW_HTTP_MATCH_O_REGEX) {
1305+
if ((argmem = kmalloc(len + 1, GFP_KERNEL)) == NULL)
1306+
return -ENOMEM;
1307+
}
1308+
else {/*If it is a regex we need only number of DB*/
1309+
if ((argmem = kmalloc(2 + 1, GFP_KERNEL)) == NULL)
1310+
return -ENOMEM;
1311+
}
1312+
12951313
if ((data = kzalloc(size, GFP_KERNEL)) == NULL) {
12961314
kfree(argmem);
12971315
return -ENOMEM;
@@ -1325,7 +1343,16 @@ tfw_location_init(TfwLocation *loc, tfw_match_t op, const char *arg,
13251343
(TfwHdrModsDesc **)(loc->mod_hdrs[TFW_VHOST_HDRMOD_RESP].hdrs
13261344
+ TFW_USRHDRS_ARRAY_SZ);
13271345

1328-
memcpy((void *)loc->arg, (void *)arg, len + 1);
1346+
if (op != TFW_HTTP_MATCH_O_REGEX)
1347+
memcpy((void *)loc->arg, (void *)arg, len + 1);
1348+
else {
1349+
write_regex(arg);
1350+
/*
1351+
* Save number_of_db_regex to use it in tfw_match_regex
1352+
*/
1353+
memcpy((void *)loc->arg, (void *)&number_of_db_regex,
1354+
sizeof(number_of_db_regex));
1355+
}
13291356

13301357
return 0;
13311358
}

install.txt

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
Colm (Colm Programming Language)
2+
git clone https://github.com/adrian-thurston/colm.git
3+
4+
$ ./autogen.sh
5+
$ ./configure
6+
$ make
7+
$ make install
8+
9+
add LD_LIBRARY_PATH="/usr/local/lib" to /etc/environment
10+
11+
12+
13+
Ragel
14+
git clone https://github.com/adrian-thurston/ragel.git
15+
16+
$ ./autogen.sh
17+
$ ./configure --with-colm=/usr/local/
18+
$ make
19+
$ make install
20+
21+
22+
23+
https://github.com/tempesta-tech/linux-regex-module.git
24+
./cmake ./
25+
26+
after compilation
27+
copy hscollider from /linux-regex-module/bin/ to /tempesta/scripts/
28+

regex/Makefile

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
obj-m := xdp_rex.o
2+
3+
CC_FLAGS_HSRUNTIME := -isystem $(shell $(CC) -print-file-name=include)
4+
CC_FLAGS_REMOVE_SIMD := -mno-80387 -mno-fp-ret-in-387 -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx
5+
CC_FLAGS_HSRUNTIME += -DHAVE_SSE2
6+
CC_FLAGS_SIMD := -msse4.2 -msse4.1
7+
CC_FLAGS_HSRUNTIME += -DHAVE_SSE41 -DHAVE_SSE42
8+
CC_FLAGS_SIMD += -mavx -mavx2
9+
CC_FLAGS_HSRUNTIME += -DHAVE_AVX -DHAVE_AVX2
10+
#CC_FLAGS_SIMD += -mavx512f -mavx512cd -mavx512bw -mavx512vl -mavx512vnni
11+
#CC_FLAGS_HSRUNTIME += -DHAVE_AVX512
12+
#CC_FLAGS_SIMD += -mavx512vbmi -mavx512vbmi2 -mavx512vnni
13+
#CC_FLAGS_HSRUNTIME += -DHAVE_AVX512VBMI
14+
15+
CC_FLAGS_HSRUNTIME += $(CC_FLAGS_SIMD)
16+
CC_FLAGS_REMOVE_HSRUNTIME := $(CC_FLAGS_REMOVE_SIMD)
17+
CC_FLAGS_REMOVE_HSRUNTIME += -Wdeclaration-after-statement
18+
CC_FLAGS_HSRUNTIME += -Wframe-larger-than=2048
19+
CC_FLAGS_HSRUNTIME += -std=gnu11
20+
CC_FLAGS_REMOVE_HSRUNTIME += -std=gnu99
21+
22+
ccflags-y += -std=c99
23+
ccflags-y += -I$(src) -I$(src)/kmod
24+
ccflags-y += $(CC_FLAGS_HSRUNTIME)
25+
ccflags-remove-y += $(CC_FLAGS_REMOVE_HSRUNTIME)
26+
27+
CFLAGS_kmod/rex.o := $(CC_FLAGS_REMOVE_HSRUNTIME)
28+
CFLAGS_REMOVE_kmod/rex.o := $(CC_FLAGS_HSRUNTIME)
29+
CFLAGS_alloc.o := $(CC_FLAGS_REMOVE_SIMD)
30+
CFLAGS_REMOVE_alloc.o := $(CC_FLAGS_SIMD)
31+
CFLAGS_scratch.o := $(CC_FLAGS_REMOVE_SIMD)
32+
CFLAGS_REMOVE_scratch.o := $(CC_FLAGS_SIMD)
33+
CFLAGS_database.o := $(CC_FLAGS_REMOVE_SIMD)
34+
CFLAGS_REMOVE_database.o := $(CC_FLAGS_SIMD)
35+
36+
xdp_rex-m := kmod/rex.o \
37+
alloc.o \
38+
scratch.o \
39+
runtime.o \
40+
database.o \
41+
hs_version.o \
42+
stream_compress.o \
43+
fdr/fdr.o \
44+
fdr/teddy_avx2.o \
45+
fdr/teddy.o \
46+
hwlm/hwlm.o \
47+
hwlm/noodle_engine.o \
48+
nfa/accel.o \
49+
nfa/castle.o \
50+
nfa/gough.o \
51+
nfa/lbr.o \
52+
nfa/limex_64.o \
53+
nfa/limex_accel.o \
54+
nfa/limex_native.o \
55+
nfa/limex_simd128.o \
56+
nfa/limex_simd256.o \
57+
nfa/limex_simd384.o \
58+
nfa/limex_simd512.o \
59+
nfa/mcclellan.o \
60+
nfa/mcsheng.o \
61+
nfa/mcsheng_data.o \
62+
nfa/mpv.o \
63+
nfa/nfa_api_dispatch.o \
64+
nfa/repeat.o \
65+
nfa/sheng.o \
66+
nfa/shufti.o \
67+
nfa/tamarama.o \
68+
nfa/truffle.o \
69+
rose/block.o \
70+
rose/catchup.o \
71+
rose/init.o \
72+
rose/match.o \
73+
rose/program_runtime.o \
74+
rose/stream.o \
75+
som/som_runtime.o \
76+
som/som_stream.o \
77+
util/cpuid_flags.o \
78+
util/masked_move.o \
79+
util/multibit.o \
80+
util/simd_utils.o \
81+
util/state_compress.o \
82+
#

0 commit comments

Comments
 (0)