# Load bats helper libraries: assert_* helpers and run/output support.
# Note: the original paste had a space after the opening quote
# ("$LIB_BATS_ASSERT /load.bash"), which makes the path invalid.
load "$LIB_BATS_ASSERT/load.bash"
load "$LIB_BATS_SUPPORT/load.bash"

4
+ @test " verify grok format definition" {
5
+ cat << EOF > $TAILPIPE_INSTALL_DIR /config/format_grok.tpc
6
+ format "grok" "steampipe_plugin" {
7
+ layout = \` %{TIMESTAMP_ISO8601:timestamp} %{WORD:timezone} \[%{LOGLEVEL:severity}\]\s+(?:%{NOTSPACE:plugin_name}: \[%{LOGLEVEL:plugin_severity}\]\s+%{NUMBER:plugin_timestamp}:\s+)?%{GREEDYDATA:message}\`
8
+ }
9
+
10
+ table "steampipe_plugin" {
11
+ format = format.grok.steampipe_plugin
12
+
13
+ column "tp_timestamp" {
14
+ source = "timestamp"
15
+ }
16
+
17
+ column "plugin_timestamp" {
18
+ type = "timestamp"
19
+ }
20
+ }
21
+
22
+ partition "steampipe_plugin" "local" {
23
+ source "file" {
24
+ format = format.grok.steampipe_plugin
25
+ paths = ["$SOURCE_FILES_DIR /custom_logs/"]
26
+ file_layout = \` plugin-%{YEAR:year}-%{MONTHNUM:month}-%{MONTHDAY:day}.log\`
27
+ }
28
+ }
29
+ EOF
30
+
31
+ # Run collection and verify
32
+ tailpipe collect steampipe_plugin --progress=false --from=2025-04-26
33
+
34
+ # Verify data was collected correctly
35
+ run tailpipe query " select plugin_name, tp_timestamp from steampipe_plugin limit 1" --output csv
36
+ echo $output
37
+
38
+ assert_equal " $output " " plugin_name,tp_timestamp
39
+ steampipe-plugin-aws.plugin,2025-04-28 15:16:35"
40
+
41
+ # Cleanup
42
+ rm -rf $TAILPIPE_INSTALL_DIR /config/format_grok.tpc
43
+ }
44
+
45
+ @test " verify delimited format definition" {
46
+ cat << EOF > $TAILPIPE_INSTALL_DIR /config/format_delimited.tpc
47
+ format "delimited" "access_log" {
48
+ delimiter = ","
49
+ header = true
50
+ }
51
+
52
+ table "access_log" {
53
+ format = format.delimited.access_log
54
+
55
+ column "tp_timestamp" {
56
+ source = "timestamp"
57
+ }
58
+
59
+ column "ip_address" {
60
+ type = "varchar"
61
+ }
62
+
63
+ column "user_agent" {
64
+ type = "varchar"
65
+ }
66
+
67
+ column "status_code" {
68
+ type = "integer"
69
+ }
70
+ }
71
+
72
+ partition "access_log" "local" {
73
+ source "file" {
74
+ format = format.delimited.access_log
75
+ paths = ["$SOURCE_FILES_DIR /custom_logs/"]
76
+ file_layout = "access_log.csv"
77
+ }
78
+ }
79
+ EOF
80
+
81
+ # Run collection and verify
82
+ tailpipe collect access_log --progress=false --from=2024-04-30
83
+
84
+ # Verify data was collected correctly
85
+ run tailpipe query " select ip_address, status_code from access_log limit 1" --output csv
86
+ echo $output
87
+
88
+ assert_equal " $output " " ip_address,status_code
89
+ 192.168.1.1,200"
90
+
91
+ # Cleanup
92
+ rm -rf $TAILPIPE_INSTALL_DIR /config/format_delimited.tpc
93
+ }
94
+
95
+ @test " verify jsonl format definition" {
96
+ cat << EOF > $TAILPIPE_INSTALL_DIR /config/format_jsonl.tpc
97
+ format "jsonl" "server_metrics" {
98
+ description = "Server metrics in JSON Lines format"
99
+ }
100
+
101
+ table "server_metrics" {
102
+ format = format.jsonl.server_metrics
103
+
104
+ column "tp_timestamp" {
105
+ source = "timestamp"
106
+ }
107
+
108
+ column "server_id" {
109
+ type = "varchar"
110
+ }
111
+
112
+ column "cpu_usage" {
113
+ type = "float"
114
+ }
115
+
116
+ column "memory_used" {
117
+ type = "integer"
118
+ }
119
+
120
+ column "is_healthy" {
121
+ type = "boolean"
122
+ }
123
+ }
124
+
125
+ partition "server_metrics" "local" {
126
+ source "file" {
127
+ format = format.jsonl.server_metrics
128
+ paths = ["$SOURCE_FILES_DIR /custom_logs/"]
129
+ file_layout = "server_metrics.jsonl"
130
+ }
131
+ }
132
+ EOF
133
+
134
+ # Run collection and verify
135
+ tailpipe collect server_metrics --progress=false --from=2024-04-30
136
+
137
+ # Verify data was collected correctly
138
+ run tailpipe query " select server_id, cpu_usage, memory_used, is_healthy from server_metrics limit 1" --output csv
139
+ echo $output
140
+
141
+ assert_equal " $output " " server_id,cpu_usage,memory_used,is_healthy
142
+ srv-001,75.5,8192,true"
143
+
144
+ # Cleanup
145
+ rm -rf $TAILPIPE_INSTALL_DIR /config/format_jsonl.tpc
146
+ }
147
+
148
+ @test " verify regex format definition" {
149
+ cat << EOF > $TAILPIPE_INSTALL_DIR /config/format_regex.tpc
150
+ format "regex" "plugin_log" {
151
+ layout = \` ^(?P<timestamp>\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}:\d{2}\.\d{3})\s+(?P<timezone>\w+)\s+\[(?P<log_level>\w+)\]\s+(?P<plugin_name>[\w.-]+)?(?:\s*:\s*\[(?P<plugin_log_level>\w+)\]\s+(?P<plugin_timestamp>\d+):\s+)?(?P<message>.*)\`
152
+ }
153
+
154
+ table "plugin_log" {
155
+ format = format.regex.plugin_log
156
+
157
+ column "tp_timestamp" {
158
+ source = "timestamp"
159
+ }
160
+
161
+ column "log_level" {
162
+ type = "varchar"
163
+ }
164
+
165
+ column "plugin_name" {
166
+ type = "varchar"
167
+ }
168
+
169
+ column "plugin_log_level" {
170
+ type = "varchar"
171
+ }
172
+
173
+ column "message" {
174
+ type = "varchar"
175
+ }
176
+ }
177
+
178
+ partition "plugin_log" "local" {
179
+ source "file" {
180
+ format = format.regex.plugin_log
181
+ paths = ["$SOURCE_FILES_DIR /custom_logs/"]
182
+ file_layout = \` plugin-%{YEAR:year}-%{MONTHNUM:month}-%{MONTHDAY:day}.log\`
183
+ }
184
+ }
185
+ EOF
186
+
187
+ # Run collection and verify
188
+ tailpipe collect plugin_log --progress=false --from=2025-04-28
189
+
190
+ # Verify data was collected correctly
191
+ run tailpipe query " select log_level, plugin_name, message from plugin_log limit 1" --output csv
192
+ echo $output
193
+
194
+ assert_equal " $output " " log_level,plugin_name,message
195
+ DEBUG,steampipe-plugin-aws.plugin,\" retrying request Lambda/ListFunctions, attempt 8\" "
196
+
197
+ # Cleanup
198
+ rm -rf $TAILPIPE_INSTALL_DIR /config/format_regex.tpc
199
+ }
200
+
201
+ @test " verify grok format with nested patterns" {
202
+ cat << EOF > $TAILPIPE_INSTALL_DIR /config/format_grok_nested.tpc
203
+ format "grok" "aws_log" {
204
+ layout = \` %{TIMESTAMP_ISO8601:timestamp} \[%{LOGLEVEL:log_level}\] \[AWS\] RequestID: %{NOTSPACE:request_id}, Service: %{WORD:service}, Operation: %{WORD:operation}(?:, Duration: %{NUMBER:duration}ms)?(?:, Error: %{GREEDYDATA:error})?\`
205
+ }
206
+
207
+ table "aws_log" {
208
+ format = format.grok.aws_log
209
+
210
+ column "tp_timestamp" {
211
+ source = "timestamp"
212
+ }
213
+
214
+ column "log_level" {
215
+ type = "varchar"
216
+ }
217
+
218
+ column "request_id" {
219
+ type = "varchar"
220
+ }
221
+
222
+ column "service" {
223
+ type = "varchar"
224
+ }
225
+
226
+ column "operation" {
227
+ type = "varchar"
228
+ }
229
+
230
+ column "duration" {
231
+ type = "integer"
232
+ }
233
+
234
+ column "error" {
235
+ type = "varchar"
236
+ }
237
+ }
238
+
239
+ partition "aws_log" "local" {
240
+ source "file" {
241
+ format = format.grok.aws_log
242
+ paths = ["$SOURCE_FILES_DIR /custom_logs/"]
243
+ file_layout = "nested_patterns.log"
244
+ }
245
+ }
246
+ EOF
247
+
248
+ # Run collection and verify
249
+ tailpipe collect aws_log --progress=false --from=2024-04-30
250
+
251
+ # Verify data was collected correctly
252
+ run tailpipe query " select log_level, service, operation, duration from aws_log order by request_id" --output csv
253
+ echo $output
254
+
255
+ assert_equal " $output " " log_level,service,operation,duration
256
+ INFO,s3,ListBuckets,150
257
+ ERROR,ec2,DescribeInstances,
258
+ DEBUG,lambda,Invoke,45"
259
+
260
+ # Cleanup
261
+ rm -rf $TAILPIPE_INSTALL_DIR /config/format_grok_nested.tpc
262
+ }
263
+
# Bats runs teardown after every test: remove collected data so each test
# starts from a clean state. ${VAR:?} aborts if the env var is unset/empty,
# preventing an accidental `rm -rf /data`.
function teardown() {
  rm -rf "${TAILPIPE_INSTALL_DIR:?}/data"
}