Skip to content

Commit 8a50aa3

Browse files
authored
tests: custom tables - core formats and column/table blocks (#392)
* test: add core format tests and test data files - Add test data files for CSV, Grok nested patterns, plugin logs, and JSONL metrics - Add core format tests for regex, delimited, grok, and jsonl formats - Include test cases for null values and nested patterns * ci: add core_formats to acceptance test matrix * test: add column block tests
1 parent 604ec3f commit 8a50aa3

File tree

10 files changed

+454
-5
lines changed

10 files changed

+454
-5
lines changed

.github/workflows/11-test-acceptance.yaml

+2
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,8 @@ jobs:
9595
- "partition_tests"
9696
- "file_source"
9797
- "partition_delete"
98+
- "core_formats"
99+
- "table_block"
98100
runs-on: ${{ matrix.platform }}
99101
steps:
100102
- name: Checkout

internal/parse/decode.go

-5
Original file line numberDiff line numberDiff line change
@@ -215,11 +215,6 @@ func handleUnknownHcl(block *hcl.Block, parseCtx *ConfigParseContext, unknownAtt
215215
var diags hcl.Diagnostics
216216
unknown := &config.HclBytes{}
217217

218-
// First, include the block's own range
219-
hclBytes := parseCtx.FileData[block.DefRange.Filename]
220-
blockRange := hclhelpers.BlockRangeWithLabels(block)
221-
unknown.Merge(config.HclBytesForLines(hclBytes, blockRange))
222-
223218
for _, attr := range unknownAttrs {
224219
// get the hcl bytes for the file
225220
hclBytes := parseCtx.FileData[block.DefRange.Filename]
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
timestamp,ip_address,user_agent,status_code
2+
2024-05-01T10:30:45Z,192.168.1.1,"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)",200
3+
2024-05-01T10:31:00Z,192.168.1.2,"Mozilla/5.0 (Windows NT 10.0; Win64; x64)",404
4+
2024-05-01T10:32:15Z,192.168.1.3,"Mozilla/5.0 (Linux; Android 10)",200
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
2024-05-01T10:30:45Z [INFO] [AWS] RequestID: req-1234, Service: s3, Operation: ListBuckets, Duration: 150ms
2+
2024-05-01T10:31:00Z [ERROR] [AWS] RequestID: req-5678, Service: ec2, Operation: DescribeInstances, Error: {"code": "InvalidInstanceID", "message": "The instance ID 'i-1234567890abcdef0' does not exist"}
3+
2024-05-01T10:32:15Z [DEBUG] [AWS] RequestID: req-9012, Service: lambda, Operation: Invoke, Duration: 45ms
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
id,status,value,description
2+
1,active,42,normal value
3+
2,inactive,0,zero value
4+
3,active,-1,negative value
5+
4,active,,empty value
6+
5,active,999,special value
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
2025-04-28 15:16:35.733 UTC [DEBUG] steampipe-plugin-aws.plugin: [DEBUG] 1744125262935: retrying request Lambda/ListFunctions, attempt 8
2+
2025-04-28 15:16:35.733 UTC [INFO] steampipe-plugin-aws.plugin: [INFO] 1744125273258: BackoffDelay: attempt=8, retryTime=2m55.50675s, err=https response error StatusCode: 0, RequestID: , request send failed, Get "https://lambda.ap-northeast-1.amazonaws.com/2015-03-31/functions?MaxItems=10000": lookup lambda.ap-northeast-1.amazonaws.com on 192.168.1.254:53: read udp 192.168.1.204:57677->192.168.1.254:53: i/o timeout
3+
2025-04-28 15:16:36.033 UTC [DEBUG] steampipe-plugin-aws.plugin: [DEBUG] 1744125262935: retrying request Lambda/ListFunctions, attempt 8
4+
2025-04-28 15:16:36.033 UTC [INFO] steampipe-plugin-aws.plugin: [INFO] 1744125273258: BackoffDelay: attempt=8, retryTime=2m16.14075s, err=https response error StatusCode: 0, RequestID: , request send failed, Get "https://lambda.ap-southeast-2.amazonaws.com/2015-03-31/functions?MaxItems=10000": lookup lambda.ap-southeast-2.amazonaws.com on 192.168.1.254:53: read udp 192.168.1.204:54718->192.168.1.254:53: i/o timeout
5+
2025-04-28 15:16:38.463 UTC [DEBUG] steampipe-plugin-aws.plugin: [DEBUG] 1744125262935: retrying request Lambda/ListFunctions, attempt 8
6+
2025-04-28 15:16:38.464 UTC [INFO] steampipe-plugin-aws.plugin: [INFO] 1744125273258: BackoffDelay: attempt=8, retryTime=2m44.025s, err=https response error StatusCode: 0, RequestID: , request send failed, Get "https://lambda.ap-northeast-3.amazonaws.com/2015-03-31/functions?MaxItems=10000": lookup lambda.ap-northeast-3.amazonaws.com on 192.168.1.254:53: read udp 192.168.1.204:55845->192.168.1.254:53: i/o timeout
7+
2025-04-28 15:16:58.442 UTC [INFO] PluginManager Shutdown
8+
2025-04-28 15:16:58.442 UTC [INFO] PluginManager closing pool
9+
2025-04-28 15:16:58.442 UTC [INFO] Kill plugin hub.steampipe.io/plugins/turbot/terraform@latest (0xc00092e100)
10+
2025-04-28 15:16:58.442 UTC [DEBUG] PluginManager killPlugin start
11+
2025-04-28 15:16:58.442 UTC [INFO] PluginManager killing plugin hub.steampipe.io/plugins/turbot/terraform@latest (81771)
12+
2025-04-28 15:16:58.456 UTC [DEBUG] stdio: received EOF, stopping recv loop: err="rpc error: code = Unavailable desc = error reading from server: EOF"
13+
2025-04-28 15:16:58.460 UTC [INFO] plugin process exited: plugin=/Users/jsmyth/.steampipe/plugins/hub.steampipe.io/plugins/turbot/terraform@latest/steampipe-plugin-terraform.plugin id=81771
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
{"timestamp": "2024-05-01T10:30:45Z", "server_id": "srv-001", "cpu_usage": 75.5, "memory_used": 8192, "is_healthy": true}
2+
{"timestamp": "2024-05-01T10:31:00Z", "server_id": "srv-002", "cpu_usage": 90.2, "memory_used": 16384, "is_healthy": false}
3+
{"timestamp": "2024-05-01T10:32:15Z", "server_id": "srv-003", "cpu_usage": 45.8, "memory_used": 4096, "is_healthy": true}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
timestamp,raw_value,status_code,user_agent,ip_address,custom_time
2+
2024-05-01T10:00:00Z,42,200,Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36,192.168.1.1,2024-05-01 10:00:00
3+
2024-05-01T10:01:00Z,99,404,Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7),10.0.0.1,2024-05-01 10:01:00
4+
2024-05-01T10:02:00Z,150,500,curl/7.68.0,172.16.0.1,2024-05-01 10:02:00
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,266 @@
1+
load "$LIB_BATS_ASSERT/load.bash"
2+
load "$LIB_BATS_SUPPORT/load.bash"
3+
4+
# Writes a grok format/table/partition config, collects the sample plugin log
# and checks that the grok captures are queryable as columns.
@test "verify grok format definition" {
  # Unquoted heredoc on purpose: $SOURCE_FILES_DIR must expand, while the
  # escaped backticks are written to the config file as literal backticks.
  cat << EOF > "$TAILPIPE_INSTALL_DIR/config/format_grok.tpc"
format "grok" "steampipe_plugin" {
  layout = \`%{TIMESTAMP_ISO8601:timestamp} %{WORD:timezone} \[%{LOGLEVEL:severity}\]\s+(?:%{NOTSPACE:plugin_name}: \[%{LOGLEVEL:plugin_severity}\]\s+%{NUMBER:plugin_timestamp}:\s+)?%{GREEDYDATA:message}\`
}

table "steampipe_plugin" {
  format = format.grok.steampipe_plugin

  column "tp_timestamp" {
    source = "timestamp"
  }

  column "plugin_timestamp" {
    type = "timestamp"
  }
}

partition "steampipe_plugin" "local" {
  source "file" {
    format = format.grok.steampipe_plugin
    paths = ["$SOURCE_FILES_DIR/custom_logs/"]
    file_layout = \`plugin-%{YEAR:year}-%{MONTHNUM:month}-%{MONTHDAY:day}.log\`
  }
}
EOF

  # Run collection and verify
  tailpipe collect steampipe_plugin --progress=false --from=2025-04-26

  # Verify data was collected correctly.
  # Sort before "limit 1": without an ORDER BY the row returned is arbitrary,
  # and only some log lines carry a plugin_name. The earliest lines in the
  # fixture both come from steampipe-plugin-aws.plugin.
  run tailpipe query "select plugin_name, tp_timestamp from steampipe_plugin order by tp_timestamp limit 1" --output csv
  # Quoted so the multi-line CSV is printed as-is when the test fails.
  echo "$output"

  assert_equal "$output" "plugin_name,tp_timestamp
steampipe-plugin-aws.plugin,2025-04-28 15:16:35"

  # Cleanup
  rm -f "$TAILPIPE_INSTALL_DIR/config/format_grok.tpc"
}
44+
45+
# Writes a delimited (CSV with header) format/table/partition config, collects
# access_log.csv and checks the typed columns.
@test "verify delimited format definition" {
  # Unquoted heredoc on purpose: $SOURCE_FILES_DIR must expand.
  cat << EOF > "$TAILPIPE_INSTALL_DIR/config/format_delimited.tpc"
format "delimited" "access_log" {
  delimiter = ","
  header = true
}

table "access_log" {
  format = format.delimited.access_log

  column "tp_timestamp" {
    source = "timestamp"
  }

  column "ip_address" {
    type = "varchar"
  }

  column "user_agent" {
    type = "varchar"
  }

  column "status_code" {
    type = "integer"
  }
}

partition "access_log" "local" {
  source "file" {
    format = format.delimited.access_log
    paths = ["$SOURCE_FILES_DIR/custom_logs/"]
    file_layout = "access_log.csv"
  }
}
EOF

  # Run collection and verify
  tailpipe collect access_log --progress=false --from=2024-04-30

  # Verify data was collected correctly.
  # Sort before "limit 1" so the earliest record is always the one compared;
  # without an ORDER BY the row returned is arbitrary.
  run tailpipe query "select ip_address, status_code from access_log order by tp_timestamp limit 1" --output csv
  # Quoted so the multi-line CSV is printed as-is when the test fails.
  echo "$output"

  assert_equal "$output" "ip_address,status_code
192.168.1.1,200"

  # Cleanup
  rm -f "$TAILPIPE_INSTALL_DIR/config/format_delimited.tpc"
}
94+
95+
# Writes a jsonl format/table/partition config, collects server_metrics.jsonl
# and checks that varchar, float, integer and boolean columns round-trip.
@test "verify jsonl format definition" {
  # Unquoted heredoc on purpose: $SOURCE_FILES_DIR must expand.
  cat << EOF > "$TAILPIPE_INSTALL_DIR/config/format_jsonl.tpc"
format "jsonl" "server_metrics" {
  description = "Server metrics in JSON Lines format"
}

table "server_metrics" {
  format = format.jsonl.server_metrics

  column "tp_timestamp" {
    source = "timestamp"
  }

  column "server_id" {
    type = "varchar"
  }

  column "cpu_usage" {
    type = "float"
  }

  column "memory_used" {
    type = "integer"
  }

  column "is_healthy" {
    type = "boolean"
  }
}

partition "server_metrics" "local" {
  source "file" {
    format = format.jsonl.server_metrics
    paths = ["$SOURCE_FILES_DIR/custom_logs/"]
    file_layout = "server_metrics.jsonl"
  }
}
EOF

  # Run collection and verify
  tailpipe collect server_metrics --progress=false --from=2024-04-30

  # Verify data was collected correctly.
  # Sort before "limit 1" so the earliest record is always the one compared;
  # without an ORDER BY the row returned is arbitrary.
  run tailpipe query "select server_id, cpu_usage, memory_used, is_healthy from server_metrics order by tp_timestamp limit 1" --output csv
  # Quoted so the multi-line CSV is printed as-is when the test fails.
  echo "$output"

  assert_equal "$output" "server_id,cpu_usage,memory_used,is_healthy
srv-001,75.5,8192,true"

  # Cleanup
  rm -f "$TAILPIPE_INSTALL_DIR/config/format_jsonl.tpc"
}
147+
148+
# Writes a regex format/table/partition config, collects the sample plugin log
# and checks that the named capture groups are queryable as columns.
@test "verify regex format definition" {
  # Unquoted heredoc on purpose: $SOURCE_FILES_DIR must expand, while the
  # escaped backticks are written to the config file as literal backticks.
  cat << EOF > "$TAILPIPE_INSTALL_DIR/config/format_regex.tpc"
format "regex" "plugin_log" {
  layout = \`^(?P<timestamp>\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}:\d{2}\.\d{3})\s+(?P<timezone>\w+)\s+\[(?P<log_level>\w+)\]\s+(?P<plugin_name>[\w.-]+)?(?:\s*:\s*\[(?P<plugin_log_level>\w+)\]\s+(?P<plugin_timestamp>\d+):\s+)?(?P<message>.*)\`
}

table "plugin_log" {
  format = format.regex.plugin_log

  column "tp_timestamp" {
    source = "timestamp"
  }

  column "log_level" {
    type = "varchar"
  }

  column "plugin_name" {
    type = "varchar"
  }

  column "plugin_log_level" {
    type = "varchar"
  }

  column "message" {
    type = "varchar"
  }
}

partition "plugin_log" "local" {
  source "file" {
    format = format.regex.plugin_log
    paths = ["$SOURCE_FILES_DIR/custom_logs/"]
    file_layout = \`plugin-%{YEAR:year}-%{MONTHNUM:month}-%{MONTHDAY:day}.log\`
  }
}
EOF

  # Run collection and verify
  tailpipe collect plugin_log --progress=false --from=2025-04-28

  # Verify data was collected correctly.
  # Sort before "limit 1": without an ORDER BY the row returned is arbitrary.
  # The two earliest fixture lines share a timestamp (one DEBUG, one INFO), so
  # log_level is the tie-breaker that pins the DEBUG line.
  run tailpipe query "select log_level, plugin_name, message from plugin_log order by tp_timestamp, log_level limit 1" --output csv
  # Quoted so the multi-line CSV is printed as-is when the test fails.
  echo "$output"

  assert_equal "$output" "log_level,plugin_name,message
DEBUG,steampipe-plugin-aws.plugin,\"retrying request Lambda/ListFunctions, attempt 8\""

  # Cleanup
  rm -f "$TAILPIPE_INSTALL_DIR/config/format_regex.tpc"
}
200+
201+
# Writes a grok format with optional (Duration / Error) groups, collects
# nested_patterns.log and checks that a line without a Duration yields an
# empty duration column.
@test "verify grok format with nested patterns" {
  # Unquoted heredoc on purpose: $SOURCE_FILES_DIR must expand, while the
  # escaped backticks are written to the config file as literal backticks.
  cat << EOF > "$TAILPIPE_INSTALL_DIR/config/format_grok_nested.tpc"
format "grok" "aws_log" {
  layout = \`%{TIMESTAMP_ISO8601:timestamp} \[%{LOGLEVEL:log_level}\] \[AWS\] RequestID: %{NOTSPACE:request_id}, Service: %{WORD:service}, Operation: %{WORD:operation}(?:, Duration: %{NUMBER:duration}ms)?(?:, Error: %{GREEDYDATA:error})?\`
}

table "aws_log" {
  format = format.grok.aws_log

  column "tp_timestamp" {
    source = "timestamp"
  }

  column "log_level" {
    type = "varchar"
  }

  column "request_id" {
    type = "varchar"
  }

  column "service" {
    type = "varchar"
  }

  column "operation" {
    type = "varchar"
  }

  column "duration" {
    type = "integer"
  }

  column "error" {
    type = "varchar"
  }
}

partition "aws_log" "local" {
  source "file" {
    format = format.grok.aws_log
    paths = ["$SOURCE_FILES_DIR/custom_logs/"]
    file_layout = "nested_patterns.log"
  }
}
EOF

  # Run collection and verify
  tailpipe collect aws_log --progress=false --from=2024-04-30

  # Verify data was collected correctly (rows are sorted by request_id so the
  # expected output below is stable).
  run tailpipe query "select log_level, service, operation, duration from aws_log order by request_id" --output csv
  # Quoted so the multi-line CSV is printed as-is when the test fails.
  echo "$output"

  assert_equal "$output" "log_level,service,operation,duration
INFO,s3,ListBuckets,150
ERROR,ec2,DescribeInstances,
DEBUG,lambda,Invoke,45"

  # Cleanup
  rm -f "$TAILPIPE_INSTALL_DIR/config/format_grok_nested.tpc"
}
263+
264+
# Bats hook run after every test: removes the collected data directory so each
# test starts from an empty store.
function teardown() {
  # ":?" aborts the expansion when TAILPIPE_INSTALL_DIR is unset or empty, so
  # this can never degrade into "rm -rf /data"; the quotes keep a path that
  # contains spaces as a single argument.
  rm -rf "${TAILPIPE_INSTALL_DIR:?}/data"
}

0 commit comments

Comments
 (0)