Skip to content

Commit 2a390ac

Browse files
authored
Improve access and error log parsing (#1438)
1 parent 4e188b5 commit 2a390ac

File tree

8 files changed

+113
-83
lines changed

8 files changed

+113
-83
lines changed

.github/workflows/ci.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -306,7 +306,7 @@ jobs:
306306
- name: Login to Docker Registry
307307
uses: docker/login-action@9780b0c442fbb1117ed29e0efdff1e18412f7567 # v3.3.0
308308
with:
309-
registry: ${{ secrets.REGISTRY_URL }}
309+
registry: ${{ secrets.TEST_REGISTRY_URL }}
310310
username: ${{ secrets.REGISTRY_USERNAME }}
311311
password: ${{ secrets.REGISTRY_PASSWORD }}
312312
- name: Set Start Time
@@ -320,7 +320,7 @@ jobs:
320320
- name: Run Integration Tests
321321
run: |
322322
go install github.com/goreleaser/nfpm/v2/cmd/nfpm@${{ env.NFPM_VERSION }}
323-
CONTAINER_NGINX_IMAGE_REGISTRY="${{ secrets.REGISTRY_URL }}" TAG="${{ matrix.container.plus }}-${{ matrix.container.image }}-${{ matrix.container.version }}" \
323+
CONTAINER_NGINX_IMAGE_REGISTRY="${{ secrets.TEST_REGISTRY_URL }}" TAG="${{ matrix.container.plus }}-${{ matrix.container.image }}-${{ matrix.container.version }}" \
324324
OS_RELEASE="${{ matrix.container.release }}" IMAGE_PATH="${{ matrix.container.path }}" \
325325
make official-image-integration-test | tee ${{github.workspace}}/test/dashboard/logs/${{github.job}}/${{matrix.container.image}}${{matrix.container.version}}/raw_logs.log && exit "${PIPESTATUS[0]}"
326326
- name: Generate Test Results

docs/proto/proto.md

Lines changed: 31 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,9 @@
1818
- [AgentConnectStatus.StatusCode](#f5-nginx-agent-sdk-AgentConnectStatus-StatusCode)
1919
- [AgentLogging.Level](#f5-nginx-agent-sdk-AgentLogging-Level)
2020

21+
- [command_svc.proto](#command_svc-proto)
22+
- [Commander](#f5-nginx-agent-sdk-Commander)
23+
2124
- [command.proto](#command-proto)
2225
- [AgentActivityStatus](#f5-nginx-agent-sdk-AgentActivityStatus)
2326
- [ChunkedResourceChunk](#f5-nginx-agent-sdk-ChunkedResourceChunk)
@@ -39,9 +42,6 @@
3942
- [NginxConfigStatus.Status](#f5-nginx-agent-sdk-NginxConfigStatus-Status)
4043
- [UploadStatus.TransferStatus](#f5-nginx-agent-sdk-UploadStatus-TransferStatus)
4144

42-
- [command_svc.proto](#command_svc-proto)
43-
- [Commander](#f5-nginx-agent-sdk-Commander)
44-
4545
- [common.proto](#common-proto)
4646
- [CertificateDates](#f5-nginx-agent-sdk-CertificateDates)
4747
- [CertificateName](#f5-nginx-agent-sdk-CertificateName)
@@ -341,6 +341,34 @@ Log level enum
341341

342342

343343

344+
<a name="command_svc-proto"></a>
345+
<p align="right"><a href="#top">Top</a></p>
346+
347+
## command_svc.proto
348+
349+
350+
351+
352+
353+
354+
355+
356+
357+
<a name="f5-nginx-agent-sdk-Commander"></a>
358+
359+
### Commander
360+
Represents a service used to sent command messages between the management server and the agent.
361+
362+
| Method Name | Request Type | Response Type | Description |
363+
| ----------- | ------------ | ------------- | ------------|
364+
| CommandChannel | [Command](#f5-nginx-agent-sdk-Command) stream | [Command](#f5-nginx-agent-sdk-Command) stream | A Bidirectional streaming RPC established by the agent and is kept open |
365+
| Download | [DownloadRequest](#f5-nginx-agent-sdk-DownloadRequest) | [DataChunk](#f5-nginx-agent-sdk-DataChunk) stream | A streaming RPC established by the agent and is used to download resources associated with commands The download stream will be kept open for the duration of the data transfer and will be closed when its done. The transfer is a stream of chunks as follows: header -&gt; data chunk 1 -&gt; data chunk N. Each data chunk is of a size smaller than the maximum gRPC payload |
366+
| Upload | [DataChunk](#f5-nginx-agent-sdk-DataChunk) stream | [UploadStatus](#f5-nginx-agent-sdk-UploadStatus) | A streaming RPC established by the agent and is used to upload resources associated with commands |
367+
368+
369+
370+
371+
344372
<a name="command-proto"></a>
345373
<p align="right"><a href="#top">Top</a></p>
346374

@@ -652,34 +680,6 @@ Transfer status enum
652680

653681

654682

655-
<a name="command_svc-proto"></a>
656-
<p align="right"><a href="#top">Top</a></p>
657-
658-
## command_svc.proto
659-
660-
661-
662-
663-
664-
665-
666-
667-
668-
<a name="f5-nginx-agent-sdk-Commander"></a>
669-
670-
### Commander
671-
Represents a service used to sent command messages between the management server and the agent.
672-
673-
| Method Name | Request Type | Response Type | Description |
674-
| ----------- | ------------ | ------------- | ------------|
675-
| CommandChannel | [Command](#f5-nginx-agent-sdk-Command) stream | [Command](#f5-nginx-agent-sdk-Command) stream | A Bidirectional streaming RPC established by the agent and is kept open |
676-
| Download | [DownloadRequest](#f5-nginx-agent-sdk-DownloadRequest) | [DataChunk](#f5-nginx-agent-sdk-DataChunk) stream | A streaming RPC established by the agent and is used to download resources associated with commands The download stream will be kept open for the duration of the data transfer and will be closed when its done. The transfer is a stream of chunks as follows: header -&gt; data chunk 1 -&gt; data chunk N. Each data chunk is of a size smaller than the maximum gRPC payload |
677-
| Upload | [DataChunk](#f5-nginx-agent-sdk-DataChunk) stream | [UploadStatus](#f5-nginx-agent-sdk-UploadStatus) | A streaming RPC established by the agent and is used to upload resources associated with commands |
678-
679-
680-
681-
682-
683683
<a name="common-proto"></a>
684684
<p align="right"><a href="#top">Top</a></p>
685685

src/core/metrics/sources/common.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ func (n *namedMetric) label(name string) string {
3737
if name == "" {
3838
return ""
3939
}
40+
4041
switch {
4142
case n.namespace != "" && n.group != "":
4243
return strings.Join([]string{n.namespace, n.group, name}, ".")
@@ -45,6 +46,7 @@ func (n *namedMetric) label(name string) string {
4546
case n.group != "":
4647
return strings.Join([]string{n.group, name}, ".")
4748
}
49+
4850
return name
4951
}
5052

src/core/metrics/sources/nginx_access_log.go

Lines changed: 22 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -62,13 +62,13 @@ var logVarRegex = regexp.MustCompile(`\$([a-zA-Z]+[_[a-zA-Z]+]*)`)
6262
// This metrics source is used to tail the NGINX access logs to retrieve metrics.
6363

6464
type NginxAccessLog struct {
65-
baseDimensions *metrics.CommonDim
66-
*namedMetric
65+
baseDimensions *metrics.CommonDim
6766
mu *sync.Mutex
6867
logFormats map[string]string
6968
logs map[string]context.CancelFunc
7069
binary core.NginxBinary
7170
nginxType string
71+
namespace string
7272
collectionInterval time.Duration
7373
buf []*metrics.StatsEntityWrapper
7474
logger *MetricSourceLogger
@@ -85,12 +85,12 @@ func NewNginxAccessLog(
8585

8686
nginxAccessLog := &NginxAccessLog{
8787
baseDimensions,
88-
&namedMetric{namespace: namespace},
8988
&sync.Mutex{},
9089
make(map[string]string),
9190
make(map[string]context.CancelFunc),
9291
binary,
9392
nginxType,
93+
namespace,
9494
collectionInterval,
9595
[]*metrics.StatsEntityWrapper{},
9696
NewMetricSourceLogger(),
@@ -203,8 +203,8 @@ func (c *NginxAccessLog) logStats(ctx context.Context, logFile, logFormat string
203203

204204
logPattern = replaceCustomLogVars(logPattern)
205205

206-
log.Debugf("Collecting from: %s using format: %s", logFile, logFormat)
207-
log.Debugf("Pattern used for tailing logs: %s", logPattern)
206+
log.Debugf("Collecting from %s using format %s", logFile, logFormat)
207+
log.Debugf("Pattern used for tailing logs, %s", logPattern)
208208

209209
httpCounters, upstreamCounters, upstreamCacheCounters := map[string]float64{}, map[string]float64{}, map[string]float64{}
210210
gzipRatios, requestLengths, requestTimes, upstreamResponseLength, upstreamResponseTimes, upstreamConnectTimes, upstreamHeaderTimes := []float64{}, []float64{}, []float64{}, []float64{}, []float64{}, []float64{}, []float64{}
@@ -214,19 +214,21 @@ func (c *NginxAccessLog) logStats(ctx context.Context, logFile, logFormat string
214214
if logPattern == "ltsv" {
215215
t, err := tailer.NewLTSVTailer(logFile)
216216
if err != nil {
217-
log.Errorf("unable to tail %q: %v", logFile, err)
217+
log.Errorf("Unable to tail %q: %v", logFile, err)
218218
return
219219
}
220220
go t.Tail(ctx, data)
221221
} else {
222222
t, err := tailer.NewPatternTailer(logFile, map[string]string{"DEFAULT": logPattern})
223223
if err != nil {
224-
log.Errorf("unable to tail %q: %v", logFile, err)
224+
log.Errorf("Unable to tail %q: %v", logFile, err)
225225
return
226226
}
227227
go t.Tail(ctx, data)
228228
}
229229

230+
accessLogNamedMetric := namedMetric{namespace: c.namespace}
231+
230232
tick := time.NewTicker(c.collectionInterval)
231233
defer tick.Stop()
232234
for {
@@ -295,7 +297,7 @@ func (c *NginxAccessLog) logStats(ctx context.Context, logFile, logFormat string
295297
upstreamCounters[n] = 1
296298
}
297299
} else {
298-
log.Debugf("Error getting upstream status value from access logs, %v", err)
300+
log.Debugf("Error getting upstream status value from access log %s, %v", logFile, err)
299301
}
300302
}
301303

@@ -330,6 +332,8 @@ func (c *NginxAccessLog) logStats(ctx context.Context, logFile, logFormat string
330332
case <-tick.C:
331333
mu.Lock()
332334

335+
log.Tracef("Collecting metrics from access log: %s", logFile)
336+
333337
c.baseDimensions.NginxType = c.nginxType
334338
c.baseDimensions.PublishedAPI = logFile
335339

@@ -361,22 +365,23 @@ func (c *NginxAccessLog) logStats(ctx context.Context, logFile, logFormat string
361365
upstreamCounters["upstream.response.length"] = getAverageMetricValue(upstreamResponseLength)
362366
}
363367

364-
c.group = "http"
365-
simpleMetrics := c.convertSamplesToSimpleMetrics(httpCounters)
366-
367-
c.group = ""
368-
simpleMetrics = append(simpleMetrics, c.convertSamplesToSimpleMetrics(upstreamCounters)...)
368+
accessLogNamedMetric.group = "http"
369+
log.Tracef("Converting httpCounters for %s access log file, httpCounters=%v", logFile, httpCounters)
370+
simpleMetrics := accessLogNamedMetric.convertSamplesToSimpleMetrics(httpCounters)
369371

370-
c.group = ""
371-
simpleMetrics = append(simpleMetrics, c.convertSamplesToSimpleMetrics(upstreamCacheCounters)...)
372+
accessLogNamedMetric.group = ""
373+
log.Tracef("Converting upstreamCounters for %s access log file, upstreamCounters=%v", logFile, upstreamCounters)
374+
simpleMetrics = append(simpleMetrics, accessLogNamedMetric.convertSamplesToSimpleMetrics(upstreamCounters)...)
375+
log.Tracef("Converting upstreamCacheCounters for %s access log file, upstreamCacheCounters=%v", logFile, upstreamCacheCounters)
376+
simpleMetrics = append(simpleMetrics, accessLogNamedMetric.convertSamplesToSimpleMetrics(upstreamCacheCounters)...)
372377

373-
log.Tracef("Access log metrics collected: %v", simpleMetrics)
378+
log.Tracef("Access log %s metrics collected: %v", logFile, simpleMetrics)
374379

375380
// reset the counters
376381
httpCounters, upstreamCounters, upstreamCacheCounters = map[string]float64{}, map[string]float64{}, map[string]float64{}
377382
gzipRatios, requestLengths, requestTimes, upstreamResponseLength, upstreamResponseTimes, upstreamConnectTimes, upstreamHeaderTimes = []float64{}, []float64{}, []float64{}, []float64{}, []float64{}, []float64{}, []float64{}
378383

379-
log.Debugf("access log stats count: %d", len(simpleMetrics))
384+
log.Debugf("Access log %s stats count: %d", logFile, len(simpleMetrics))
380385

381386
c.buf = append(c.buf, metrics.NewStatsEntityWrapper(c.baseDimensions.ToDimensions(), simpleMetrics, proto.MetricsReport_INSTANCE))
382387

src/core/metrics/sources/nginx_error_log.go

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -57,13 +57,13 @@ var regularExpressionErrorMap = map[string][]*re.Regexp{
5757
}
5858

5959
type NginxErrorLog struct {
60-
baseDimensions *metrics.CommonDim
61-
*namedMetric
60+
baseDimensions *metrics.CommonDim
6261
mu *sync.Mutex
6362
logFormats map[string]string
6463
logs map[string]context.CancelFunc
6564
binary core.NginxBinary
6665
nginxType string
66+
namespace string
6767
collectionInterval time.Duration
6868
buf []*metrics.StatsEntityWrapper
6969
}
@@ -78,12 +78,12 @@ func NewNginxErrorLog(
7878
log.Trace("Creating NewNginxErrorLog")
7979
nginxErrorLog := &NginxErrorLog{
8080
baseDimensions,
81-
&namedMetric{namespace: namespace},
8281
&sync.Mutex{},
8382
make(map[string]string),
8483
make(map[string]context.CancelFunc),
8584
binary,
8685
nginxType,
86+
namespace,
8787
collectionInterval,
8888
[]*metrics.StatsEntityWrapper{},
8989
}
@@ -111,7 +111,7 @@ func (c *NginxErrorLog) Stop() {
111111
fn()
112112
delete(c.logs, f)
113113
}
114-
log.Debugf("Stopping NginxErrorLog source for nginx id: %v", c.baseDimensions.NginxId)
114+
log.Debugf("Stopping NginxErrorLog source for NGINX ID: %v", c.baseDimensions.NginxId)
115115
}
116116

117117
func (c *NginxErrorLog) Update(dimensions *metrics.CommonDim, collectorConf *metrics.NginxCollectorConfig) {
@@ -189,14 +189,14 @@ func (c *NginxErrorLog) collectLogStats(_ context.Context, m chan<- *metrics.Sta
189189
}
190190

191191
func (c *NginxErrorLog) logStats(ctx context.Context, logFile string) {
192-
log.Debugf("Collecting from error log: %s", logFile)
192+
log.Debugf("Collecting from error log %s", logFile)
193193

194194
counters := map[string]float64{}
195195
mu := sync.Mutex{}
196196

197197
t, err := tailer.NewTailer(logFile)
198198
if err != nil {
199-
log.Errorf("Unable to tail %q: %v", logFile, err)
199+
log.Errorf("Unable to tail %s, %v", logFile, err)
200200
return
201201
}
202202
data := make(chan string, 1024)
@@ -205,6 +205,8 @@ func (c *NginxErrorLog) logStats(ctx context.Context, logFile string) {
205205
tick := time.NewTicker(c.collectionInterval)
206206
defer tick.Stop()
207207

208+
errorLogNamedMetric := namedMetric{namespace: c.namespace}
209+
208210
for {
209211
select {
210212
case d := <-data:
@@ -228,15 +230,21 @@ func (c *NginxErrorLog) logStats(ctx context.Context, logFile string) {
228230

229231
case <-tick.C:
230232
mu.Lock()
233+
234+
log.Tracef("Collecting metrics from error log: %s", logFile)
235+
231236
c.baseDimensions.NginxType = c.nginxType
232237
c.baseDimensions.PublishedAPI = logFile
233238

234-
simpleMetrics := c.convertSamplesToSimpleMetrics(counters)
235-
log.Tracef("Error log metrics collected: %v", simpleMetrics)
239+
log.Tracef("Converting counters for %s error log file, counters=%v", logFile, counters)
240+
simpleMetrics := errorLogNamedMetric.convertSamplesToSimpleMetrics(counters)
241+
log.Tracef("Error log %s metrics collected: %v", logFile, simpleMetrics)
236242

237243
// reset the counters
238244
counters = map[string]float64{}
239245

246+
log.Debugf("Error log %s stats count: %d", logFile, len(simpleMetrics))
247+
240248
c.buf = append(c.buf, metrics.NewStatsEntityWrapper(c.baseDimensions.ToDimensions(), simpleMetrics, proto.MetricsReport_INSTANCE))
241249

242250
mu.Unlock()

test/performance/vendor/github.com/nginx/agent/v2/src/core/metrics/sources/common.go

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)