Skip to content

Commit 13602ea

Browse files
luckyj5rockb1017
andauthored
Merge 2.0.0 changes to master - V2 upgrade (#258)
* RLP gateway logging, error channel, configurable retry. change es channel size to 10000 (#239) * update go-loggregator lib to include rlp client update for perf improvement (#246) * added retry counter (i) in the error message (#223) * Added additional test cases with running binary nozzle (#226) * Update test_nozzle_configurations.py (#234) * Added performance tests * Data flow monitor (#242) Co-authored-by: Shubham Jain <[email protected]> * Add telemetry data (appname and appversion) to HEC requests (#250) * Metadata config filter (#251) * updated tile to add fields for metadata filter, rlp-retries, queue monitoring, updated unit-tests * Improve efficiency in event source monitoring and AddAppInfo param clean up (#254) * clean event source and restructure AddAppInfo param * update app-info config param in the tile (#255) * update config per new params and versions (#256) Co-authored-by: Rock Baek <[email protected]>
1 parent ba4518a commit 13602ea

File tree

126 files changed

+13593
-351
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

126 files changed

+13593
-351
lines changed

.circleci/ci_nozzle_manifest.yml

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,18 +7,18 @@ applications:
77
cmd: splunk-firehose-nozzle
88
env:
99
GOPACKAGENAME: main
10-
API_ENDPOINT:
11-
CLIENT_ID:
12-
CLIENT_SECRET:
13-
SPLUNK_HOST:
14-
SPLUNK_TOKEN:
15-
SPLUNK_INDEX:
10+
API_ENDPOINT:
11+
CLIENT_ID:
12+
CLIENT_SECRET:
13+
SPLUNK_HOST:
14+
SPLUNK_TOKEN:
15+
SPLUNK_INDEX: main
1616
SKIP_SSL_VALIDATION_CF: true
1717
SKIP_SSL_VALIDATION_SPLUNK: true
1818
JOB_NAME: splunk-nozzle
1919
JOB_INDEX: -1
2020
JOB_HOST: localhost
21-
ADD_APP_INFO: true
21+
ADD_APP_INFO: AppName,OrgName,OrgGuid,SpaceName,SpaceGuid
2222
IGNORE_MISSING_APP: true
2323
MISSING_APP_CACHE_INVALIDATE_TTL: 3600s
2424
APP_CACHE_INVALIDATE_TTL: 86440s
@@ -34,3 +34,5 @@ applications:
3434
HEC_WORKERS: 8
3535
DEBUG: false
3636
ENABLE_EVENT_TRACING: true
37+
RLP_GATEWAY_RETRIES: 5
38+
STATUS_MONITOR_INTERVAL: 0s

.circleci/config.yml

Lines changed: 53 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,37 @@ jobs:
102102
when:
103103
always
104104

105+
execute_perf_tests:
106+
docker:
107+
- image: circleci/golang:1.12
108+
working_directory: /go/src/github.com/cloudfoundry-community/splunk-firehose-nozzle
109+
steps:
110+
- attach_workspace:
111+
at: /tmp
112+
- checkout
113+
- run:
114+
name: Install dependencies
115+
command: |
116+
curl https://glide.sh/get | sh
117+
go get -t ./...
118+
cp -R /tmp/splunk-firehose-nozzle .
119+
- run:
120+
name: Prepare test environment
121+
command: |
122+
.circleci/pre-req.sh
123+
.circleci/pre-functional-test.sh
124+
- run:
125+
name: Executing perf tests
126+
command: |
127+
.circleci/performance-test.sh
128+
- run:
129+
name: Teardown
130+
command: |
131+
echo "Teardown deployment env"
132+
cf delete-org splunk-ci-org -f
133+
when:
134+
always
135+
105136
workflows:
106137
version: 2
107138
build-and-deploy-nozzle:
@@ -110,11 +141,6 @@ workflows:
110141
- deploy-nozzle:
111142
requires:
112143
- build
113-
filters:
114-
branches:
115-
only:
116-
- develop
117-
- master
118144
# - tile-builder:
119145
# requires:
120146
# - deploy-nozzle
@@ -123,10 +149,26 @@ workflows:
123149
# only: master
124150
- execute_tests:
125151
requires:
126-
- build
127152
- deploy-nozzle
128-
filters:
129-
branches:
130-
only:
131-
- develop
132-
- master
153+
154+
155+
# - execute_perf_tests:
156+
# requires:
157+
# - build
158+
# filters:
159+
# branches:
160+
# only:
161+
# - dev/pcf-performance-testing
162+
# nightly:
163+
# triggers:
164+
# - schedule:
165+
# cron: "0 7 * * *"
166+
# filters:
167+
# branches:
168+
# only:
169+
# - dev/pcf-performance-testing
170+
# jobs:
171+
# - build
172+
# - execute_perf_tests:
173+
# requires:
174+
# - build

.circleci/functional-test.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,4 @@
22

33
cd testing/integration
44
. venv/bin/activate
5-
pytest
5+
pytest -v -m Critical

.circleci/performance-test.sh

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
#!/usr/bin/env bash
2+
3+
cd testing/integration
4+
. venv/bin/activate
5+
pytest -v -m Perf_Binary
6+
pytest -v -m Perf_Romote

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,3 +17,6 @@ vendor/github.com/cloudfoundry/sonde-go/definitions/
1717
testing/integration/venv/
1818
testing/integration/config/local.ini
1919
*.pyc
20+
testing/integration/config/env.json
21+
testing/manual_perf/data_gen_manifest.yml
22+

Makefile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,8 @@ testall: test vet race cov
6565
test:
6666
@go test ${PKGS}
6767

68+
testv:
69+
@go test -v ${PKGS}
6870
# Run "short" unit tests
6971
test-short:
7072
@go test -short ${PKGS}

README.md

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ This is recommended for dev environments only.
7979
This is recommended for dev environments only.
8080
* `FIREHOSE_SUBSCRIPTION_ID`: Tags nozzle events with a Firehose subscription id. See https://docs.pivotal.io/pivotalcf/1-11/loggregator/log-ops-guide.html.
8181
* `FIREHOSE_KEEP_ALIVE`: Keep alive duration for the Firehose consumer.
82-
* `ADD_APP_INFO`: Enriches raw data with app details.
82+
* `ADD_APP_INFO`: Enrich raw data with app info. A comma-separated list of app metadata (AppName,OrgName,OrgGuid,SpaceName,SpaceGuid).
8383
* `IGNORE_MISSING_APP`: If the application is missing, then stop repeatedly querying application info from Cloud Foundry.
8484
* `MISSING_APP_CACHE_INVALIDATE_TTL`: How frequently the missing app info cache invalidates.
8585
* `APP_CACHE_INVALIDATE_TTL`: How frequently the app info local cache invalidates.
@@ -95,6 +95,9 @@ This is recommended for dev environments only.
9595
* `HEC_WORKERS`: Set the number of Splunk HEC workers to increase concurrency while ingesting in Splunk.
9696
* `ENABLE_EVENT_TRACING`: Enables event trace logging. Splunk events will now contain a UUID, Splunk Nozzle Event Counts, and a Subscription-ID for Splunk correlation searches.
9797
* `SPLUNK_VERSION`: The Splunk version that determines how HEC ingests metadata fields. Only required for Splunk version 6.3 or below.
98+
* `RLP_GATEWAY_RETRIES`: Number of retries to connect to RLP gateway.
99+
* `STATUS_MONITOR_INTERVAL`: Time interval for monitoring memory queue pressure to help with back-pressure insights.
100+
98101
### Please note
99102
> SPLUNK_VERSION configuration parameter is only required for Splunk version 6.3 and below.
100103
For Splunk version 6.3 or below, please deploy nozzle via CLI. Update nozzle_manifest.yml with splunk_version (e.g. SPLUNK_VERSION: 6.3) as an env variable and [deploy nozzle as an app via CLI](#push-as-an-app-to-cloud-foundry).
@@ -134,11 +137,11 @@ on user authentication.
134137
```
135138

136139
#### Dump application info to boltdb ####
137-
If in production there are lots of PCF applications(say tens of thousands) and if the user would like to enrich
138-
application logs by including application meta data,querying all application metadata information from PCF may take some time.
140+
If in production there are lots of Cloud Foundry applications (say, tens of thousands) and the user would like to enrich
141+
application logs by including application meta data, querying all application metadata information from Cloud Foundry may take some time.
139142
For example, if we add app name, space ID, space name, org ID and org name to the events.
140143
If there are multiple instances of Splunk nozzle deployed, the situation will be even worse, since each of the Splunk nozzle(s) will query all applications meta data and
141-
cache the meta data information to the local boltdb file. These queries will introduce load to the PCF system and could potentially take a long time to finish.
144+
cache the meta data information to the local boltdb file. These queries will introduce load to the Cloud Foundry system and could potentially take a long time to finish.
142145
Users can run this tool to generate a copy of all application meta data and copy this to each Splunk nozzle deployment. Each Splunk nozzle can pick up the cache copy and update the cache file incrementally afterwards.
143146

144147
Example of how to run the dump application info tool:
@@ -167,8 +170,6 @@ applications:
167170
timeout: 180
168171
buildpack: https://github.com/SUSE/stratos-buildpack
169172
health-check-type: port
170-
services:
171-
- splunk-index
172173
env:
173174
SPLUNK_INDEX: testing_index
174175
```
@@ -238,7 +239,7 @@ This topic describes how to troubleshoot Splunk Firehose Nozzle for Cloud Foundr
238239
239240
Are you searching for events and not finding them or looking at a dashboard and seeing "No result found"? Check Splunk Nozzle app logs.
240241
241-
To view the nozzle's logs running on PCF do the following:
242+
To view the nozzle's logs running on Cloud Foundry do the following:
242243
243244
<ol>
244245
<li>Log in as an admin via the CLI.</li>
@@ -310,7 +311,7 @@ A correct setup logs a start message with configuration parameters of the Nozzle
310311
311312
<pre class="terminal">
312313
data: {
313-
add-app-info: true
314+
add-app-info: AppName,OrgName,OrgGuid,SpaceName,SpaceGuid
314315
api-endpoint: https://api.endpoint.com
315316
app-cache-ttl: 0
316317
app-limits: 0
@@ -337,7 +338,8 @@ A correct setup logs a start message with configuration parameters of the Nozzle
337338
splunk-version: 6.6
338339
subscription-id: splunk-firehose
339340
trace-logging: true
340-
version:
341+
rlp-gateway-retries: 5
342+
status-monitor-interval: 0s
341343
wanted-events: ValueMetric,CounterEvent,Error,LogMessage,HttpStartStop,ContainerMetric
342344
}
343345
ip: 10.0.0.0
@@ -394,7 +396,7 @@ Make sure you have the following installed on your workstation:
394396
395397
| Software | Version
396398
| --- | --- |
397-
| go | go1.8.x
399+
| go | go1.12.x
398400
| glide | 0.12.x
399401
400402
Then install all dependent packages via [Glide](https://glide.sh/):
@@ -417,7 +419,7 @@ $ chmod +x tools/nozzle.sh
417419
Build project:
418420
419421
```
420-
$ make VERSION=1.1
422+
$ make VERSION=2.0.0
421423
```
422424
423425
Run tests with [Ginkgo](http://onsi.github.io/ginkgo/)

cache/boltdb.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,13 +78,13 @@ func (c *Boltdb) Open() error {
7878
// Open bolt db
7979
db, err := bolt.Open(c.config.Path, 0600, &bolt.Options{Timeout: 5 * time.Second})
8080
if err != nil {
81-
c.config.Logger.Error("Fail to open boltdb: ", err)
81+
c.config.Logger.Error("Failed to open boltdb: ", err)
8282
return err
8383
}
8484
c.appdb = db
8585

8686
if err := c.createBucket(); err != nil {
87-
c.config.Logger.Error("Fail to create bucket: ", err)
87+
c.config.Logger.Error("Failed to create bucket: ", err)
8888
return err
8989
}
9090

eventrouter/default.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,13 @@ import (
99
"github.com/cloudfoundry/sonde-go/events"
1010
)
1111

12-
type Config struct {
13-
SelectedEvents string
14-
}
12+
type Config = fevents.Config
1513

1614
type router struct {
1715
appCache cache.Cache
1816
sink eventsink.Sink
1917
selectedEvents map[string]bool
18+
config *Config
2019
}
2120

2221
func New(appCache cache.Cache, sink eventsink.Sink, config *Config) (Router, error) {
@@ -30,6 +29,7 @@ func New(appCache cache.Cache, sink eventsink.Sink, config *Config) (Router, err
3029
appCache: appCache,
3130
sink: sink,
3231
selectedEvents: selectedEvents,
32+
config: config,
3333
}, nil
3434
}
3535

@@ -64,7 +64,7 @@ func (r *router) Route(msg *events.Envelope) error {
6464
event.AnnotateWithCFMetaData()
6565

6666
if _, hasAppId := event.Fields["cf_app_id"]; hasAppId {
67-
event.AnnotateWithAppData(r.appCache)
67+
event.AnnotateWithAppData(r.appCache, r.config)
6868
}
6969

7070
if ignored, ok := event.Fields["cf_ignored_app"]; ok {

eventrouter/eventrouter_test.go

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -137,15 +137,6 @@ var _ = Describe("eventrouter", func() {
137137
Expect(len(memSink.Messages)).To(Equal(0))
138138
})
139139

140-
It("Route ignore app", func() {
141-
noCache.SetIgnoreApp(true)
142-
eventType = events.Envelope_LogMessage
143-
err := r.Route(msg)
144-
Ω(err).ShouldNot(HaveOccurred())
145-
Expect(len(memSink.Events)).To(Equal(0))
146-
Expect(len(memSink.Messages)).To(Equal(0))
147-
})
148-
149140
It("Route sink error", func() {
150141
memSink.ReturnErr = true
151142
eventType = events.Envelope_LogMessage

events/events.go

Lines changed: 32 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,23 @@ type Event struct {
1818
Type string
1919
}
2020

21+
type Config struct {
22+
SelectedEvents string
23+
AddAppName bool
24+
AddOrgName bool
25+
AddOrgGuid bool
26+
AddSpaceName bool
27+
AddSpaceGuid bool
28+
}
29+
30+
var AppMetadata = []string{
31+
"AppName",
32+
"OrgName",
33+
"OrgGuid",
34+
"SpaceName",
35+
"SpaceGuid",
36+
}
37+
2138
func HttpStartStop(msg *events.Envelope) *Event {
2239
httpStartStop := msg.GetHttpStartStop()
2340

@@ -125,7 +142,7 @@ func ContainerMetric(msg *events.Envelope) *Event {
125142
}
126143
}
127144

128-
func (e *Event) AnnotateWithAppData(appCache cache.Cache) {
145+
func (e *Event) AnnotateWithAppData(appCache cache.Cache, config *Config) {
129146
cf_app_id := e.Fields["cf_app_id"]
130147
appGuid := fmt.Sprintf("%s", cf_app_id)
131148

@@ -142,39 +159,40 @@ func (e *Event) AnnotateWithAppData(appCache cache.Cache) {
142159
cf_space_name := appInfo.SpaceName
143160
cf_org_id := appInfo.OrgGuid
144161
cf_org_name := appInfo.OrgName
145-
cf_ignored_app := appInfo.IgnoredApp
162+
//cf_ignored_app := appInfo.IgnoredApp
146163
app_env := appInfo.CfAppEnv
147164

148-
if cf_app_name != "" {
165+
if cf_app_name != "" && config.AddAppName {
149166
e.Fields["cf_app_name"] = cf_app_name
150167
}
151168

152-
if cf_space_id != "" {
169+
if cf_space_id != "" && config.AddSpaceGuid {
153170
e.Fields["cf_space_id"] = cf_space_id
154171
}
155172

156-
if cf_space_name != "" {
173+
if cf_space_name != "" && config.AddSpaceName {
157174
e.Fields["cf_space_name"] = cf_space_name
158175
}
159176

160-
if cf_org_id != "" {
177+
if cf_org_id != "" && config.AddOrgGuid {
161178
e.Fields["cf_org_id"] = cf_org_id
162179
}
163180

164-
if cf_org_name != "" {
181+
if cf_org_name != "" && config.AddOrgName {
165182
e.Fields["cf_org_name"] = cf_org_name
166183
}
167184

168185
if app_env["SPLUNK_INDEX"] != nil {
169186
e.Fields["info_splunk_index"] = app_env["SPLUNK_INDEX"]
170187
}
171-
172-
e.Fields["cf_ignored_app"] = cf_ignored_app
188+
//removing cf_ignored_app as per INGEST-17639
189+
//e.Fields["cf_ignored_app"] = cf_ignored_app
173190
}
174191
}
175192

176193
func (e *Event) AnnotateWithCFMetaData() {
177-
e.Fields["cf_origin"] = "firehose"
194+
//removing cf_origin as per INGEST-17639
195+
//e.Fields["cf_origin"] = "firehose"
178196
e.Fields["event_type"] = e.Type
179197
}
180198

@@ -248,3 +266,7 @@ func ParseExtraFields(extraEventsString string) (map[string]string, error) {
248266
}
249267
return extraEvents, nil
250268
}
269+
270+
func AuthorizedMetadata() string {
271+
return strings.Join(AppMetadata, ", ")
272+
}

0 commit comments

Comments
 (0)