Skip to content

Commit d9c9779

Browse files
committed
Integration tests and kubernetes_monitor / k8s implementation
1 parent c065ef0 commit d9c9779

File tree

7 files changed

+258
-8
lines changed

7 files changed

+258
-8
lines changed

.github/actions/install-k8s-agent/action.yml

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,15 @@ inputs:
99
scalyr_api_key:
1010
description: "Write API key to be used by the agent."
1111
required: true
12+
scalyr_api_key_team_1:
13+
description: "Write API key to be used by the agent."
14+
required: false
15+
scalyr_api_key_team_2:
16+
description: "Write API key to be used by the agent."
17+
required: false
18+
scalyr_api_key_team_3:
19+
description: "Write API key to be used by the agent."
20+
required: false
1221
scalyr_cluster_name:
1322
description: "Cluster name to use."
1423
required: true
@@ -80,6 +89,21 @@ runs:
8089
8190
# Define api key
8291
kubectl create secret generic scalyr-api-key --namespace scalyr --from-literal=scalyr-api-key="${{ inputs.scalyr_api_key }}"
92+
93+
# Create a secret if the scalyr_api_key_team_1 is set
94+
if [ ! -z "${{ inputs.scalyr_api_key_team_1 }}" ]; then
95+
kubectl create secret generic scalyr-api-key-team-1 --namespace scalyr --from-literal=scalyr-api-key="${{ inputs.scalyr_api_key_team_1 }}"
96+
fi
97+
98+
# Create a secret if the scalyr_api_key_team_2 is set
99+
if [ ! -z "${{ inputs.scalyr_api_key_team_2 }}" ]; then
100+
kubectl create secret generic scalyr-api-key-team-2 --namespace scalyr --from-literal=scalyr-api-key="${{ inputs.scalyr_api_key_team_2 }}"
101+
fi
102+
103+
# Create a secret if the scalyr_api_key_team_3 is set
104+
if [ ! -z "${{ inputs.scalyr_api_key_team_3 }}" ]; then
105+
kubectl create secret generic scalyr-api-key-team-3 --namespace scalyr --from-literal=scalyr-api-key="${{ inputs.scalyr_api_key_team_3 }}"
106+
fi
83107
84108
# Create configmap
85109
kubectl create configmap --namespace scalyr scalyr-config \

.github/workflows/reusable-agent-build-container-images.yml

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -369,9 +369,11 @@ jobs:
369369
minikube image ls
370370
kubectl apply -f tests/e2e/k8s_k8s_monitor/std_printer_deployment.yaml
371371
kubectl apply -f tests/e2e/k8s_k8s_monitor/long_message_printer_deployment.yaml
372+
kubectl apply -f tests/e2e/k8s_k8s_monitor/multiple_account_printers.yaml
372373
373374
kubectl wait --for=condition=ready pod -l app=std-printer
374375
kubectl wait --for=condition=ready pod -l app=long-message-printer
376+
kubectl wait --for=condition=ready pod -l app=multiple-account-printer
375377
kubectl get pods -A
376378
377379
export APP_POD_NAME=$(kubectl get pod --namespace=default --selector=app=std-printer -o jsonpath="{.items[0].metadata.name}")
@@ -398,6 +400,9 @@ jobs:
398400
with:
399401
scalyr_server: "agent.scalyr.com"
400402
scalyr_api_key: "${{ secrets.CT_SCALYR_TOKEN_PROD_US_CLOUDTECH_TESTING_WRITE }}"
403+
scalyr_api_key_team_1: "${{ secrets.CT_SCALYR_TOKEN_PROD_US_CLOUDTECH_TESTING_2_WRITE }}"
404+
scalyr_api_key_team_2: "${{ secrets.CT_SCALYR_TOKEN_PROD_US_CLOUDTECH_TESTING_3_WRITE }}"
405+
scalyr_api_key_team_3: "${{ secrets.CT_SCALYR_TOKEN_PROD_US_CLOUDTECH_TESTING_4_WRITE }}"
401406
scalyr_cluster_name: "${K8S_CLUSTER_NAME}"
402407
scalyr_k8s_events_disable: "false"
403408
main_yaml_path: "tests/e2e/scalyr-agent-2-daemonset.yaml"
@@ -520,6 +525,56 @@ jobs:
520525
echo "CronJob events checks"
521526
./scripts/cicd/scalyr-query.sh '$serverHost="'${SCALYR_AGENT_POD_NAME}'" $logfile="/var/log/scalyr-agent-2/kubernetes_events.log" $parser="k8sEvents" k8s-kind="CronJob" involvedObjectKind="CronJob" involvedObjectName="hello" watchEventType="ADDED" reason="SawCompletedJob"'
522527
528+
- name: Verify multiaccount records have been ingested
529+
timeout-minutes: 14
530+
env:
531+
scalyr_api_key_team_1: "${{ secrets.CT_SCALYR_TOKEN_PROD_US_CLOUDTECH_TESTING_WRITE }}"
532+
scalyr_api_key_team_2: "${{ secrets.CT_SCALYR_TOKEN_PROD_US_CLOUDTECH_TESTING_WRITE }}"
533+
scalyr_api_key_team_3: "${{ secrets.CT_SCALYR_TOKEN_PROD_US_CLOUDTECH_TESTING_WRITE }}"
534+
SCALYR_AGENT_POD_NAME: "${{ env.SCALYR_AGENT_POD_NAME }}"
535+
run: |
536+
export RETRY_ATTEMPTS="1"
537+
538+
function scalyr_query() {
539+
scalyr query --columns=message "serverHost=\"${SCALYR_AGENT_POD_NAME}\" app=\"multiple-account-printer\" $@"
540+
}
541+
542+
function ingested_line_count() {
543+
API_KEY=$1
544+
MESSAGE=$2
545+
scalyr_readlog_token=${API_KEY} scalyr_query "${MESSAGE}" | wc -l
546+
}
547+
548+
function assert_ingested_once() {
549+
API_KEY=$1
550+
MESSAGE=$2
551+
LINE_COUNT=`scalyr_readlog_token=${API_KEY} scalyr_query "${MESSAGE}" | wc -l`
552+
}
553+
554+
function assert_not_ingested() {
555+
API_KEY=$1
556+
MESSAGE=$2
557+
scalyr_readlog_token=${API_KEY} scalyr_query "${MESSAGE}" && echo -e $SUCCESS_MSG || echo -e $ERR_MSG && exit 1
558+
}
559+
560+
# See tests/e2e/k8s_k8s_monitor/multiple_account_printers.yaml:16 for the following annotations:
561+
# log.config.scalyr.com/attributes.parser: "test-parser-1"
562+
# log.config.scalyr.com/team1.secret: "scalyr-api-key-team-1"
563+
# log.config.scalyr.com/cont1.team2.secret: "scalyr-api-key-team-2"
564+
# log.config.scalyr.com/cont2.team2.secret: "scalyr-api-key-team-2"
565+
# log.config.scalyr.com/cont2.team3.secret: "scalyr-api-key-team-3"
566+
567+
# Container 1 log is ingested with the scalyr-api-key-team-2 only
568+
scalyr_query(
569+
570+
# Container 2 log is ingested with the scalyr-api-key-team-2 and scalyr-api-key-team-3 only
571+
572+
# Container 3 log is ingested with the scalyr-api-key-team-1 only
573+
574+
575+
scalyr_readlog_token=${{ scalyr_api_key_team_2 }} ./scripts/cicd/scalyr-query.sh '$serverHost="'${SCALYR_AGENT_POD_NAME}'" app="multiple-account-printer" "MULTIPLE_ACCOUNT_TEST_CONTAINER_NAME:cont1'
576+
scalyr_readlog_token=${{ scalyr_api_key_team_2 }} ./scripts/cicd/scalyr-query.sh '$serverHost="'${SCALYR_AGENT_POD_NAME}'" app="multiple-account-printer" "MULTIPLE_ACCOUNT_TEST_CONTAINER_NAME:cont1'
577+
523578
- name: Notify Slack on Failure
524579
if: ${{ failure() && github.ref_name == 'master' }}
525580
uses: act10ns/slack@ed1309ab9862e57e9e583e51c7889486b9a00b0f # v2.0.0
@@ -530,6 +585,7 @@ jobs:
530585
steps: ${{ toJson(steps) }}
531586
channel: '#eng-dataset-cloud-tech'
532587

588+
533589
k8s_open_metrics_monitor_tests:
534590
name: OpenMetrics Monitor - k8s ${{ inputs.builder_name }} ${{ matrix.k8s_version.version }}-${{ matrix.k8s_version.runtime}}
535591
runs-on: ubuntu-20.04

scalyr_agent/builtin_monitors/kubernetes_monitor.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3189,6 +3189,7 @@ def __get_log_config_for_container(self, cid, info, k8s_cache, base_attributes):
31893189
parser = "docker"
31903190
common_annotations = {}
31913191
container_annotations = {}
3192+
all_annotations = {}
31923193
# pod name and namespace are set to an invalid value for cases where errors occur and a log
31933194
# message is produced, so that the log message has clearly invalid values for these rather
31943195
# than just being empty
@@ -3271,6 +3272,7 @@ def __get_log_config_for_container(self, cid, info, k8s_cache, base_attributes):
32713272
# by default all annotations will be applied to all containers
32723273
# in the pod
32733274
all_annotations = pod.annotations
3275+
32743276
container_specific_annotations = False
32753277

32763278
# get any common annotations for all containers
@@ -3409,6 +3411,56 @@ def __get_log_config_for_container(self, cid, info, k8s_cache, base_attributes):
34093411
if "parser" in attrs:
34103412
result["parser"] = attrs["parser"]
34113413

3414+
# Based on the pod annotations in a format {container_name}.{team}.secret={secret_name}
3415+
# we might want to add api_keys parameter
3416+
if container_annotations or all_annotations:
3417+
def filter_teams(annotations):
3418+
return {
3419+
team: value
3420+
for team, value in annotations.items()
3421+
if re.fullmatch("team\\d+", team)
3422+
}
3423+
3424+
def fetch_secret(name):
3425+
secret = k8s_cache.secret(pod_namespace, name, time.time())
3426+
if secret:
3427+
return secret
3428+
3429+
self._logger.warning(
3430+
"Failed to fetch secret '%s' for pod '%s/%s'"
3431+
% (name, pod_namespace, pod_name),
3432+
limit_once_per_x_secs=300,
3433+
limit_key="k8s-fetch-secret-%s" % name,
3434+
)
3435+
3436+
def get_secret_api_key(secret):
3437+
api_key = secret.data.get("scalyr-api-key")
3438+
3439+
if not api_key:
3440+
self._logger.warning(
3441+
"Secret '%s/%s' does not contain a scalyr-api-key field, ignoring."
3442+
% (pod_namespace, secret.name),
3443+
limit_once_per_x_secs=300,
3444+
limit_key="k8s-fetch-secret-%s" % secret.name
3445+
)
3446+
3447+
return api_key
3448+
3449+
team_annotations = filter_teams(container_annotations) or filter_teams(all_annotations)
3450+
3451+
api_keys = [
3452+
get_secret_api_key(
3453+
fetch_secret(
3454+
team_annotations[team]["secret"]
3455+
)
3456+
)
3457+
for team in team_annotations.keys()
3458+
if team_annotations.get(team).get("secret")
3459+
]
3460+
3461+
if api_keys:
3462+
result["api_keys"] = list(filter(lambda api_key: api_key is not None, api_keys))
3463+
34123464
return result
34133465

34143466
def __get_docker_logs(self, containers, k8s_cache):

scalyr_agent/copying_manager/copying_manager.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -465,10 +465,11 @@ def config(self):
465465

466466
def add_log_config(self, monitor_name, log_config, force_add=False):
467467
"""Add the log_config item to the list of paths being watched
468+
If force_add is true and the log_config item is marked to be removed the removal will be canceled.
469+
Otherwise, the item will be added only if it's not monitored already.
468470
param: monitor_name - the name of the monitor adding the log config
469471
param: log_config - a log_config object containing the path to be added
470-
param force_add: True or force add this file and cancel any removal which
471-
may have been scheduled before hand.
472+
param force_add: bool, see above
472473
We really just want to use this with Docker monitor where there is a small window between
473474
the container restart where the log file is not immediately removed.
474475
returns: an updated log_config object
@@ -570,7 +571,8 @@ def update_log_config(self, monitor_name, log_config):
570571

571572
def remove_log_path(self, monitor_name, log_path):
572573
"""Remove the log_path from the list of paths being watched
573-
params: log_path - a string containing the path to the file no longer being watched
574+
param: monitor - the monitor removing the path
575+
param: log_path - a string containing path of the log file to remove
574576
"""
575577
# get the list of paths with 0 reference counts
576578
self.__lock.acquire()

scalyr_agent/log_watcher.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -24,11 +24,16 @@ class LogWatcher(object):
2424
to add/remove a set of log paths
2525
"""
2626

27-
def add_log_config(self, monitor_name, log_config):
28-
"""Add the path specified by the log_config to the list of paths being watched
29-
param: monitor_name - the name of the monitor adding the log_config
30-
param: log_config - a log_config object containing at least a path
31-
returns: the log_config variable with updated path and default information
27+
def add_log_config(self, monitor_name, log_config, force_add):
28+
"""Add the log_config item to the list of paths being watched
29+
If force_add is true and the log_config item is marked to be removed the removal will be canceled.
30+
Otherwise, the item will be added only if it's not monitored already.
31+
param: monitor_name - the name of the monitor adding the log config
32+
param: log_config - a log_config object containing the path to be added
33+
param force_add: bool, see above
34+
We really just want to use this with Docker monitor where there is a small window between
35+
the container restart where the log file is not immediately removed.
36+
returns: an updated log_config object
3237
"""
3338
pass
3439

scalyr_agent/monitor_utils/k8s.py

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,11 @@
102102
"list": Template("/api/v1/namespaces/${namespace}/pods"),
103103
"list-all": "/api/v1/pods",
104104
},
105+
"Secret": {
106+
"single": Template("/api/v1/namespaces/${namespace}/secrets/${name}"),
107+
"list": Template("/api/v1/namespaces/${namespace}/secrets"),
108+
"list-all": "/api/v1/secrets"
109+
},
105110
"ReplicaSet": {
106111
"single": Template("/apis/apps/v1/namespaces/${namespace}/replicasets/${name}"),
107112
"list": Template("/apis/apps/v1/namespaces/${namespace}/replicasets"),
@@ -518,6 +523,16 @@ def __repr__(self):
518523
return str(self.__dict__)
519524

520525

526+
class Secret(object):
527+
def __init__(self, name, namespace, data, kind, string_data, type):
528+
self.name = name
529+
self.namespace = namespace
530+
self.data = data
531+
self.kind = kind
532+
self.string_data = string_data
533+
self.type = type
534+
535+
521536
class Controller(object):
522537
"""
523538
General class for all cached Controller objects
@@ -1012,6 +1027,19 @@ def process_object(self, k8s, obj, query_options=None):
10121027
return result
10131028

10141029

1030+
class SecretProcessor(_K8sProcessor):
1031+
def process_object(self, k8s, obj, query_options=None):
1032+
metadata = obj.get("metadata", {})
1033+
kind = obj.get("kind", "")
1034+
namespace = metadata.get("namespace", "")
1035+
name = metadata.get("name", "")
1036+
data = obj.get("data", {})
1037+
string_data = obj.get("stringData", {})
1038+
type = obj.get("type", "")
1039+
1040+
return Secret(name, namespace, data, kind, string_data, type)
1041+
1042+
10151043
class ControllerProcessor(_K8sProcessor):
10161044
def process_object(self, k8s, obj, query_options=None):
10171045
"""Generate a Controller object from a JSON object
@@ -1292,6 +1320,10 @@ def __init__(
12921320
self._pod_processor = PodProcessor(self._controllers)
12931321
self._pods_cache = _K8sCache(self._pod_processor, "Pod")
12941322

1323+
# create the secret cache
1324+
self._secret_processor = SecretProcessor()
1325+
self._secrets_cache = _K8sCache(self._secret_processor, "Secret")
1326+
12951327
self._cluster_name = None
12961328
self._api_server_version = None
12971329
# The last time (in seconds since epoch) we updated the K8s version number via a query
@@ -1571,6 +1603,7 @@ def update_cache(self, run_state):
15711603
scalyr_logging.DEBUG_LEVEL_1, "Marking unused pods as expired"
15721604
)
15731605
self._pods_cache.mark_as_expired(current_time)
1606+
self._secrets_cache.mark_as_expired(current_time)
15741607

15751608
self._update_cluster_name(local_state.k8s)
15761609
self._update_api_server_version_if_necessary(
@@ -1614,6 +1647,39 @@ def update_cache(self, run_state):
16141647
local_state.cache_expiry_secs - fuzz_factor
16151648
)
16161649

1650+
def secret(
1651+
self,
1652+
namespace,
1653+
name,
1654+
current_time=None,
1655+
allow_expired=False
1656+
):
1657+
"""Returns secret info for the secret specified by namespace and name or None if no secret matches.
1658+
1659+
Warning: Failure to pass current_time leads to incorrect recording of last access times, which will
1660+
lead to these objects being refreshed prematurely (potential source of bugs)
1661+
1662+
Querying the pod information is thread-safe, but the returned object should
1663+
not be written to.
1664+
1665+
@param allow_expired: If True, an object is considered present in cache even if it is expired.
1666+
@type allow_expired: bool
1667+
"""
1668+
local_state = self._state.copy_state()
1669+
1670+
if local_state.k8s is None:
1671+
return
1672+
1673+
return self._secrets_cache.lookup(
1674+
local_state.k8s,
1675+
current_time,
1676+
namespace,
1677+
name,
1678+
kind="Secret",
1679+
allow_expired=allow_expired,
1680+
ignore_k8s_api_exception=ignore_k8s_api_exception,
1681+
)
1682+
16171683
def pod(
16181684
self,
16191685
namespace,
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
apiVersion: apps/v1
2+
kind: Deployment
3+
metadata:
4+
name: multiple-account-printer
5+
namespace: default
6+
spec:
7+
replicas: 1
8+
selector:
9+
matchLabels:
10+
app: multiple-account-printer
11+
template:
12+
metadata:
13+
labels:
14+
app: multiple-account-printer
15+
annotations:
16+
log.config.scalyr.com/attributes.parser: "test-parser-1"
17+
log.config.scalyr.com/team1.secret: "scalyr-api-key-team-1"
18+
log.config.scalyr.com/cont1.team2.secret: "scalyr-api-key-team-2"
19+
log.config.scalyr.com/cont2.team2.secret: "scalyr-api-key-team-2"
20+
log.config.scalyr.com/cont2.team3.secret: "scalyr-api-key-team-3"
21+
spec:
22+
containers:
23+
- name: cont1
24+
image: docker.io/library/busybox:latest
25+
imagePullPolicy: IfNotPresent
26+
command: ["/bin/sh", "-c", "echo MULTIPLE_ACCOUNT_TEST_CONTAINER_NAME:$MULTIPLE_ACCOUNT_TEST_CONTAINER_NAME; sleep 900"]
27+
env:
28+
- name: MULTIPLE_ACCOUNT_TEST_CONTAINER_NAME
29+
value: "cont1"
30+
- name: cont2
31+
image: docker.io/library/busybox:latest
32+
imagePullPolicy: IfNotPresent
33+
command: ["/bin/sh", "-c", "echo MULTIPLE_ACCOUNT_TEST_CONTAINER_NAME:$MULTIPLE_ACCOUNT_TEST_CONTAINER_NAME; sleep 900"]
34+
env:
35+
- name: MULTIPLE_ACCOUNT_TEST_CONTAINER_NAME
36+
value: "cont2"
37+
- name: cont3
38+
image: docker.io/library/busybox:latest
39+
imagePullPolicy: IfNotPresent
40+
command: ["/bin/sh", "-c", "echo MULTIPLE_ACCOUNT_TEST_CONTAINER_NAME:$MULTIPLE_ACCOUNT_TEST_CONTAINER_NAME; sleep 900"]
41+
env:
42+
- name: MULTIPLE_ACCOUNT_TEST_CONTAINER_NAME
43+
value: "cont3"
44+
nodeSelector:
45+
kubernetes.io/os: linux

0 commit comments

Comments
 (0)