diff --git a/Dockerfile b/Dockerfile index 19bfaa1..c95cd6f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM python:3.7-buster +FROM python:3.12-slim COPY requirements.txt /requirements.txt RUN pip install -r /requirements.txt diff --git a/README.md b/README.md index 2d3a608..9dbe64e 100644 --- a/README.md +++ b/README.md @@ -219,18 +219,20 @@ Any admin on GitLab is an admin of the Kubernetes cluster. ## Advanced configuration `gitlab2rbac` supports multiple environment variables for advanced configuration: -| Flag | Description | Default | -|:------------------------------------|:----------------------------------------------------------------------------|:-----------| -|`GITLAB2RBAC_FREQUENCY` |Update interval in seconds. |60 | -|`GITLAB_ADMINS_GROUP` |Base your k8s admins on GitLab namespace (None means GitLab administrators). |None | -|`GITLAB_GROUPS_SEARCH` |Limit to those groups (separated by commas, empty means all groups). |gitlab2rbac | -|`GITLAB_NAMESPACE_GRANULARITY` |Whether to get permissions from GitLab projects or groups. |project | -|`GITLAB_PRIVATE_TOKEN` |Configure gitlab API token. | | -|`GITLAB_TIMEOUT` |Timeout for GitLab operations, in seconds. |10 | -|`GITLAB_URL` |Configure gitlab API target. | | -|`KUBERNETES_AUTO_CREATE` |Replicate GitLab groups/projects as Kubernetes namespaces. |False | -|`KUBERNETES_LOAD_INCLUSTER_CONFIG` |Load configuration inside Kubernetes when gitlab2rbac runs as a pod. |False | -|`KUBERNETES_TIMEOUT` |Timeout for Kubernetes operations, in seconds. |10 | +| Flag | Description | Default | +|:------------------------------------|:----------------------------------------------------------------------------|:------------------| +|`GITLAB2RBAC_FREQUENCY` |Update interval in seconds. |60 | +|`GITLAB_ADMINS_GROUP` |Base your k8s admins on GitLab namespace (None means GitLab administrators). 
|None | +|`GITLAB_GROUPS_IGNORE_LIST` |Groups to ignore (separated by commas, default value is "lost-and-found"). |lost-and-found | +|`GITLAB_GROUPS_SEARCH` |Limit to those groups (separated by commas, empty means all groups). |gitlab2rbac | +|`GITLAB_NAMESPACE_GRANULARITY` |Whether to get permissions from GitLab projects or groups. |project | +|`GITLAB_PRIVATE_TOKEN` |Configure gitlab API token. | | +|`GITLAB_USERNAME_IGNORE_LIST` |GitLab usernames to ignore during synchronisation (separated by commas). | | +|`GITLAB_TIMEOUT` |Timeout for GitLab operations, in seconds. |10 | +|`GITLAB_URL` |Configure gitlab API target. | | +|`KUBERNETES_AUTO_CREATE` |Replicate GitLab groups/projects as Kubernetes namespaces. |False | +|`KUBERNETES_LOAD_INCLUSTER_CONFIG` |Load configuration inside Kubernetes when gitlab2rbac runs as a pod. |False | +|`KUBERNETES_TIMEOUT` |Timeout for Kubernetes operations, in seconds. |10 | ## License MIT diff --git a/gitlab2rbac.py b/gitlab2rbac.py index 63254ab..a66a1d5 100644 --- a/gitlab2rbac.py +++ b/gitlab2rbac.py @@ -3,17 +3,22 @@ import logging from collections import defaultdict from os import environ -from time import sleep +from time import sleep, time import kubernetes +from gql import gql, Client +from gql.transport.requests import RequestsHTTPTransport from gitlab import Gitlab from kubernetes.client.rest import ApiException from slugify import slugify logging.basicConfig( - format="%(asctime)s - %(levelname)s - %(message)s", level=logging.INFO + format="%(asctime)s - %(levelname)s - %(message)s", + level=environ.get("LOGLEVEL", "INFO").upper(), ) +logging.getLogger("gql").setLevel(logging.WARNING) + class GitlabHelper(object): @@ -25,7 +30,17 @@ class GitlabHelper(object): 50: "maintainer", # NOTE: owner is only usable when your permissions are based on group. 
} - def __init__(self, url, token, timeout, groups, namespace_granularity, admins_group): + def __init__( + self, + url, + token, + timeout, + groups, + namespace_granularity, + admins_group, + username_ignore_list, + groups_ignore_list, + ): self.client = None self.gitlab_users = [] self.groups = groups @@ -35,6 +50,8 @@ def __init__(self, url, token, timeout, groups, namespace_granularity, admins_gr self.namespace_granularity = namespace_granularity self.admins_group = admins_group self.namespaces = [] + self.username_ignore_list = username_ignore_list + self.groups_ignore_list = groups_ignore_list def connect(self): """Performs an authentication via private token. @@ -102,7 +119,7 @@ def get_admins(self): {"email": user.email, "id": "{}".format(user.id)} ) logging.info( - u"|user={} email={} access_level=admin".format( + "|user={} email={} access_level=admin".format( user.name, user.email ) ) @@ -112,8 +129,49 @@ def get_admins(self): exit(1) return [] + def check_user(self, user): + if user["bot"] == True: + logging.debug(f"Ignore user {user['username']} because it's a bot") + return False + if user["username"] in self.username_ignore_list: + logging.debug( + f"Ignore user {user['username']} because it's in the ignore list" + ) + return False + if user["state"] != "active": + logging.debug( + f"Ignoring user {user['username']} because is not active" + ) + return False + return True + + def _get_users_query_paginated( + self, gql_client, query, variable_values=None + ): + if variable_values is None: + variable_values = {} + variable_values["first"] = 50 + raw = gql_client.execute( + query, variable_values=variable_values, parse_result=True + ) + nodes = [] + page_info = {"hasNextPage": True} + while page_info.get("hasNextPage"): + variable_values["after"] = page_info.get("endCursor") + results = ( + gql_client.execute( + query, variable_values=variable_values, parse_result=True + ) + .get("group") + .get("groupMembers") + ) + nodes += results.get("nodes") + 
page_info = results.get("pageInfo") + return nodes + def get_users(self, from_namespaces=None): """Returns all users from groups/projects. + We use a GraphQL to minimize the queries made to Gitlab API Args: from_namespaces (list): Retrieve users from this namespaces. @@ -131,23 +189,89 @@ def get_users(self, from_namespaces=None): try: users = [] namespaces = from_namespaces or self.namespaces + query = gql( + """ +query ($first: Int, $after: String, $namespace : ID!) { + group(fullPath: $namespace) { + id + name + parent { + id + } + groupMembers(first: $first, after: $after) { + pageInfo { + endCursor + hasNextPage + } + nodes { + id + accessLevel { + integerValue + stringValue + } + user { + id + bot + username + state + emails { + edges { + node { + email + } + } + } + } + } + } + } +} +""" + ) + transport = RequestsHTTPTransport( + url=f"{self.url}/api/graphql", + headers={ + "Authorization": f"Bearer {self.token}", + "Content-Type": "application/json", + }, + use_json=True, + ) + client = Client( + transport=transport, fetch_schema_from_transport=True + ) for namespace in namespaces: - for member in namespace.members.list(all=True): - user = self.client.users.get(member.id) - users.append( - { - "access_level": member.access_level, - "email": user.email, - "id": "{}".format(user.id), - "namespace": slugify(namespace.name), - } - ) + _start = time() + variable_values = {"namespace": namespace.name} + members = self._get_users_query_paginated( + client, query, variable_values + ) + timespent = time() - _start + logging.debug( + f"Fetched members of group {namespace.name} in {timespent} seconds" + ) + for member in members: + # ignore user if it doesn't pass some checks + if not self.check_user(member["user"]): + continue + + user = { + "access_level": member["accessLevel"]["integerValue"], + "email": member["user"]["emails"]["edges"][0]["node"][ + "email" + ], + "id": member["user"]["id"].replace( + "gid://gitlab/User/", "" + ), + "namespace": 
slugify(namespace.name), + "username": member["user"]["username"], + } + users.append(user) logging.info( - u"|namespace={} user={} email={} access_level={}".format( + "|namespace={} user={} email={} access_level={}".format( namespace.name, - user.name, - user.email, - member.access_level, + user["username"], + user["email"], + user["access_level"], ) ) return users @@ -159,9 +283,17 @@ def get_users(self, from_namespaces=None): def get_groups(self): groups = [] for group in self.groups: - for result in self.client.groups.list(search=group, all=True): - if result.parent_id is None: - logging.info(u"|found group={}".format(result.name)) + _start = time() + gitlab_groups = self.client.groups.list( + search=group, + top_level_only=True, + all=True, + ) + timespent = time() - _start + logging.debug(f"Fetched groups in {timespent} seconds") + for result in gitlab_groups: + if result.name not in self.groups_ignore_list: + logging.info("|found group={}".format(result.name)) groups.append(result) return groups @@ -240,11 +372,11 @@ def auto_create(self, namespaces): def check_namespace(self, name): """Check if namespace exists. - Args: - name (str): kubernetes namespace. + Args: + name (str): kubernetes namespace. - Returns: - bool: True if exists, False otherwise. + Returns: + bool: True if exists, False otherwise. """ try: namespace = self.client_core.list_namespace( @@ -264,12 +396,12 @@ def check_namespace(self, name): def check_role_binding(self, name, namespace=None): """Check if role binding exists. - Args: - name (str): user_role_binding name. - namespace (str): kubernetes namespace. + Args: + name (str): user_role_binding name. + namespace (str): kubernetes namespace. - Returns: - bool: True if exists, False otherwise. + Returns: + bool: True if exists, False otherwise. 
""" try: full_name = "{}_{}".format(self.user_role_prefix, name) @@ -332,7 +464,7 @@ def create_role_binding( body=role_binding, _request_timeout=self.timeout ) logging.info( - u"|_ role-binding created name={} namespace={}".format( + "|_ role-binding created name={} namespace={}".format( name, namespace ) ) @@ -351,7 +483,9 @@ def delete_deprecated_user_role_bindings(self, users): users_grouped_by_ns[user["namespace"]].append(user) for ns in users_grouped_by_ns: - role_bindings = self.client_rbac.list_namespaced_role_binding(ns) + role_bindings = self.client_rbac.list_namespaced_role_binding( + ns + ) users_ids = [user["id"] for user in users_grouped_by_ns[ns]] for role_binding in role_bindings.items: @@ -369,14 +503,16 @@ def delete_deprecated_user_role_bindings(self, users): body=role_binding, ) logging.info( - u"|_ role-binding deprecated name={} namespace={}".format( + "|_ role-binding deprecated name={} namespace={}".format( role_binding.metadata.name, role_binding.metadata.namespace, ) ) except ApiException as e: - error = "unable to delete deprecated user role bindings :: {}".format( - eval(e.body)["message"] + error = ( + "unable to delete deprecated user role bindings :: {}".format( + eval(e.body)["message"] + ) ) logging.error(error) except Exception as e: @@ -389,7 +525,9 @@ def delete_deprecated_user_role_bindings(self, users): def delete_deprecated_cluster_role_bindings(self, users): try: cluster_users_ids = [user["id"] for user in users] - for role_binding in self.client_rbac.list_cluster_role_binding().items: + for ( + role_binding + ) in self.client_rbac.list_cluster_role_binding().items: try: user_id = role_binding.metadata.labels[ "gitlab2rbac.kubernetes.io/user_id" @@ -403,7 +541,7 @@ def delete_deprecated_cluster_role_bindings(self, users): body=role_binding, ) logging.info( - u"|_ cluster-role-binding deprecated name={}".format( + "|_ cluster-role-binding deprecated name={}".format( role_binding.metadata.name, ) ) @@ -507,6 +645,12 @@ def 
main(): ) GITLAB2RBAC_FREQUENCY = environ.get("GITLAB2RBAC_FREQUENCY", 60) + GITLAB_USERNAME_IGNORE_LIST = environ.get( + "GITLAB_USERNAME_IGNORE_LIST", "" + ).split(",") + GITLAB_GROUPS_IGNORE_LIST = environ.get( + "GITLAB_GROUPS_IGNORE_LIST", "lost-and-found" + ).split(",") if not GITLAB_URL or not GITLAB_PRIVATE_TOKEN: raise Exception( @@ -520,7 +664,9 @@ def main(): timeout=GITLAB_TIMEOUT, groups=GITLAB_GROUPS_SEARCH, namespace_granularity=GITLAB_NAMESPACE_GRANULARITY, - admins_group=GITLAB_ADMINS_GROUP + admins_group=GITLAB_ADMINS_GROUP, + username_ignore_list=GITLAB_USERNAME_IGNORE_LIST, + groups_ignore_list=GITLAB_GROUPS_IGNORE_LIST, ) gitlab_helper.connect() diff --git a/gitlab2rbac/Chart.yaml b/gitlab2rbac/Chart.yaml index 303d61d..58268f0 100644 --- a/gitlab2rbac/Chart.yaml +++ b/gitlab2rbac/Chart.yaml @@ -21,4 +21,4 @@ version: 0.1.0 # incremented each time you make changes to the application. Versions are not expected to # follow Semantic Versioning. They should reflect the version the application is using. # It is recommended to use it with quotes. -appVersion: "0.2.3" +appVersion: "0.2.4" diff --git a/requirements.txt b/requirements.txt index 851e0a7..b1a421a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,20 +1,19 @@ -cachetools==4.1.1 -certifi==2020.6.20 -chardet==3.0.4 -google-auth==1.20.0 -idna==2.10 -kubernetes==11.0.0 -oauthlib==3.1.0 -pyasn1==0.4.8 -pyasn1-modules==0.2.8 -python-dateutil==2.8.1 -python-gitlab==2.4.0 -python-slugify==4.0.1 -PyYAML==5.4 -requests==2.24.0 -requests-oauthlib==1.3.0 -rsa==4.7 -six==1.15.0 +cachetools==5.4.0 +certifi==2024.7.4 +charset-normalizer==3.3.2 +google-auth==2.32.0 +gql==3.5.0 +idna==3.7 +kubernetes==30.1.0 +oauthlib==3.2.2 +pyasn1==0.6.0 +pyasn1_modules==0.4.0 +python-dateutil==2.9.0.post0 +python-gitlab==4.9.0 +python-slugify==8.0.4 +PyYAML==6.0.1 +rsa==4.9 +six==1.16.0 text-unidecode==1.3 -urllib3==1.26.5 -websocket-client==0.57.0 +urllib3==2.2.2 +websocket-client==1.8.0