diff --git a/gitlab2rbac.py b/gitlab2rbac.py index 63254ab..3708433 100644 --- a/gitlab2rbac.py +++ b/gitlab2rbac.py @@ -3,15 +3,17 @@ import logging from collections import defaultdict from os import environ -from time import sleep +from time import sleep, time import kubernetes +import requests from gitlab import Gitlab from kubernetes.client.rest import ApiException from slugify import slugify logging.basicConfig( - format="%(asctime)s - %(levelname)s - %(message)s", level=logging.INFO + format="%(asctime)s - %(levelname)s - %(message)s", + level=environ.get("LOGLEVEL", "INFO").upper(), ) @@ -25,7 +27,17 @@ class GitlabHelper(object): 50: "maintainer", # NOTE: owner is only usable when your permissions are based on group. } - def __init__(self, url, token, timeout, groups, namespace_granularity, admins_group): + def __init__( + self, + url, + token, + timeout, + groups, + namespace_granularity, + admins_group, + username_ignore_list, + gitlab_groups_ignore_list, + ): self.client = None self.gitlab_users = [] self.groups = groups @@ -35,6 +47,8 @@ def __init__(self, url, token, timeout, groups, namespace_granularity, admins_gr self.namespace_granularity = namespace_granularity self.admins_group = admins_group self.namespaces = [] + self.username_ignore_list = username_ignore_list + self.gitlab_groups_ignore_list = gitlab_groups_ignore_list def connect(self): """Performs an authentication via private token. @@ -102,7 +116,7 @@ def get_admins(self): {"email": user.email, "id": "{}".format(user.id)} ) logging.info( - u"|user={} email={} access_level=admin".format( + "|user={} email={} access_level=admin".format( user.name, user.email ) ) @@ -112,8 +126,25 @@ def get_admins(self): exit(1) return [] + def check_user(self, user): + if user["bot"] == True: + logging.debug(f"Ignore user {user['username']} because it's a bot") + return False + if user["username"] in self.username_ignore_list: + logging.debug( + f"Ignore user {user['username']} because it's in the ignore list" + ) + return False + if user["state"] != "active": + logging.debug( + f"Ignoring user {user['username']} because is not active" + ) + return False + return True + def get_users(self, from_namespaces=None): """Returns all users from groups/projects. + We use a GraphQL to minimize the queries made to Gitlab API Args: from_namespaces (list): Retrieve users from this namespaces. @@ -131,25 +162,100 @@ def get_users(self, from_namespaces=None): try: users = [] namespaces = from_namespaces or self.namespaces + query = """ +query ($first: Int, $after: String) {{ + group(fullPath: "{namespace}") {{ + id + name + parent {{ + id + }} + groupMembers(first: $first, after: $after) {{ + pageInfo {{ + endCursor + hasNextPage + }} + nodes {{ + id + accessLevel {{ + integerValue + stringValue + }} + user {{ + id + bot + username + state + emails {{ + edges {{ + node {{ + email + }} + }} + }} + }} + }} + }} + }} +}} +""" for namespace in namespaces: - for member in namespace.members.list(all=True): - user = self.client.users.get(member.id) - users.append( - { - "access_level": member.access_level, - "email": user.email, - "id": "{}".format(user.id), + headers = { + "Authorization": "Bearer {token}".format(token=self.token), + "Content-Type": "application/json", + } + has_next_page = True + end_cursor = None + while has_next_page: + _start = time() + variables = {"first": 50} + if end_cursor: + variables["after"] = end_cursor + r = requests.post( + f"{self.url}/api/graphql", + headers=headers, + json={ + "query": query.format(namespace=namespace.name), + "variables": variables, + }, + ) + r.raise_for_status() + data = r.json() + timespent = time() - _start + logging.debug( + f"Fetched members of group {namespace.name} in {timespent} seconds" + ) + members = data["data"]["group"]["groupMembers"]["nodes"] + for member in members: + # ignore user if it doesn't pass some checks + if not self.check_user(member["user"]): + continue + + user = { + "access_level": member["accessLevel"]["integerValue"], + "email": member["user"]["emails"]["edges"][0]["node"][ + "email" + ], + "id": "{}".format( + member["user"]["id"].replace( + "gid://gitlab/User/", "" + ) + ), "namespace": slugify(namespace.name), + "username": member["user"]["username"], } - ) - logging.info( - u"|namespace={} user={} email={} access_level={}".format( - namespace.name, - user.name, - user.email, - member.access_level, + users.append(user) + logging.info( + "|namespace={} user={} email={} access_level={}".format( + namespace.name, + user["username"], + user["email"], + user["access_level"], + ) ) - ) + page_info = data["data"]["group"]["groupMembers"]["pageInfo"] + has_next_page = page_info["hasNextPage"] + end_cursor = page_info["endCursor"] return users except Exception as e: logging.error("unable to retrieve users :: {}".format(e)) @@ -159,9 +265,18 @@ def get_users(self, from_namespaces=None): def get_groups(self): groups = [] for group in self.groups: - for result in self.client.groups.list(search=group, all=True): - if result.parent_id is None: - logging.info(u"|found group={}".format(result.name)) + _start = time() + gitlab_groups = self.client.groups.list( + search=group, include_subgroups=False, all=True, per_page=50 + ) + timespent = time() - _start + logging.debug(f"Fetched groups in {timespent} seconds") + for result in gitlab_groups: + if ( + result.parent_id is None + and result.name not in self.gitlab_groups_ignore_list + ): + logging.info("|found group={}".format(result.name)) groups.append(result) return groups @@ -240,11 +355,11 @@ def auto_create(self, namespaces): def check_namespace(self, name): """Check if namespace exists. - Args: - name (str): kubernetes namespace. + Args: + name (str): kubernetes namespace. - Returns: - bool: True if exists, False otherwise. + Returns: + bool: True if exists, False otherwise. """ try: namespace = self.client_core.list_namespace( @@ -264,12 +379,12 @@ def check_namespace(self, name): def check_role_binding(self, name, namespace=None): """Check if role binding exists. - Args: - name (str): user_role_binding name. - namespace (str): kubernetes namespace. + Args: + name (str): user_role_binding name. + namespace (str): kubernetes namespace. - Returns: - bool: True if exists, False otherwise. + Returns: + bool: True if exists, False otherwise. """ try: full_name = "{}_{}".format(self.user_role_prefix, name) @@ -332,7 +447,7 @@ def create_role_binding( body=role_binding, _request_timeout=self.timeout ) logging.info( - u"|_ role-binding created name={} namespace={}".format( + "|_ role-binding created name={} namespace={}".format( name, namespace ) ) @@ -351,7 +466,9 @@ def delete_deprecated_user_role_bindings(self, users): users_grouped_by_ns[user["namespace"]].append(user) for ns in users_grouped_by_ns: - role_bindings = self.client_rbac.list_namespaced_role_binding(ns) + role_bindings = self.client_rbac.list_namespaced_role_binding( + ns + ) users_ids = [user["id"] for user in users_grouped_by_ns[ns]] for role_binding in role_bindings.items: @@ -369,14 +486,16 @@ def delete_deprecated_user_role_bindings(self, users): body=role_binding, ) logging.info( - u"|_ role-binding deprecated name={} namespace={}".format( + "|_ role-binding deprecated name={} namespace={}".format( role_binding.metadata.name, role_binding.metadata.namespace, ) ) except ApiException as e: - error = "unable to delete deprecated user role bindings :: {}".format( - eval(e.body)["message"] + error = ( + "unable to delete deprecated user role bindings :: {}".format( + eval(e.body)["message"] + ) ) logging.error(error) except Exception as e: @@ -389,7 +508,9 @@ def delete_deprecated_user_role_bindings(self, users): def delete_deprecated_cluster_role_bindings(self, users): try: cluster_users_ids = [user["id"] for user in users] - for role_binding in self.client_rbac.list_cluster_role_binding().items: + for ( + role_binding + ) in self.client_rbac.list_cluster_role_binding().items: try: user_id = role_binding.metadata.labels[ "gitlab2rbac.kubernetes.io/user_id" @@ -403,7 +524,7 @@ def delete_deprecated_cluster_role_bindings(self, users): body=role_binding, ) logging.info( - u"|_ cluster-role-binding deprecated name={}".format( + "|_ cluster-role-binding deprecated name={}".format( role_binding.metadata.name, ) ) @@ -507,6 +628,12 @@ def main(): ) GITLAB2RBAC_FREQUENCY = environ.get("GITLAB2RBAC_FREQUENCY", 60) + USERNAME_IGNORE_LIST = environ.get("USERNAME_IGNORE_LIST", []).split( + "," + ) + GITLAB_GROUPS_IGNORE_LIST = environ.get( + "GITLAB_GROUPS_IGNORE_LIST", "lost-and-found" + ).split(",") if not GITLAB_URL or not GITLAB_PRIVATE_TOKEN: raise Exception( @@ -520,7 +647,9 @@ def main(): timeout=GITLAB_TIMEOUT, groups=GITLAB_GROUPS_SEARCH, namespace_granularity=GITLAB_NAMESPACE_GRANULARITY, - admins_group=GITLAB_ADMINS_GROUP + admins_group=GITLAB_ADMINS_GROUP, + username_ignore_list=USERNAME_IGNORE_LIST, + gitlab_groups_ignore_list=GITLAB_GROUPS_IGNORE_LIST, ) gitlab_helper.connect()