Skip to content

Commit

Permalink
change the get_users method to use graphql and decrease the amount of…
Browse files Browse the repository at this point in the history
… queries made on the Gitlab API
  • Loading branch information
Solvik committed Aug 6, 2024
1 parent d137964 commit f6093f7
Showing 1 changed file with 168 additions and 39 deletions.
207 changes: 168 additions & 39 deletions gitlab2rbac.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,17 @@
import logging
from collections import defaultdict
from os import environ
from time import sleep
from time import sleep, time

import kubernetes
import requests
from gitlab import Gitlab
from kubernetes.client.rest import ApiException
from slugify import slugify

logging.basicConfig(
format="%(asctime)s - %(levelname)s - %(message)s", level=logging.INFO
format="%(asctime)s - %(levelname)s - %(message)s",
level=environ.get("LOGLEVEL", "INFO").upper(),
)


Expand All @@ -25,7 +27,17 @@ class GitlabHelper(object):
50: "maintainer", # NOTE: owner is only usable when your permissions are based on group.
}

def __init__(self, url, token, timeout, groups, namespace_granularity, admins_group):
def __init__(
self,
url,
token,
timeout,
groups,
namespace_granularity,
admins_group,
username_ignore_list,
gitlab_groups_ignore_list,
):
self.client = None
self.gitlab_users = []
self.groups = groups
Expand All @@ -35,6 +47,8 @@ def __init__(self, url, token, timeout, groups, namespace_granularity, admins_gr
self.namespace_granularity = namespace_granularity
self.admins_group = admins_group
self.namespaces = []
self.username_ignore_list = username_ignore_list
self.gitlab_groups_ignore_list = gitlab_groups_ignore_list

def connect(self):
"""Performs an authentication via private token.
Expand Down Expand Up @@ -102,7 +116,7 @@ def get_admins(self):
{"email": user.email, "id": "{}".format(user.id)}
)
logging.info(
u"|user={} email={} access_level=admin".format(
"|user={} email={} access_level=admin".format(
user.name, user.email
)
)
Expand All @@ -112,8 +126,25 @@ def get_admins(self):
exit(1)
return []

def check_user(self, user):
    """Tell whether a group member's user should be kept.

    Args:
        user (dict): the ``user`` node of a GraphQL group member. The
            ``bot``, ``username`` and ``state`` keys are read. ``None``
            is accepted and rejected (a member node may come back
            without an associated user).

    Returns:
        bool: False when the user is missing, is a bot, is listed in
            ``self.username_ignore_list`` or is not in the ``active``
            state; True otherwise.
    """
    # Guard first: subscripting None below would raise TypeError and abort
    # the whole user retrieval for a single user-less member.
    if user is None:
        logging.debug("Ignore member because it has no associated user")
        return False
    if user["bot"]:
        logging.debug(f"Ignore user {user['username']} because it's a bot")
        return False
    if user["username"] in self.username_ignore_list:
        logging.debug(
            f"Ignore user {user['username']} because it's in the ignore list"
        )
        return False
    if user["state"] != "active":
        logging.debug(
            f"Ignoring user {user['username']} because is not active"
        )
        return False
    return True

def get_users(self, from_namespaces=None):
"""Returns all users from groups/projects.
A GraphQL query is used to minimize the number of requests made to the GitLab API.
Args:
from_namespaces (list): Retrieve users from these namespaces.
Expand All @@ -131,25 +162,100 @@ def get_users(self, from_namespaces=None):
try:
users = []
namespaces = from_namespaces or self.namespaces
query = """
query ($first: Int, $after: String) {{
group(fullPath: "{namespace}") {{
id
name
parent {{
id
}}
groupMembers(first: $first, after: $after) {{
pageInfo {{
endCursor
hasNextPage
}}
nodes {{
id
accessLevel {{
integerValue
stringValue
}}
user {{
id
bot
username
state
emails {{
edges {{
node {{
email
}}
}}
}}
}}
}}
}}
}}
}}
"""
for namespace in namespaces:
for member in namespace.members.list(all=True):
user = self.client.users.get(member.id)
users.append(
{
"access_level": member.access_level,
"email": user.email,
"id": "{}".format(user.id),
headers = {
"Authorization": "Bearer {token}".format(token=self.token),
"Content-Type": "application/json",
}
has_next_page = True
end_cursor = None
while has_next_page:
_start = time()
variables = {"first": 50}
if end_cursor:
variables["after"] = end_cursor
r = requests.post(
f"{self.url}/api/graphql",
headers=headers,
json={
"query": query.format(namespace=namespace.name),
"variables": variables,
},
)
r.raise_for_status()
data = r.json()
timespent = time() - _start
logging.debug(
f"Fetched members of group {namespace.name} in {timespent} seconds"
)
members = data["data"]["group"]["groupMembers"]["nodes"]
for member in members:
# ignore user if it doesn't pass some checks
if not self.check_user(member["user"]):
continue

user = {
"access_level": member["accessLevel"]["integerValue"],
"email": member["user"]["emails"]["edges"][0]["node"][
"email"
],
"id": "{}".format(
member["user"]["id"].replace(
"gid://gitlab/User/", ""
)
),
"namespace": slugify(namespace.name),
"username": member["user"]["username"],
}
)
logging.info(
u"|namespace={} user={} email={} access_level={}".format(
namespace.name,
user.name,
user.email,
member.access_level,
users.append(user)
logging.info(
"|namespace={} user={} email={} access_level={}".format(
namespace.name,
user["username"],
user["email"],
user["access_level"],
)
)
)
page_info = data["data"]["group"]["groupMembers"]["pageInfo"]
has_next_page = page_info["hasNextPage"]
end_cursor = page_info["endCursor"]
return users
except Exception as e:
logging.error("unable to retrieve users :: {}".format(e))
Expand All @@ -159,9 +265,18 @@ def get_users(self, from_namespaces=None):
def get_groups(self):
    """Return the top-level GitLab groups matching the configured searches.

    Every entry of ``self.groups`` is used as a search term against
    ``self.client.groups.list``. A result is kept only when its
    ``parent_id`` is None and its name is not listed in
    ``self.gitlab_groups_ignore_list``.

    Returns:
        list: the group objects returned by the client that passed the
            filters, in the order they were found.
    """
    groups = []
    for group in self.groups:
        # Time the API call so slow searches show up at debug level.
        _start = time()
        gitlab_groups = self.client.groups.list(
            search=group, include_subgroups=False, all=True, per_page=50
        )
        timespent = time() - _start
        logging.debug(f"Fetched groups in {timespent} seconds")
        for result in gitlab_groups:
            if (
                result.parent_id is None
                and result.name not in self.gitlab_groups_ignore_list
            ):
                logging.info("|found group={}".format(result.name))
                groups.append(result)
    return groups

Expand Down Expand Up @@ -240,11 +355,11 @@ def auto_create(self, namespaces):
def check_namespace(self, name):
"""Check if namespace exists.
Args:
name (str): kubernetes namespace.
Args:
name (str): kubernetes namespace.
Returns:
bool: True if exists, False otherwise.
Returns:
bool: True if exists, False otherwise.
"""
try:
namespace = self.client_core.list_namespace(
Expand All @@ -264,12 +379,12 @@ def check_namespace(self, name):
def check_role_binding(self, name, namespace=None):
"""Check if role binding exists.
Args:
name (str): user_role_binding name.
namespace (str): kubernetes namespace.
Args:
name (str): user_role_binding name.
namespace (str): kubernetes namespace.
Returns:
bool: True if exists, False otherwise.
Returns:
bool: True if exists, False otherwise.
"""
try:
full_name = "{}_{}".format(self.user_role_prefix, name)
Expand Down Expand Up @@ -332,7 +447,7 @@ def create_role_binding(
body=role_binding, _request_timeout=self.timeout
)
logging.info(
u"|_ role-binding created name={} namespace={}".format(
"|_ role-binding created name={} namespace={}".format(
name, namespace
)
)
Expand All @@ -351,7 +466,9 @@ def delete_deprecated_user_role_bindings(self, users):
users_grouped_by_ns[user["namespace"]].append(user)

for ns in users_grouped_by_ns:
role_bindings = self.client_rbac.list_namespaced_role_binding(ns)
role_bindings = self.client_rbac.list_namespaced_role_binding(
ns
)
users_ids = [user["id"] for user in users_grouped_by_ns[ns]]

for role_binding in role_bindings.items:
Expand All @@ -369,14 +486,16 @@ def delete_deprecated_user_role_bindings(self, users):
body=role_binding,
)
logging.info(
u"|_ role-binding deprecated name={} namespace={}".format(
"|_ role-binding deprecated name={} namespace={}".format(
role_binding.metadata.name,
role_binding.metadata.namespace,
)
)
except ApiException as e:
error = "unable to delete deprecated user role bindings :: {}".format(
eval(e.body)["message"]
error = (
"unable to delete deprecated user role bindings :: {}".format(
eval(e.body)["message"]
)
)
logging.error(error)
except Exception as e:
Expand All @@ -389,7 +508,9 @@ def delete_deprecated_user_role_bindings(self, users):
def delete_deprecated_cluster_role_bindings(self, users):
try:
cluster_users_ids = [user["id"] for user in users]
for role_binding in self.client_rbac.list_cluster_role_binding().items:
for (
role_binding
) in self.client_rbac.list_cluster_role_binding().items:
try:
user_id = role_binding.metadata.labels[
"gitlab2rbac.kubernetes.io/user_id"
Expand All @@ -403,7 +524,7 @@ def delete_deprecated_cluster_role_bindings(self, users):
body=role_binding,
)
logging.info(
u"|_ cluster-role-binding deprecated name={}".format(
"|_ cluster-role-binding deprecated name={}".format(
role_binding.metadata.name,
)
)
Expand Down Expand Up @@ -507,6 +628,12 @@ def main():
)

GITLAB2RBAC_FREQUENCY = environ.get("GITLAB2RBAC_FREQUENCY", 60)
# Comma-separated usernames to skip. The default must be a string: a list
# default has no .split() and raised AttributeError whenever the variable
# was unset. Blank entries are dropped so an unset or empty variable yields
# an empty list.
USERNAME_IGNORE_LIST = [
    username.strip()
    for username in environ.get("USERNAME_IGNORE_LIST", "").split(",")
    if username.strip()
]
GITLAB_GROUPS_IGNORE_LIST = environ.get(
"GITLAB_GROUPS_IGNORE_LIST", "lost-and-found"
).split(",")

if not GITLAB_URL or not GITLAB_PRIVATE_TOKEN:
raise Exception(
Expand All @@ -520,7 +647,9 @@ def main():
timeout=GITLAB_TIMEOUT,
groups=GITLAB_GROUPS_SEARCH,
namespace_granularity=GITLAB_NAMESPACE_GRANULARITY,
admins_group=GITLAB_ADMINS_GROUP
admins_group=GITLAB_ADMINS_GROUP,
username_ignore_list=USERNAME_IGNORE_LIST,
gitlab_groups_ignore_list=GITLAB_GROUPS_IGNORE_LIST,
)
gitlab_helper.connect()

Expand Down

0 comments on commit f6093f7

Please sign in to comment.