Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions docker/rucio_client/scripts/k8s_sync_sites.sh
Original file line number Diff line number Diff line change
Expand Up @@ -45,3 +45,5 @@ echo "Setting wmcore_output_tape attribute"
./setWmcoreTapeOutput
echo "Update DDM quota"
./updateDDMQuota
echo "Set wmcore_transferor account limits"
./updateWMCoreTransferorQuotas
34 changes: 34 additions & 0 deletions docker/rucio_client/scripts/rseUsageMetrics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#! /usr/bin/env python3

from rucio.client import Client

client = Client()

def get_sum_of_all_rse_statics(rse_expression):
    """Return the sum of the ``static`` usage over all RSEs matching an expression.

    The RSE names are resolved up front with ``client.list_rses``; the
    ``static`` figure of each one is then taken from ``get_rse_usage`` and
    added up. RSEs for which ``get_rse_usage`` reports ``(0, 0, 0)``
    contribute nothing to the total.
    """
    # Resolve all names first, then query the usage of each RSE in turn.
    rse_names = [entry["rse"] for entry in client.list_rses(rse_expression=rse_expression)]
    return sum(get_rse_usage(name)[0] for name in rse_names)


def get_rse_usage(rse):
    """Return the ``(static, rucio, expired)`` usage figures of an RSE.

    Every record yielded by ``client.get_rse_usage`` is indexed by its
    ``source`` and reduced to its ``used`` value. When any of the three
    required sources is missing, a message is printed and ``(0, 0, 0)`` is
    returned instead.
    """
    # Each record is assumed to carry both a "source" and a "used" key.
    used_by_source = {record["source"]: record["used"] for record in client.get_rse_usage(rse)}

    required_fields = {"static", "rucio", "expired"}
    if any(field not in used_by_source for field in required_fields):
        print("Skipping {} due to lack of relevant key in rse".format(rse))
        print("{} is not a subset of {}".format(required_fields, used_by_source.keys()))
        return 0, 0, 0

    # Python integers have arbitrary precision, so large values cannot overflow:
    # https://docs.python.org/3/library/exceptions.html#OverflowError
    return used_by_source["static"], used_by_source["rucio"], used_by_source["expired"]
37 changes: 4 additions & 33 deletions docker/rucio_client/scripts/updateDDMQuota
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
#! /usr/bin/env python3

import math
from rucio.client import Client
from tabulate import tabulate
client = Client()

from rucio.client import Client
from rseUsageMetrics import get_rse_usage, get_sum_of_all_rse_statics

client = Client()
# See the following link for documentation and please update it if you change the logic
# https://cmsdmops.docs.cern.ch/Operators/ManageDMWeight/

Expand All @@ -31,37 +33,6 @@ def print_results():
print(tabulate(table_disk, headers=headers))
print(tabulate(table_tape, headers=headers))


def get_sum_of_all_rse_statics(rse_expression):
    """Return the sum of the ``static`` usage over all RSEs matching an expression.

    The RSE names are resolved up front with ``client.list_rses``; the
    ``static`` figure of each one is then taken from ``get_rse_usage`` and
    added up. RSEs for which ``get_rse_usage`` reports ``(0, 0, 0)``
    contribute nothing to the total.
    """
    # Resolve all names first, then query the usage of each RSE in turn.
    rse_names = [entry["rse"] for entry in client.list_rses(rse_expression=rse_expression)]
    return sum(get_rse_usage(name)[0] for name in rse_names)


def get_rse_usage(rse):
    """Return the ``(static, rucio, expired)`` usage figures of an RSE.

    Every record yielded by ``client.get_rse_usage`` is indexed by its
    ``source`` and reduced to its ``used`` value. When any of the three
    required sources is missing, a message is printed and ``(0, 0, 0)`` is
    returned instead.
    """
    # Each record is assumed to carry both a "source" and a "used" key.
    used_by_source = {record["source"]: record["used"] for record in client.get_rse_usage(rse)}

    required_fields = {"static", "rucio", "expired"}
    if any(field not in used_by_source for field in required_fields):
        print("Skipping {} due to lack of relevant key in rse".format(rse))
        print("{} is not a subset of {}".format(required_fields, used_by_source.keys()))
        return 0, 0, 0

    # Python integers have arbitrary precision, so large values cannot overflow:
    # https://docs.python.org/3/library/exceptions.html#OverflowError
    return used_by_source["static"], used_by_source["rucio"], used_by_source["expired"]


def calculate_dm_weights(rse_expression, static_weight, free_weight, expired_weight, make_quadratic):

total_static = get_sum_of_all_rse_statics(rse_expression)
Expand Down
70 changes: 70 additions & 0 deletions docker/rucio_client/scripts/updateWMCoreTransferorQuotas
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
#! /usr/bin/env python3
"""
Script to update the global and the local quotas of the wmcore_transferor account.
Its global quota is set to X % of total disk capacity of T1 and T2 sites used in production
Its local quota is Y % of an RSE's (free + expired) space.
Global quota is introduced to have a global control on the usage of this account.
Local quotas are introduced to avoid the over-usage of certain RSEs.
"""

from rucio.client import Client
from rseUsageMetrics import get_rse_usage, get_sum_of_all_rse_statics

# Fallback percentages, used when the matching option cannot be read from the
# Rucio configuration (see get_global_quota_pct / get_local_quota_pct).
DEFAULT_GLOBAL_QUOTA_PCT = 15
DEFAULT_LOCAL_QUOTA_PCT = 50
# When True, the computed quotas are only printed; no account limit is set.
DRY_RUN = False

# Shared Rucio client used by every function in this script.
client = Client()
# Rucio account whose global and local limits this script manages.
account = "wmcore_transferor"
# RSEs the quotas are computed over: disk RSEs with 0 < tier < 3 (T1 and T2).
# NOTE(review): cms_type=real presumably selects the production RSEs - confirm.
rse_expression = "rse_type=DISK&cms_type=real&tier<3&tier>0"
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi @amaltaro, the global limit can be retrieved with either [1] or [2]. A global limit is a limit that applies across multiple RSEs. Rucio allows multiple global limits, so if you use [2], you will get an iterable object instead of a single value. We'll introduce only one global limit, which will be defined over this RSE expression.

I can't think of any reason why this RSE expression might change in the future, or why we would have multiple global limits with different RSE expressions. But since the system allows it, making your system rely on a configuration that can change isn't a good idea in my opinion. Probably the best option is not to touch MSTransferor and let it fail and retry. It will fail with a distinct error message [3], and the operator looking into the logs will know what's going on (if they miss the alerts).

[1] https://rucio.github.io/documentation/html/client_api/accountclient.html#rucio.client.accountclient.AccountClient.get_global_account_limit
[2] https://rucio.github.io/documentation/html/client_api/accountclient.html#rucio.client.accountclient.AccountClient.get_global_account_limits
[3]

$ rucio rule add -a haozturk --rses T1_RU_JINR_Disk --comment "test" --copies 1 -d cms:/GluGluHToTauTau_HTXSFilter_STXS1p1_Bin110to113_M125_TuneCP5_13TeV-powheg-pythia8/RunIISummer20UL16MiniAODAPVv2-106X_mcRun2_asymptotic_preVFP_v11_ext1-v2/MINIAODSIM
2025-05-06 10:19:51,129	ERROR	There is not enough quota left to fulfil the operation.
Details: There is insufficient quota on any of the target RSE's to fulfill the operation.


def get_global_quota_pct():
    """Return the global quota percentage of the wmcore_transferor account.

    The value is read from the Rucio configuration (section ``accounts``,
    option ``wmcore_transferor_global_quota_pct``) and converted to ``int``.

    Returns:
        The configured percentage, or ``DEFAULT_GLOBAL_QUOTA_PCT`` when the
        option cannot be read or is not a valid integer.
    """
    try:
        return int(client.get_config(section="accounts", option="wmcore_transferor_global_quota_pct"))
    except Exception as exc:
        # Deliberately best-effort: a missing option, a server error or a
        # malformed value must not abort the run. Report the reason so that the
        # fallback to the default is visible in the logs.
        print(
            f"Could not read wmcore_transferor_global_quota_pct from the Rucio config ({exc!r}), "
            f"using default of {DEFAULT_GLOBAL_QUOTA_PCT} %"
        )
        return DEFAULT_GLOBAL_QUOTA_PCT

def get_local_quota_pct():
    """Return the local quota percentage of the wmcore_transferor account.

    The value is read from the Rucio configuration (section ``accounts``,
    option ``wmcore_transferor_local_quota_pct``) and converted to ``int``.

    Returns:
        The configured percentage, or ``DEFAULT_LOCAL_QUOTA_PCT`` when the
        option cannot be read or is not a valid integer.
    """
    try:
        return int(client.get_config(section="accounts", option="wmcore_transferor_local_quota_pct"))
    except Exception as exc:
        # Deliberately best-effort: a missing option, a server error or a
        # malformed value must not abort the run. Report the reason so that the
        # fallback to the default is visible in the logs.
        print(
            f"Could not read wmcore_transferor_local_quota_pct from the Rucio config ({exc!r}), "
            f"using default of {DEFAULT_LOCAL_QUOTA_PCT} %"
        )
        return DEFAULT_LOCAL_QUOTA_PCT


def set_global_quota():
    """Set the global account limit of wmcore_transferor over ``rse_expression``.

    The limit is ``get_global_quota_pct()`` percent of the summed ``static``
    usage of all RSEs matching ``rse_expression``. With ``DRY_RUN`` enabled the
    value is only printed and nothing is changed in Rucio.
    """
    total_disk_capacity = get_sum_of_all_rse_statics(rse_expression)
    global_quota_pct = get_global_quota_pct()
    # Integer division keeps the limit a whole number instead of a float.
    global_quota_bytes = (total_disk_capacity * global_quota_pct) // 100

    prefix = "DRY-RUN: " if DRY_RUN else ""
    print(f"{prefix}Setting global quota of wmcore_transferor to {global_quota_bytes / 1e15} PB")
    if not DRY_RUN:
        # The setter is a method of the Rucio client, not a module-level function.
        client.set_global_account_limit(account, rse_expression, global_quota_bytes)

def set_local_quotas():
    """Set the local account limit of wmcore_transferor on every matching RSE.

    For each RSE matching ``rse_expression`` the limit is
    ``get_local_quota_pct()`` percent of its ``(free + expired)`` space, where
    ``free = static - rucio``. Negative results are clamped to 0. With
    ``DRY_RUN`` enabled the values are only printed and nothing is changed in
    Rucio.

    Note: ``get_rse_usage`` returns ``(0, 0, 0)`` for an RSE that lacks one of
    the required usage sources, so such an RSE ends up with a local limit of 0.
    """
    rses = [rse["rse"] for rse in client.list_rses(rse_expression=rse_expression)]
    local_quota_pct = get_local_quota_pct()
    prefix = "DRY-RUN: " if DRY_RUN else ""

    for rse in rses:
        static, rucio, expired = get_rse_usage(rse)
        free = static - rucio

        # Integer division keeps the limit a whole number; clamp to 0 because
        # (free + expired) is negative when rucio usage exceeds static + expired.
        local_quota_bytes = max(((free + expired) * local_quota_pct) // 100, 0)

        print(f"{prefix}Setting local quota of wmcore_transferor at {rse} to {local_quota_bytes / 1e12} TB")
        if not DRY_RUN:
            # The setter is a method of the Rucio client, not a module-level function.
            client.set_local_account_limit(account, rse, local_quota_bytes)


def main():
    """Update the global quota first, then the per-RSE local quotas."""
    set_global_quota()
    set_local_quotas()

if __name__ == "__main__":
main()