Skip to content
14 changes: 7 additions & 7 deletions bumiworker/bumiworker/modules/archive/s3_abandoned_buckets.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,7 @@
)
from bumiworker.bumiworker.modules.recommendations.s3_abandoned_buckets import (
S3AbandonedBuckets as S3AbandonedBucketsRecommendation,
AVG_DATA_SIZE_KEY, TIER_1_REQUESTS_QUANTITY_KEY,
TIER_2_REQUESTS_QUANTITY_KEY
GET_OBJECT_KEY, PUT_OBJECT_KEY
)


Expand All @@ -15,12 +14,13 @@ class S3AbandonedBuckets(S3AbandonedBucketsArchiveBase,
]

def get_previous_metric_threshold_map(self, previous_options):
# Buckets are considered abandoned if both GetObject and PutObject
# operations are zero (no read or write activity)
# For backward compatibility, if old options exist, we still use
# the new logic since the recommendation criteria has changed
return {
TIER_1_REQUESTS_QUANTITY_KEY: previous_options.get(
'tier_1_request_quantity_threshold'),
TIER_2_REQUESTS_QUANTITY_KEY: previous_options.get(
'tier_2_request_quantity_threshold'),
AVG_DATA_SIZE_KEY: previous_options.get('data_size_threshold')
GET_OBJECT_KEY: 0,
PUT_OBJECT_KEY: 0
}


Expand Down
108 changes: 38 additions & 70 deletions bumiworker/bumiworker/modules/recommendations/s3_abandoned_buckets.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,9 @@

LOG = logging.getLogger(__name__)

# Number of days of raw expenses to inspect for bucket activity
DEFAULT_DAYS_THRESHOLD = 30
# Metric keys for bucket read/write activity (GetObject / PutObject counts)
GET_OBJECT_KEY = 'get_object_count'
PUT_OBJECT_KEY = 'put_object_count'


class S3AbandonedBuckets(S3AbandonedBucketsBase):
Expand All @@ -26,12 +20,6 @@ def __init__(self, organization_id, config_client, created_at):
self.option_ordered_map = OrderedDict({
'days_threshold': {
'default': DEFAULT_DAYS_THRESHOLD},
'data_size_threshold': {
'default': DATA_SIZE_THRESHOLD},
'tier_1_request_quantity_threshold': {
'default': TIER_1_REQUESTS_THRESHOLD},
'tier_2_request_quantity_threshold': {
'default': TIER_2_REQUESTS_THRESHOLD},
'excluded_pools': {
'default': {},
'clean_func': self.clean_excluded_pools,
Expand All @@ -40,90 +28,70 @@ def __init__(self, organization_id, config_client, created_at):
})

def get_metric_threshold_map(self):
options = self.get_options()
# Buckets are considered abandoned if both GetObject and PutObject
# operations are zero (no read or write activity)
return {
TIER_1_REQUESTS_QUANTITY_KEY: options.get(
'tier_1_request_quantity_threshold'),
TIER_2_REQUESTS_QUANTITY_KEY: options.get(
'tier_2_request_quantity_threshold'),
AVG_DATA_SIZE_KEY: options.get('data_size_threshold')
GET_OBJECT_KEY: False,
PUT_OBJECT_KEY: False
}

def _get_data_size_request_metrics(self, cloud_account_id,
cloud_resource_ids, start_date,
days_threshold):
product_families = ['Data Transfer', 'API Request']
tier_1_request_type = 'Requests-Tier1'
tier_2_request_type = 'Requests-Tier2'
data_api_requests = self.mongo_client.restapi.raw_expenses.aggregate([
# Query for GetObject and PutObject operations in API Request product family
target_operations = ['GetObject', 'PutObject']
api_request_pipeline = [
{
'$match': {
'$and': [
{'resource_id': {'$in': cloud_resource_ids}},
{'cloud_account_id': cloud_account_id},
{'start_date': {'$gte': start_date}},
{'product/productFamily': {'$in': product_families}}
{'product/productFamily': 'API Request'},
{'lineItem/Operation': {'$in': target_operations}}
]
}
},
{
'$group': {
'_id': {
'_id': '$resource_id',
'productFamily': '$product/productFamily',
'tier_type': '$lineItem/UsageType',
'operation': '$lineItem/Operation'
},
'usage_amount': {'$push': '$lineItem/UsageAmount'}
'total_usage': {
'$sum': '$lineItem/UsageAmount'
}
}
}
])
resource_data_request_map = {}
for data_api_request in data_api_requests:
cloud_resource_id = data_api_request['_id']['_id']
if not resource_data_request_map.get(cloud_resource_id):
resource_data_request_map[cloud_resource_id] = {}
resource_data_request_map[cloud_resource_id][
DATA_SIZE_KEY] = 0.0
resource_data_request_map[cloud_resource_id][
TIER_1_REQUESTS_QUANTITY_KEY] = 0
resource_data_request_map[cloud_resource_id][
TIER_2_REQUESTS_QUANTITY_KEY] = 0
total_sum = sum(
[float(x) for x in data_api_request['usage_amount']])
if data_api_request['_id']['productFamily'] == 'Data Transfer':
resource_data_request_map[cloud_resource_id][
DATA_SIZE_KEY] += total_sum
else:
res_tier_type = data_api_request['_id']['tier_type']
res_operation = data_api_request['_id']['operation']
if tier_1_request_type in res_tier_type:
resource_data_request_map[cloud_resource_id][
TIER_1_REQUESTS_QUANTITY_KEY] += int(total_sum)
elif (tier_2_request_type in res_tier_type and
res_operation == 'GetObject'):
resource_data_request_map[cloud_resource_id][
TIER_2_REQUESTS_QUANTITY_KEY] += int(total_sum)
]
api_requests = self.mongo_client.restapi.raw_expenses.aggregate(
api_request_pipeline)
resource_meter_value = {}
for res_id, meter_map in resource_data_request_map.items():
if not resource_meter_value.get(res_id):
resource_meter_value[res_id] = {}
for meter_key, total in meter_map.items():
if meter_key == DATA_SIZE_KEY:
avg_size = (total / days_threshold) * MBS_IN_GB
resource_meter_value[res_id][AVG_DATA_SIZE_KEY] = avg_size
else:
resource_meter_value[res_id][meter_key] = total
# Initialize all resources with no recorded activity
for res_id in cloud_resource_ids:
resource_meter_value[res_id] = {
GET_OBJECT_KEY: False,
PUT_OBJECT_KEY: False
}
# Aggregate operation usage (already summed by MongoDB)
for api_request in api_requests:
cloud_resource_id = api_request['_id']['_id']
operation = api_request['_id']['operation']
total_sum = int(api_request['total_usage'])
has_usage = bool(total_sum)
if operation == 'GetObject':
resource_meter_value[cloud_resource_id][
GET_OBJECT_KEY] = has_usage
elif operation == 'PutObject':
resource_meter_value[cloud_resource_id][
PUT_OBJECT_KEY] = has_usage
return resource_meter_value

@staticmethod
def metrics_result(data_req_map):
return {
'tier_1_request_quantity': data_req_map.get(
TIER_1_REQUESTS_QUANTITY_KEY),
'tier_2_request_quantity': data_req_map.get(
TIER_2_REQUESTS_QUANTITY_KEY),
'avg_data_size': data_req_map.get(AVG_DATA_SIZE_KEY),
'get_object_count': data_req_map.get(GET_OBJECT_KEY, False),
'put_object_count': data_req_map.get(PUT_OBJECT_KEY, False),
}


Expand Down
36 changes: 31 additions & 5 deletions bumiworker/bumiworker/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,23 +112,49 @@ def __init__(self, *args, **kwargs):
#
# tools subpackages stubs
#
if "tools" not in sys.modules:
    tools_mod = types.ModuleType("tools")
    sys.modules["tools"] = tools_mod

if "tools.optscale_time" not in sys.modules:
    time_mod = types.ModuleType("tools.optscale_time")
    from time import time as _time
    from datetime import datetime, timezone

    def utcnow_timestamp():
        # Current UNIX timestamp in whole seconds
        return int(_time())

    def utcnow():
        # Timezone-aware "now" in UTC
        return datetime.now(timezone.utc)

    def utcfromtimestamp(ts):
        # Timezone-aware UTC datetime for a UNIX timestamp
        return datetime.fromtimestamp(ts, tz=timezone.utc)

    def startday(ts):
        # Truncate a datetime to midnight; non-datetime values pass through
        if isinstance(ts, datetime):
            return datetime(ts.year, ts.month, ts.day, tzinfo=ts.tzinfo)
        return ts

    time_mod.utcnow_timestamp = utcnow_timestamp
    time_mod.utcnow = utcnow
    time_mod.utcfromtimestamp = utcfromtimestamp
    time_mod.startday = startday
    sys.modules["tools.optscale_time"] = time_mod
    # Make it accessible as tools.optscale_time attribute
    sys.modules["tools"].optscale_time = time_mod

if "tools.optscale_data" not in sys.modules:
    data_mod = types.ModuleType("tools.optscale_data")
    sys.modules["tools.optscale_data"] = data_mod
    # Make it accessible as tools.optscale_data attribute
    sys.modules["tools"].optscale_data = data_mod

if "tools.optscale_data.clickhouse" not in sys.modules:
    ch_mod = types.ModuleType("tools.optscale_data.clickhouse")

    class ExternalDataConverter:
        # Minimal stub; the real converter is not exercised in unit tests
        def __init__(self, *args, **kwargs):
            pass

    ch_mod.ExternalDataConverter = ExternalDataConverter
    sys.modules["tools.optscale_data.clickhouse"] = ch_mod
    # Make it accessible as tools.optscale_data.clickhouse attribute
    sys.modules["tools.optscale_data"].clickhouse = ch_mod
Loading