Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changes/6359.misc.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add warning logs in the storage volume implementations when the reported used bytes or limit bytes of a quota scope are below 0
Comment thread
HyeockJinKim marked this conversation as resolved.
Outdated
11 changes: 11 additions & 0 deletions src/ai/backend/storage/volumes/cephfs/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import asyncio
import logging
import os
import shutil
from pathlib import Path
Expand All @@ -7,6 +8,7 @@
import aiofiles.os

from ai.backend.common.types import BinarySize, QuotaScopeID
from ai.backend.logging import BraceStyleAdapter

from ...exception import QuotaScopeNotFoundError
from ...subproc import run
Expand All @@ -20,6 +22,8 @@
)
from ..vfs import BaseFSOpModel, BaseQuotaModel, BaseVolume

log = BraceStyleAdapter(logging.getLogger(__spec__.name))


class CephDirQuotaModel(BaseQuotaModel):
async def create_quota_scope(
Expand Down Expand Up @@ -57,6 +61,13 @@ def read_attrs() -> tuple[int, int]:
None,
read_attrs,
)
if used_bytes < 0 or limit_bytes < 0:
log.warning(
"Used bytes < 0 ({}) or limit bytes < 0 ({}) for quota scope {} in CephFS",
used_bytes,
limit_bytes,
quota_scope_id,
)
return QuotaUsage(used_bytes=used_bytes, limit_bytes=limit_bytes)

async def update_quota_scope(
Expand Down
17 changes: 14 additions & 3 deletions src/ai/backend/storage/volumes/ddn/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import asyncio
import logging
from pathlib import Path
from subprocess import CalledProcessError
from typing import Any, Final, FrozenSet, Mapping
Expand All @@ -8,6 +9,7 @@

from ai.backend.common.etcd import AsyncEtcd
from ai.backend.common.types import QuotaScopeID
from ai.backend.logging import BraceStyleAdapter

from ...exception import QuotaScopeAlreadyExists, QuotaScopeNotFoundError
from ...subproc import run
Expand All @@ -28,6 +30,9 @@ def _kilobyte_to_byte(kilobyte: int) -> int:
return kilobyte * 1024


log = BraceStyleAdapter(logging.getLogger(__spec__.name))


class EXAScalerQuotaModel(BaseQuotaModel):
def __init__(self, mount_path: Path, local_config: Mapping[str, Any], etcd: AsyncEtcd) -> None:
self.local_config = local_config
Expand Down Expand Up @@ -110,9 +115,15 @@ async def _get_quota_by_project(self, pid: int, qspath: Path) -> QuotaUsage | No
if raw_used_bytes.endswith("*"):
raw_used_bytes = raw_used_bytes[:-1]
used_bytes = _kilobyte_to_byte(int(raw_used_bytes))
return QuotaUsage(
used_bytes=used_bytes, limit_bytes=_kilobyte_to_byte(hard_limit)
)
limit_bytes = _kilobyte_to_byte(hard_limit)
if used_bytes < 0 or limit_bytes < 0:
log.warning(
"Used bytes < 0 ({}) or limit bytes < 0 ({}) for quota scope in EXAScaler with project ID {}",
used_bytes,
limit_bytes,
pid,
)
Comment thread
seedspirit marked this conversation as resolved.
return QuotaUsage(used_bytes=used_bytes, limit_bytes=limit_bytes)
if Path(words[0]) == qspath:
next_line_is_quota = True
continue
Expand Down
13 changes: 13 additions & 0 deletions src/ai/backend/storage/volumes/dellemc/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import logging
from collections.abc import Mapping
from pathlib import Path
from typing import Any, FrozenSet, Optional, cast
Expand All @@ -9,6 +10,7 @@
from ai.backend.common.etcd import AsyncEtcd
from ai.backend.common.events.dispatcher import EventDispatcher, EventProducer
from ai.backend.common.types import HardwareMetadata, QuotaScopeID
from ai.backend.logging import BraceStyleAdapter

from ...exception import QuotaDirectoryNotEmptyError
from ...types import CapacityUsage, FSPerfMetric, QuotaConfig, QuotaUsage
Expand All @@ -19,6 +21,8 @@
from .exceptions import DellNoMetricError
from .onefs_client import OneFSClient, QuotaThresholds, QuotaTypes

log = BraceStyleAdapter(logging.getLogger(__spec__.name))


class DellEMCOneFSQuotaModel(BaseQuotaModel):
def __init__(
Expand Down Expand Up @@ -74,6 +78,15 @@ async def describe_quota_scope(
quota_id = await self._get_quota_id(qspath)
if quota_id is not None:
data = await self.api_client.get_quota(quota_id)
used_bytes = data["usage"]["fslogical"]
limit_bytes = data["thresholds"]["hard"]
if used_bytes < 0 or limit_bytes < 0:
log.warning(
"Used bytes < 0 ({}) or limit bytes < 0 ({}) for quota scope {} in DellEMCOneFS",
used_bytes,
limit_bytes,
quota_scope_id,
)
return QuotaUsage(
used_bytes=data["usage"]["fslogical"],
limit_bytes=data["thresholds"]["hard"],
Comment thread
seedspirit marked this conversation as resolved.
Outdated
Expand Down
14 changes: 12 additions & 2 deletions src/ai/backend/storage/volumes/gpfs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,12 +81,22 @@ async def describe_quota_scope(self, quota_scope_id: QuotaScopeID) -> Optional[Q
)
custom_defined_quotas = [q for q in quotas if not q.isDefaultQuota]
if len(custom_defined_quotas) == 0:
log.warning("No custom defined quotas found for quota scope %s in GPFS", quota_scope_id)
return QuotaUsage(-1, -1)
quota_info = custom_defined_quotas[0]
# The units are kilobytes (ref: )
used_bytes = quota_info.blockUsage * 1024 if quota_info.blockUsage is not None else -1
limit_bytes = quota_info.blockLimit * 1024 if quota_info.blockLimit is not None else -1
if used_bytes < 0 or limit_bytes < 0:
log.warning(
"Used bytes < 0 ({}) or limit bytes < 0 ({}) for quota scope {} in GPFS",
used_bytes,
limit_bytes,
quota_scope_id,
)
return QuotaUsage(
used_bytes=quota_info.blockUsage * 1024 if quota_info.blockUsage is not None else -1,
limit_bytes=quota_info.blockLimit * 1024 if quota_info.blockLimit is not None else -1,
used_bytes=used_bytes,
limit_bytes=limit_bytes,
)

async def unset_quota(self, quota_scope_id: QuotaScopeID) -> None:
Expand Down
20 changes: 18 additions & 2 deletions src/ai/backend/storage/volumes/netapp/netappclient.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@
import asyncio
import contextlib
import enum
import logging
import uuid
from collections.abc import Iterable
from pathlib import Path
Expand All @@ -75,6 +76,8 @@

import aiohttp

from ai.backend.logging import BraceStyleAdapter

from ...exception import NetAppClientError
from ...types import QuotaConfig, QuotaUsage

Expand Down Expand Up @@ -122,6 +125,9 @@ class QTreeInfo(TypedDict):
statistics: NotRequired[dict[str, Any]]


log = BraceStyleAdapter(logging.getLogger(__spec__.name))


class NetAppClient:
endpoint: str
user: str
Expand Down Expand Up @@ -571,9 +577,19 @@ async def get_quota_report(
raise NetAppClientError(
f"Quota report not found for the volume {volume_id} and the qtree {qtree_name}"
)
used_bytes = records[0]["space"]["used"]["total"]
limit_bytes = records[0]["space"]["hard_limit"]
if used_bytes < 0 or limit_bytes < 0:
log.warning(
"Used bytes < 0 ({}) or limit bytes < 0 ({}) for quota rule in NetApp ONTAP with volume ID {} and qtree {}",
used_bytes,
limit_bytes,
volume_id,
qtree_name,
)
return QuotaUsage(
used_bytes=records[0]["space"]["used"]["total"],
limit_bytes=records[0]["space"]["hard_limit"],
used_bytes=used_bytes,
limit_bytes=limit_bytes,
)

async def get_qos_policies(self) -> List[Mapping[str, Any]]:
Expand Down
7 changes: 7 additions & 0 deletions src/ai/backend/storage/volumes/vast/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,13 @@ async def describe_quota_scope(self, quota_scope_id: QuotaScopeID) -> Optional[Q
return None
if (quota := await self.api_client.get_quota(vast_quota_id)) is None:
return None
if quota.used_capacity < 0 or quota.hard_limit < 0:
log.warning(
"Used bytes < 0 ({}) or limit bytes < 0 ({}) for quota scope {} in VAST",
quota.used_capacity,
quota.hard_limit,
quota_scope_id,
)
return QuotaUsage(
used_bytes=quota.used_capacity,
limit_bytes=quota.hard_limit,
Expand Down
13 changes: 11 additions & 2 deletions src/ai/backend/storage/volumes/weka/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,9 +70,18 @@ async def describe_quota_scope(self, quota_scope_id: QuotaScopeID) -> Optional[Q

inode_id = await self._get_inode_id(qspath)
quota = await self.api_client.get_quota(self.fs_uid, inode_id)
used_bytes = quota.used_bytes if quota.used_bytes is not None else -1
limit_bytes = quota.hard_limit if quota.hard_limit is not None else -1
if used_bytes < 0 or limit_bytes < 0:
log.warning(
"Used bytes < 0 ({}) or limit bytes < 0 ({}) for quota scope {} in Weka",
used_bytes,
limit_bytes,
quota_scope_id,
)
return QuotaUsage(
used_bytes=quota.used_bytes if quota.used_bytes is not None else -1,
limit_bytes=quota.hard_limit if quota.hard_limit is not None else -1,
used_bytes=used_bytes,
limit_bytes=limit_bytes,
)

async def unset_quota(self, quota_scope_id: QuotaScopeID) -> None:
Expand Down
7 changes: 7 additions & 0 deletions src/ai/backend/storage/volumes/xfs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,13 @@ async def describe_quota_scope(
# By default, report command displays the sizes in the 1 KiB unit.
used_bytes = int(used_kbs) * 1024
hard_limit_bytes = int(hard_limit_kbs) * 1024
if used_bytes < 0 or hard_limit_bytes < 0:
log.warning(
"Used bytes < 0 ({}) or limit bytes < 0 ({}) for quota scope {} in XFS",
used_bytes,
hard_limit_bytes,
quota_scope_id,
)
Comment on lines +219 to +226
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you should also log the raw report output so that the original response can be checked.

return QuotaUsage(used_bytes, hard_limit_bytes)

async def update_quota_scope(
Expand Down
Loading