Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 8 additions & 3 deletions agent/site.py
Original file line number Diff line number Diff line change
Expand Up @@ -849,11 +849,11 @@ def fetch_site_status(self):
def get_timezone(self):
return self.timezone

def fetch_site_info(self):
def fetch_site_info(self, database_only=False):
return {
"config": self.config,
"timezone": self.get_timezone(),
"usage": self.get_usage(),
"usage": self.get_usage(database_only=database_only),
}

def fetch_site_analytics(self):
Expand Down Expand Up @@ -1404,8 +1404,13 @@ def fetch_latest_backup(self, with_files=True):

return backups

def get_usage(self):
def get_usage(self, database_only=False):
"""Returns Usage in bytes"""
if database_only:
# Skip the recursive file-size walk; callers refreshing only the
# database size don't need (and time out on) the file totals.
return {"database": b2mb(self.get_database_size())}

backup_directory = os.path.join(self.directory, "private", "backups")
public_directory = os.path.join(self.directory, "public")
private_directory = os.path.join(self.directory, "private")
Expand Down
29 changes: 12 additions & 17 deletions agent/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,25 +82,20 @@ def download_file(url, prefix):


def get_size(folder, ignore_dirs=None):
"""Returns the size of the folder in bytes. Ignores symlinks"""
total_size = os.path.getsize(folder)
"""Returns the apparent size of the folder in bytes.

if ignore_dirs is None:
ignore_dirs = []

for item in os.listdir(folder):
itempath = os.path.join(folder, item)

if item in ignore_dirs:
continue

if not os.path.islink(itempath):
if os.path.isfile(itempath):
total_size += os.path.getsize(itempath)
elif os.path.isdir(itempath):
total_size += get_size(itempath)
Shells out to `du` (C) instead of walking the tree in Python: a recursive
Python stat of every file was timing out gunicorn workers on sites with
large file trees. `du -b` reports apparent size (st_size), matching the
old behaviour. ignore_dirs is applied at the top level only, as before.
"""

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 The docstring claims ignore_dirs is still applied "at the top level only, as before", but du --exclude=PATTERN matches the pattern against basenames at every depth in the tree — not just immediate children. In practice this doesn't matter for the current sole caller (ignore_dirs=["backups"], where "backups" only exists at the top of the private tree), but the docstring documents incorrect behaviour and could mislead future callers that pass names which also appear deeper in the tree.

Suggested change
Shells out to `du` (C) instead of walking the tree in Python: a recursive
Python stat of every file was timing out gunicorn workers on sites with
large file trees. `du -b` reports apparent size (st_size), matching the
old behaviour. ignore_dirs is applied at the top level only, as before.
"""
Shells out to `du` (C) instead of walking the tree in Python: a recursive
Python stat of every file was timing out gunicorn workers on sites with
large file trees. `du -b` reports apparent size (st_size), matching the
old behaviour. Note: `du --exclude` matches basenames at every depth in
the tree, not just the top level (unlike the old Python walk).
"""
Prompt To Fix With AI
This is a comment left during a code review.
Path: agent/utils.py
Line: 87-91

Comment:
The docstring claims `ignore_dirs` is still applied "at the top level only, as before", but `du --exclude=PATTERN` matches the pattern against basenames **at every depth** in the tree — not just immediate children. In practice this doesn't matter for the current sole caller (`ignore_dirs=["backups"]`, where "backups" only exists at the top of the private tree), but the docstring documents incorrect behaviour and could mislead future callers that pass names which also appear deeper in the tree.

```suggestion
    Shells out to `du` (C) instead of walking the tree in Python: a recursive
    Python stat of every file was timing out gunicorn workers on sites with
    large file trees. `du -b` reports apparent size (st_size), matching the
    old behaviour. Note: `du --exclude` matches basenames at every depth in
    the tree, not just the top level (unlike the old Python walk).
    """
```

How can I resolve this? If you propose a fix, please make it concise.

command = ["du", "-sb"]
for ignored in ignore_dirs or []:
command += ["--exclude", ignored]
command.append(folder)

return total_size
output = subprocess.check_output(command, text=True)
return int(output.split(maxsplit=1)[0])
Comment on lines +95 to +96

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 subprocess.check_output without a timeout blocks indefinitely if du stalls on an unresponsive NFS/FUSE mount, holding the gunicorn worker in the same way the old Python walk did. A reasonable upper bound (e.g. 120 s) would surface the failure quickly instead of silently pinning a worker.

Suggested change
output = subprocess.check_output(command, text=True)
return int(output.split(maxsplit=1)[0])
output = subprocess.check_output(command, text=True, timeout=120)
return int(output.split(maxsplit=1)[0])
Prompt To Fix With AI
This is a comment left during a code review.
Path: agent/utils.py
Line: 97-98

Comment:
`subprocess.check_output` without a `timeout` blocks indefinitely if `du` stalls on an unresponsive NFS/FUSE mount, holding the gunicorn worker in the same way the old Python walk did. A reasonable upper bound (e.g. 120 s) would surface the failure quickly instead of silently pinning a worker.

```suggestion
    output = subprocess.check_output(command, text=True, timeout=120)
    return int(output.split(maxsplit=1)[0])
```

How can I resolve this? If you propose a fix, please make it concise.



def is_registry_healthy(url: str, username: str, password: str) -> bool:
Expand Down
3 changes: 2 additions & 1 deletion agent/web.py
Original file line number Diff line number Diff line change
Expand Up @@ -744,7 +744,8 @@ def fetch_site_status(bench, site):
@application.route("/benches/<string:bench>/sites/<string:site>/info", methods=["GET"])
@validate_bench_and_site
def fetch_site_info(bench, site):
return {"data": Server().benches[bench].sites[site].fetch_site_info()}
database_only = request.args.get("database_only") in ("1", "true", "True")
return {"data": Server().benches[bench].sites[site].fetch_site_info(database_only=database_only)}


@application.route("/benches/<string:bench>/sites/<string:site>/analytics", methods=["GET"])
Expand Down
Loading