From 834fe6180383306999e7b067f315fe0ade4c89eb Mon Sep 17 00:00:00 2001 From: Balamurali M Date: Mon, 22 Jun 2026 13:11:25 +0530 Subject: [PATCH 1/2] fix(site): Skip file-tree walk when refreshing only database usage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A database-usage refresh went through the generic sync_info, which calls the agent's get_usage — and that recursively walks the site's entire file tree (public, private, backups) just to sum file sizes. On sites with large file trees the walk exceeds the gunicorn timeout, killing agent web workers. The dashboard polls refresh_database_usage every few seconds, so a single bloated site keeps the walk running back-to-back. Threading a database_only flag through get_site_info -> fetch_info -> sync_info fixes this: in that mode the agent is asked (via ?database_only=1) for just the cheap database size, and _sync_database_usage records a Site Usage row that carries the last-known file totals forward instead of recomputing them. Requires the matching agent change to honour ?database_only; without it the agent ignores the param and falls back to the full walk (current behaviour), so the two can be deployed in either order safely. Co-authored-by: Claude Opus 4.8 --- press/agent.py | 7 ++++-- press/press/doctype/site/site.py | 34 ++++++++++++++++++++++----- press/press/doctype/site/test_site.py | 25 ++++++++++++++++++++ 3 files changed, 58 insertions(+), 8 deletions(-) diff --git a/press/agent.py b/press/agent.py index 54b3f376237..895a60df916 100644 --- a/press/agent.py +++ b/press/agent.py @@ -1198,8 +1198,11 @@ def get_site_sid(self, site, user=None): result = self.get(f"benches/{site.bench}/sites/{site.name}/sid") return result and result.get("sid") - def get_site_info(self, site): - result = self.get(f"benches/{site.bench}/sites/{site.name}/info") + def get_site_info(self, site, database_only=False): + path = f"benches/{site.bench}/sites/{site.name}/info" + if database_only: + path += "?database_only=1" + result = self.get(path) if result: return result["data"] return None diff --git a/press/press/doctype/site/site.py b/press/press/doctype/site/site.py index d221ec16c9e..caba2c70217 100644 --- a/press/press/doctype/site/site.py +++ b/press/press/doctype/site/site.py @@ -2062,9 +2062,9 @@ def get_login_sid(self, user: str = "Administrator"): frappe.throw(f"Could not login as {user}", frappe.ValidationError) # nosemgrep return sid - def fetch_info(self): + def fetch_info(self, database_only=False): agent = Agent(self.server) - return agent.get_site_info(self) + return agent.get_site_info(self, database_only=database_only) def fetch_analytics(self): agent = Agent(self.server) @@ -2104,6 +2104,24 @@ def _sync_config_info(self, fetched_config: dict) -> bool: return True return False + def _sync_database_usage(self, fetched_usage: dict): + """Record a Site Usage row, refreshing only the database size. + + Carries the last-known file sizes forward so a database-usage refresh + doesn't trigger the expensive file-tree walk in the agent's get_usage. + """ + last = self.get_disk_usages() + self._insert_site_usage( + { + "database": fetched_usage["database"], + "database_free": fetched_usage.get("database_free", 0), + "database_free_tables": fetched_usage.get("database_free_tables", []), + "public": last["public"] or 0, + "private": last["private"] or 0, + "backups": last["backups"] or 0, + } + ) + def _sync_usage_info(self, fetched_usage: dict): """Generate a Site Usage doc for the site using the fetched_usage data. @@ -2181,14 +2199,18 @@ def _sync_database_name(self, config): return False @frappe.whitelist() - def sync_info(self, data=None): + def sync_info(self, data=None, database_only=False): """Updates Site Usage, site.config and timezone details for site.""" if not data: - data = self.fetch_info() + data = self.fetch_info(database_only=database_only) if not data: return + if database_only: + self._sync_database_usage(data["usage"]) + return + fetched_usage = data["usage"] fetched_config = data["config"] fetched_timezone = data["timezone"] @@ -3939,7 +3961,7 @@ def add_database_index(self, table, column): def refresh_database_usage(self): # Check if schema parser enabled on db server if not frappe.db.get_value("Database Server", self.database_server_name, "enable_schema_size_parser"): - self.sync_info() + self.sync_info(database_only=True) return { "synced": True, } @@ -5495,7 +5517,7 @@ def process_refresh_database_usage_job_update(job: AgentJob): site: Site = frappe.get_doc("Site", job.site) with suppress(Exception): # Don't throw error on failure of syncing also - site.sync_info() + site.sync_info(database_only=True) def on_doctype_update(): diff --git a/press/press/doctype/site/test_site.py b/press/press/doctype/site/test_site.py index 5707e427a8e..2af66423b45 100644 --- a/press/press/doctype/site/test_site.py +++ b/press/press/doctype/site/test_site.py @@ -580,6 +580,31 @@ def test_site_usage_exceed_tracking(self): self.assertIsNotNone(site.site_usage_exceeded_on) self.assertEqual(site.status, "Active") + def test_sync_info_database_only_refreshes_db_size_and_carries_files_forward(self): + site = create_test_site() + frappe.get_doc( + { + "doctype": "Site Usage", + "site": site.name, + "database": 100, + "public": 200, + "private": 300, + "backups": 400, + } + ).insert() + + # In database_only mode the agent returns only the database size; the + # file totals must be carried forward, not recomputed (no file walk). + with patch.object(Site, "fetch_info", return_value={"usage": {"database": 150}}) as fetch_info: + site.sync_info(database_only=True) + + fetch_info.assert_called_once_with(database_only=True) + latest = frappe.get_last_doc("Site Usage", {"site": site.name}) + self.assertEqual(latest.database, 150) + self.assertEqual(latest.public, 200) + self.assertEqual(latest.private, 300) + self.assertEqual(latest.backups, 400) + def test_free_sites_ignore_usage_exceed_tracking(self): team = create_test_team(free_account=False) plan_10 = create_test_plan("Site", price_usd=10.0, price_inr=750.0, plan_name="USD 10") From 7f24318954e395aa5ef229a26abdb2ac7b185fe3 Mon Sep 17 00:00:00 2001 From: Balamurali M Date: Mon, 22 Jun 2026 13:47:44 +0530 Subject: [PATCH 2/2] fix(site): Type-hint database_only so whitelist coerces the string Over the HTTP API a query-string value like database_only=False arrives as the string "False", which is truthy and would wrongly take the fast _sync_database_usage path. Annotating the parameter as bool lets Frappe's whitelist argument coercion convert it via sbool. Co-authored-by: Claude Opus 4.8 --- press/press/doctype/site/site.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/press/press/doctype/site/site.py b/press/press/doctype/site/site.py index caba2c70217..dd2a6e0592e 100644 --- a/press/press/doctype/site/site.py +++ b/press/press/doctype/site/site.py @@ -2199,7 +2199,7 @@ def _sync_database_name(self, config): return False @frappe.whitelist() - def sync_info(self, data=None, database_only=False): + def sync_info(self, data=None, database_only: bool = False): """Updates Site Usage, site.config and timezone details for site.""" if not data: data = self.fetch_info(database_only=database_only)