diff --git a/src/packages/backend/files/path-to-files.ts b/src/packages/backend/files/path-to-files.ts new file mode 100644 index 0000000000..fdb344b0f3 --- /dev/null +++ b/src/packages/backend/files/path-to-files.ts @@ -0,0 +1,17 @@ +/* + * This file is part of CoCalc: Copyright © 2020 Sagemath, Inc. + * License: AGPLv3 s.t. "Commons Clause" – see LICENSE.md for details + */ + +// This is used to find files on the share server (public_paths) in "next" +// and also in the hub, for deleting shared files of projects + +import { join } from "node:path"; + +import { projects } from "@cocalc/backend/data"; + +// Given a project_id/path, return the directory on the file system where +// that path should be located. +export function pathToFiles(project_id: string, path: string): string { + return join(projects.replace("[project_id]", project_id), path); +} diff --git a/src/packages/backend/logger.ts b/src/packages/backend/logger.ts index bc4a9ded88..04454929d7 100644 --- a/src/packages/backend/logger.ts +++ b/src/packages/backend/logger.ts @@ -12,9 +12,11 @@ process.env.DEBUG_HIDE_DATE = "yes"; // since we supply it ourselves // otherwise, maybe stuff like this works: (debug as any).inspectOpts["hideDate"] = true; import debug, { Debugger } from "debug"; -import { mkdirSync, createWriteStream, statSync, ftruncate } from "fs"; -import { format } from "util"; -import { dirname, join } from "path"; + +import { createWriteStream, ftruncate, mkdirSync, statSync } from "node:fs"; +import { dirname, join } from "node:path"; +import { format } from "node:util"; + import { logs } from "./data"; const MAX_FILE_SIZE_BYTES = 20 * 1024 * 1024; // 20MB @@ -25,12 +27,12 @@ let _trimLogFileSizePath = ""; export function trimLogFileSize() { // THIS JUST DOESN'T REALLY WORK! 
   return;
-
+
   if (!_trimLogFileSizePath) return;
   let stats;
   try {
     stats = statSync(_trimLogFileSizePath);
-  } catch(_) {
+  } catch (_) {
     // this happens if the file doesn't exist, which is fine since "trimming" it would be a no-op
     return;
   }
@@ -141,19 +143,7 @@ function initTransports() {
 
 initTransports();
 
-const DEBUGGERS = {
-  error: COCALC.extend("error"),
-  warn: COCALC.extend("warn"),
-  info: COCALC.extend("info"),
-  http: COCALC.extend("http"),
-  verbose: COCALC.extend("verbose"),
-  debug: COCALC.extend("debug"),
-  silly: COCALC.extend("silly"),
-};
-
-type Level = keyof typeof DEBUGGERS;
-
-const LEVELS: Level[] = [
+const LEVELS = [
   "error",
   "warn",
   "info",
@@ -161,7 +151,19 @@ const LEVELS: Level[] = [
   "verbose",
   "debug",
   "silly",
-];
+] as const;
+
+type Level = (typeof LEVELS)[number];
+
+const DEBUGGERS: { [key in Level]: Debugger } = {
+  error: COCALC.extend("error"),
+  warn: COCALC.extend("warn"),
+  info: COCALC.extend("info"),
+  http: COCALC.extend("http"),
+  verbose: COCALC.extend("verbose"),
+  debug: COCALC.extend("debug"),
+  silly: COCALC.extend("silly"),
+} as const;
 
 class Logger {
   private name: string;
@@ -194,13 +196,13 @@ class Logger {
 }
 
 export interface WinstonLogger {
-  error: Function;
-  warn: Function;
-  info: Function;
-  http: Function;
-  verbose: Function;
-  debug: Function;
-  silly: Function;
+  error: Debugger;
+  warn: Debugger;
+  info: Debugger;
+  http: Debugger;
+  verbose: Debugger;
+  debug: Debugger;
+  silly: Debugger;
   extend: (name: string) => WinstonLogger;
   isEnabled: (level: Level) => boolean;
 }
diff --git a/src/packages/backend/metrics.ts b/src/packages/backend/metrics.ts
index 14551ae9cb..6500653db0 100644
--- a/src/packages/backend/metrics.ts
+++ b/src/packages/backend/metrics.ts
@@ -1,6 +1,6 @@
 import { Counter, Gauge, Histogram } from "prom-client";
 
-type Aspect = "db" | "database" | "server" | "llm";
+type Aspect = "db" | "database" | "server" | "llm";
 
 function withPrefix(aspect: Aspect, name: string): string {
return `cocalc_${aspect}_${name}`; @@ -13,7 +13,7 @@ export function newCounter( name: string, help: string, labelNames: string[] = [], -) { +): Counter { name = withPrefix(aspect, name); const key = `counter-${name}`; if (cache[key] != null) { diff --git a/src/packages/backend/misc.ts b/src/packages/backend/misc.ts index 22f9c00f34..779843e7c8 100644 --- a/src/packages/backend/misc.ts +++ b/src/packages/backend/misc.ts @@ -1,4 +1,7 @@ import { createHash } from "crypto"; +import { join } from "node:path"; + +import { projects } from "@cocalc/backend/data"; import { is_valid_uuid_string } from "@cocalc/util/misc"; /* @@ -69,3 +72,14 @@ export function envForSpawn() { } return env; } + +// return the absolute home directory of given @project_id project on disk +export function homePath(project_id: string): string { + // $MOUNTED_PROJECTS_ROOT is for OnPrem and that "projects" location is only for dev/single-user + const projects_root = process.env.MOUNTED_PROJECTS_ROOT; + if (projects_root) { + return join(projects_root, project_id); + } else { + return projects.replace("[project_id]", project_id); + } +} diff --git a/src/packages/database/package.json b/src/packages/database/package.json index c5c02e9e5f..c5a11bd9cc 100644 --- a/src/packages/database/package.json +++ b/src/packages/database/package.json @@ -32,7 +32,7 @@ "lru-cache": "^7.18.3", "nats": "^2.29.3", "node-fetch": "2.6.7", - "pg": "^8.7.1", + "pg": "^8.13.0", "random-key": "^0.3.2", "read": "^1.0.7", "sql-string-escape": "^1.1.6", @@ -42,7 +42,7 @@ "devDependencies": { "@types/node": "^18.16.14", "@types/lodash": "^4.14.202", - "@types/pg": "^8.6.1", + "@types/pg": "^8.11.10", "@types/uuid": "^8.3.1", "coffeescript": "^2.5.1" }, @@ -51,7 +51,7 @@ "build": "../node_modules/.bin/tsc --build && coffee -c -o dist/ ./", "clean": "rm -rf dist", "tsc": "../node_modules/.bin/tsc --watch --pretty --preserveWatchOutput", - "test": "pnpm exec jest --forceExit --runInBand", + "test": "TZ=UTC jest --forceExit 
--runInBand", "prepublishOnly": "pnpm test" }, "repository": { diff --git a/src/packages/database/postgres-server-queries.coffee b/src/packages/database/postgres-server-queries.coffee index e995ae09e7..380732ff81 100644 --- a/src/packages/database/postgres-server-queries.coffee +++ b/src/packages/database/postgres-server-queries.coffee @@ -51,7 +51,7 @@ read = require('read') {site_license_manager_set} = require('./postgres/site-license/manager') {matching_site_licenses, manager_site_licenses} = require('./postgres/site-license/search') {project_datastore_set, project_datastore_get, project_datastore_del} = require('./postgres/project-queries') -{permanently_unlink_all_deleted_projects_of_user, unlink_old_deleted_projects} = require('./postgres/delete-projects') +{permanently_unlink_all_deleted_projects_of_user, unlink_old_deleted_projects, cleanup_old_projects_data} = require('./postgres/delete-projects') {get_all_public_paths, unlist_all_public_paths} = require('./postgres/public-paths') {get_personal_user} = require('./postgres/personal') {set_passport_settings, get_passport_settings, get_all_passport_settings, get_all_passport_settings_cached, create_passport, passport_exists, update_account_and_passport, _passport_key} = require('./postgres/passport') @@ -2467,6 +2467,10 @@ exports.extend_PostgreSQL = (ext) -> class PostgreSQL extends ext unlink_old_deleted_projects: () => return await unlink_old_deleted_projects(@) + # async function + cleanup_old_projects_data: (max_run_m) => + return await cleanup_old_projects_data(@, max_run_m) + # async function unlist_all_public_paths: (account_id, is_owner) => return await unlist_all_public_paths(@, account_id, is_owner) diff --git a/src/packages/database/postgres/bulk-delete.test.ts b/src/packages/database/postgres/bulk-delete.test.ts new file mode 100644 index 0000000000..e06f3b4a2f --- /dev/null +++ b/src/packages/database/postgres/bulk-delete.test.ts @@ -0,0 +1,70 @@ +/* + * This file is part of CoCalc: Copyright © 
2024 Sagemath, Inc.
+ * License: AGPLv3 s.t. "Commons Clause" – see LICENSE.md for details
+ */
+
+import getPool, { initEphemeralDatabase } from "@cocalc/database/pool";
+import { uuid } from "@cocalc/util/misc";
+import { bulkDelete } from "./bulk-delete";
+
+beforeAll(async () => {
+  await initEphemeralDatabase({});
+}, 15000);
+
+afterAll(async () => {
+  await getPool().end();
+});
+
+describe("bulk delete", () => {
+  test("deleting projects", async () => {
+    const p = getPool();
+    const project_id = uuid();
+    const N = 100000;
+
+    // extra entry, which has to remain
+    const other = uuid();
+    await p.query(
+      "INSERT INTO project_log (id, project_id, time) VALUES($1::UUID, $2::UUID, $3::TIMESTAMP)",
+      [other, uuid(), new Date()],
+    );
+
+    await p.query(
+      `INSERT INTO project_log (id, project_id, time)
+       SELECT gen_random_uuid(), $1::UUID, NOW() - interval '1 second' * g.n
+       FROM generate_series(1, $2) AS g(n)`,
+      [project_id, N],
+    );
+
+    const num1 = await p.query(
+      "SELECT COUNT(*)::INT as num FROM project_log WHERE project_id = $1",
+      [project_id],
+    );
+    expect(num1.rows[0].num).toEqual(N);
+
+    const res = await bulkDelete({
+      table: "project_log",
+      field: "project_id",
+      value: project_id,
+    });
+
+    // if this ever fails, the "ret.rowCount" value is inaccurate.
+    // This must be replaced by "RETURNING 1" in the query and a "SELECT COUNT(*) ..." and so on.
+ // (and not only here, but everywhere in the code base) + expect(res.rowsDeleted).toEqual(N); + expect(res.durationS).toBeGreaterThan(0.1); + expect(res.totalPgTimeS).toBeGreaterThan(0.1); + expect(res.totalWaitS).toBeGreaterThan(0.1); + expect((res.totalPgTimeS * 10) / res.totalWaitS).toBeGreaterThan(0.5); + + const num2 = await p.query( + "SELECT COUNT(*)::INT as num FROM project_log WHERE project_id = $1", + [project_id], + ); + expect(num2.rows[0].num).toEqual(0); + + const otherRes = await p.query("SELECT * FROM project_log WHERE id = $1", [ + other, + ]); + expect(otherRes.rows[0].id).toEqual(other); + }, 10000); +}); diff --git a/src/packages/database/postgres/bulk-delete.ts b/src/packages/database/postgres/bulk-delete.ts new file mode 100644 index 0000000000..04519e5b2f --- /dev/null +++ b/src/packages/database/postgres/bulk-delete.ts @@ -0,0 +1,98 @@ +import { escapeIdentifier } from "pg"; + +import getLogger from "@cocalc/backend/logger"; +import { envToInt } from "@cocalc/backend/misc/env-to-number"; +import getPool from "@cocalc/database/pool"; +import { SCHEMA } from "@cocalc/util/schema"; + +const log = getLogger("db:bulk-delete"); +const D = log.debug; + +type Field = + | "project_id" + | "account_id" + | "target_project_id" + | "source_project_id"; + +const MAX_UTIL_PCT = envToInt("COCALC_DB_BULK_DELETE_MAX_UTIL_PCT", 10); +// adjust the time limits: by default, we aim to keep the operation between 0.1 and 0.2 secs +const MAX_TIME_TARGET_MS = envToInt( + "COCALC_DB_BULK_DELETE_MAX_TIME_TARGET_MS", + 100, +); +const MAX_TARGET_S = MAX_TIME_TARGET_MS / 1000; +const MIN_TARGET_S = MAX_TARGET_S / 2; +const DEFAULT_LIMIT = envToInt("COCALC_DB_BULK_DELETE_DEFAULT_LIMIT", 16); +const MAX_LIMIT = envToInt("COCALC_DB_BULK_DELETE_MAX_LIMIT", 32768); + +interface Opts { + table: string; // e.g. project_log, etc. 
+ field: Field; // for now, we only support a few + id?: string; // default "id", the ID field in the table, which identifies each row uniquely + value: string; // a UUID + limit?: number; // default 1024 + maxUtilPct?: number; // 0-100, percent +} + +type Ret = Promise<{ + rowsDeleted: number; + durationS: number; + totalWaitS: number; + totalPgTimeS: number; +}>; + +function deleteQuery(table: string, field: string, id: string) { + const T = escapeIdentifier(table); + const F = escapeIdentifier(field); + const ID = escapeIdentifier(id); + + return ` +DELETE FROM ${T} +WHERE ${ID} IN ( + SELECT ${ID} FROM ${T} WHERE ${F} = $1 LIMIT $2 +)`; +} + +export async function bulkDelete(opts: Opts): Ret { + const { table, field, value, id = "id", maxUtilPct = MAX_UTIL_PCT } = opts; + let { limit = DEFAULT_LIMIT } = opts; + // assert table name is a key in SCHEMA + if (!(table in SCHEMA)) { + throw new Error(`table ${table} does not exist`); + } + + if (maxUtilPct < 1 || maxUtilPct > 99) { + throw new Error(`maxUtilPct must be between 1 and 99`); + } + + const q = deleteQuery(table, field, id); + const pool = getPool(); + const start_ts = Date.now(); + + let rowsDeleted = 0; + let totalWaitS = 0; + let totalPgTimeS = 0; + while (true) { + const t0 = Date.now(); + const ret = await pool.query(q, [value, limit]); + const dt = (Date.now() - t0) / 1000; + rowsDeleted += ret.rowCount ?? 0; + totalPgTimeS += dt; + + const next = + dt > MAX_TARGET_S ? limit / 2 : dt < MIN_TARGET_S ? 
limit * 2 : limit; + limit = Math.max(1, Math.min(MAX_LIMIT, Math.round(next))); + + // wait for a bit, but not more than 1 second ~ this aims for a max utilization of 10% + const waitS = Math.min(1, dt * ((100 - maxUtilPct) / maxUtilPct)); + await new Promise((done) => setTimeout(done, 1000 * waitS)); + totalWaitS += waitS; + + D(`deleted ${ret.rowCount} | dt=${dt} | wait=${waitS} | limit=${limit}`); + + if (ret.rowCount === 0) break; + } + + const durationS = (Date.now() - start_ts) / 1000; + return { durationS, rowsDeleted, totalWaitS, totalPgTimeS }; +} diff --git a/src/packages/database/postgres/delete-projects.ts b/src/packages/database/postgres/delete-projects.ts index fa125fb143..e97bfb4580 100644 --- a/src/packages/database/postgres/delete-projects.ts +++ b/src/packages/database/postgres/delete-projects.ts @@ -7,9 +7,31 @@ Code related to permanently deleting projects. */ +import { promises as fs } from "node:fs"; + +import { pathToFiles } from "@cocalc/backend/files/path-to-files"; +import getLogger, { WinstonLogger } from "@cocalc/backend/logger"; +import { newCounter } from "@cocalc/backend/metrics"; +import { homePath } from "@cocalc/backend/misc"; +import getPool from "@cocalc/database/pool"; +import { getServerSettings } from "@cocalc/database/settings"; import { callback2 } from "@cocalc/util/async-utils"; +import { KUCALC_ON_PREMISES } from "@cocalc/util/db-schema/site-defaults"; +import { minutes_ago } from "@cocalc/util/misc"; +import { bulkDelete } from "./bulk-delete"; import { PostgreSQL } from "./types"; +const { F_OK, R_OK, W_OK } = fs.constants; + +const log = getLogger("db:delete-projects"); + +const delete_projects_prom = newCounter( + "database", + "delete_projects_total", + "Deleting projects and associated data operations counter.", + ["op"], +); + /* Permanently delete from the database all project records, where the project is explicitly deleted already (so the deleted field is true). 
@@ -17,10 +39,12 @@ Call this function to setup projects for permanent deletion. This blanks the user field so the user no longer can access the project, and we don't know that the user had anything to do with the project. A separate phase later then purges these projects from disk as well as the database. + +TODO: it's referenced from postgres-server-queries.coffee, but is it actually used anywhere? */ export async function permanently_unlink_all_deleted_projects_of_user( db: PostgreSQL, - account_id_or_email_address: string + account_id_or_email_address: string, ): Promise { // Get the account_id if necessary. const account_id = await get_account_id(db, account_id_or_email_address); @@ -36,7 +60,7 @@ export async function permanently_unlink_all_deleted_projects_of_user( async function get_account_id( db: PostgreSQL, - account_id_or_email_address: string + account_id_or_email_address: string, ): Promise { if (account_id_or_email_address.indexOf("@") == -1) { return account_id_or_email_address; @@ -52,20 +76,252 @@ async function get_account_id( } /* -This deletes all projects older than the given number of days, from the perspective of a user. -Another task has to run to actually get rid of the data, etc. +This removes all users from all projects older than the given number of days and marked as deleted. +In particular, users are no longer able to access that project. +The "cleanup_old_projects_data" function has to run to actually get rid of the data, etc. 
*/ export async function unlink_old_deleted_projects( db: PostgreSQL, - age_d = 30 + age_d = 30, ): Promise { - await callback2(db._query, { + const L = log.extend("unlink_old_deleted_projects").debug; + const { rowCount } = await callback2(db._query, { query: "UPDATE projects", set: { users: null }, where: [ - "deleted = true", + "deleted = true", "users IS NOT NULL", `last_edited <= NOW() - '${age_d} days'::INTERVAL`, ], }); + L("unlinked projects:", rowCount); +} + +const Q_CLEANUP_SYNCSTRINGS = ` +SELECT s.string_id, p.project_id +FROM projects as p INNER JOIN syncstrings as s + ON p.project_id = s.project_id +WHERE p.deleted = true + AND p.users IS NULL +ORDER BY + p.project_id, s.string_id +LIMIT 1000 +`; + +const Q_CLEANUP_PROJECTS = ` +SELECT project_id +FROM projects +WHERE deleted = true + AND users IS NULL + AND coalesce(state ->> 'state', '') != 'deleted' +ORDER BY created ASC +LIMIT 1000 +`; + +/* + This more thorough delete procedure comes after the above. + It issues actual delete operations on data of projects marked as deleted. + When done, it sets the state.state to "deleted". + + The operations involves deleting all syncstrings of that project (and associated with that, patches), + and only for on-prem setups, it also deletes all the data stored in the project on disk and various tables. + + This function is called every couple of hours. Hence it checks to not run longer than the given max_run_m time (minutes). 
+*/ +export async function cleanup_old_projects_data( + db: PostgreSQL, + max_run_m = 60, +) { + const settings = await getServerSettings(); + const on_prem = settings.kucalc === KUCALC_ON_PREMISES; + const delete_data = settings.delete_project_data; + const L0 = log.extend("cleanup_old_projects_data"); + const L = L0.debug; + + L("args", { max_run_m, on_prem, delete_data }); + + if (!delete_data) { + L(`deleting project data is disabled ('delete_project_data' setting).`); + return; + } + + const start_ts = new Date(); + const pool = getPool(); + + let numSyncStr = 0; + let numProj = 0; + + while (true) { + if (start_ts < minutes_ago(max_run_m)) { + L(`too much time elapsed, breaking after ${numSyncStr} syncstrings`); + return; + } + + const { rows: syncstrings } = await pool.query(Q_CLEANUP_SYNCSTRINGS); + L(`deleting ${syncstrings.length} syncstrings`); + for (const { project_id, string_id } of syncstrings) { + L(`deleting syncstring ${project_id}/${string_id}`); + numSyncStr += 1; + const t0 = Date.now(); + await callback2(db.delete_syncstring, { string_id }); + const elapsed_ms = Date.now() - t0; + delete_projects_prom.labels("syncstring").inc(); + // wait a bit after deleting syncstrings, e.g. to let the standby db catch up + // this ensures a max of "10%" utilization of the database – or wait 1 second + await new Promise((done) => + setTimeout(done, Math.min(1000, elapsed_ms * 9)), + ); + } + + const { rows: projects } = await pool.query(Q_CLEANUP_PROJECTS); + L(`deleting the data of ${projects.length} projects`); + for (const { project_id } of projects) { + const L2 = L0.extend(project_id).debug; + delete_projects_prom.labels("project").inc(); + numProj += 1; + let delRows = 0; + + // Clean up data *on* a given project. For now, remove all site licenses, status and last_active. 
+ await pool.query( + `UPDATE projects + SET site_license = NULL, status = NULL, last_active = NULL, run_quota = NULL + WHERE project_id = $1`, + [project_id], + ); + + if (on_prem) { + // we don't delete the central_log, it has its own expiration + // such an entry is good to have for reconstructing what really happened + db.log({ + event: "delete_project", + value: { deleting: "files", project_id }, + }); + + L2(`delete all project files`); + await deleteProjectFiles(L2, project_id); + + try { + // this is something like /shared/projects/${project_id} + const shared_path = pathToFiles(project_id, ""); + L2(`deleting all shared files in ${shared_path}`); + await fs.rm(shared_path, { recursive: true, force: true }); + } catch (err) { + L2(`Unable to delete shared files: ${err}`); + } + } + + // This gets rid of all sorts of data in tables specific to the given project. + delRows += await delete_associated_project_data(L2, project_id); + db.log({ + event: "delete_project", + value: { deleting: "database", project_id }, + }); + + // now, that we're done with that project, mark it as state.state ->> 'deleted' + // in addition to the flag "deleted = true". This also updates the state.time timestamp. + await callback2(db.set_project_state, { project_id, state: "deleted" }); + L2( + `finished deleting project data | deleted ${delRows} entries | state.state="deleted"`, + ); + } + + if (projects.length === 0 && syncstrings.length === 0) { + L(`all data of deleted projects and associated syncstrings are deleted.`); + L( + `In total ${numSyncStr} syncstrings and ${numProj} projects were processed.`, + ); + return; + } + } +} + +async function delete_associated_project_data( + L2: WinstonLogger["debug"], + project_id: string, +): Promise { + // TODO: two tables reference a project, but become useless. + // There should be a fallback strategy to move these objects to another project or surface them as being orphaned. 
+ // tables: cloud_filesystems, compute_servers + + let total = 0; + // collecting tables, where the primary key is the default (i.e. "id") and + // the field to check is always called "project_id" + const tables = [ + //"blobs", // TODO: this is a bit tricky, because data could be used elsewhere. In the future, there will be an associated account_id! + "file_access_log", + "file_use", + "jupyter_api_log", + "mentions", + "openai_chatgpt_log", + "project_log", + "public_paths", + "shopping_cart_items", + ] as const; + + for (const table of tables) { + const { rowsDeleted } = await bulkDelete({ + table, + field: "project_id", + value: project_id, + }); + total += rowsDeleted; + L2(`deleted in ${table}: ${rowsDeleted} entries`); + } + + // these tables are different, i.e. another id, or the field to check the project_id value against is called differently + + for (const field of ["target_project_id", "source_project_id"] as const) { + const { rowsDeleted } = await bulkDelete({ + table: "copy_paths", + field, + value: project_id, + }); + total += rowsDeleted; + L2(`deleted copy_paths/${field}: ${rowsDeleted} entries`); + } + + { + const { rowsDeleted } = await bulkDelete({ + table: "listings", + field: "project_id", + id: "project_id", // TODO listings has a more complex ID, which means this gets rid of everything in one go. should be fine, though. 
+ value: project_id, + }); + total += rowsDeleted; + L2(`deleted in listings: ${rowsDeleted} entries`); + } + + { + const { rowsDeleted } = await bulkDelete({ + table: "project_invite_tokens", + field: "project_id", + value: project_id, + id: "token", + }); + total += rowsDeleted; + L2(`deleted in project_invite_tokens: ${rowsDeleted} entries`); + } + + return total; +} + +async function deleteProjectFiles( + L2: WinstonLogger["debug"], + project_id: string, +) { + const project_dir = homePath(project_id); + try { + await fs.access(project_dir, F_OK | R_OK | W_OK); + const stats = await fs.lstat(project_dir); + if (stats.isDirectory()) { + L2(`deleting all files in ${project_dir}`); + await fs.rm(project_dir, { recursive: true, force: true }); + } else { + L2(`is not a directory: ${project_dir}`); + } + } catch (err) { + L2( + `not deleting project files: either '${project_dir}' does not exist or is not accessible`, + ); + } } diff --git a/src/packages/database/postgres/site-license/hook.test.ts b/src/packages/database/postgres/site-license/hook.test.ts index f7bfbddbd6..4cb7ee5a45 100644 --- a/src/packages/database/postgres/site-license/hook.test.ts +++ b/src/packages/database/postgres/site-license/hook.test.ts @@ -17,8 +17,6 @@ pnpm test hook.test.ts * The quota function uses a deep copy operation on all its arguments to avoid this. 
*/ -// see packages/database/pool/pool.ts for where this name is also hard coded: -process.env.PGDATABASE = "smc_ephemeral_testing_database"; import { isEqual } from "lodash"; diff --git a/src/packages/database/postgres/types.ts b/src/packages/database/postgres/types.ts index 9b290f3798..25227b2332 100644 --- a/src/packages/database/postgres/types.ts +++ b/src/packages/database/postgres/types.ts @@ -311,6 +311,8 @@ export interface PostgreSQL extends EventEmitter { cb?: CB; }); + delete_syncstring(opts: { string_id: string; cb: CB }); + projects_that_need_to_be_started(): Promise; is_connected(): boolean; @@ -323,6 +325,8 @@ export interface PostgreSQL extends EventEmitter { }>; }): Promise; + set_project_state(opts: { project_id: string; state: ProjectState["state"] }); + user_query_cancel_changefeed(opts: { id: any; cb?: CB }): void; save_blob(opts: { diff --git a/src/packages/database/test/setup.js b/src/packages/database/test/setup.js index 97701164bd..ee2e6cce0d 100644 --- a/src/packages/database/test/setup.js +++ b/src/packages/database/test/setup.js @@ -2,3 +2,6 @@ // see packages/database/pool/pool.ts for where this name is also hard coded: process.env.PGDATABASE = "smc_ephemeral_testing_database"; + +// checked for in some code to behave differently while running unit tests. +process.env.COCALC_TEST_MODE = true; diff --git a/src/packages/frontend/i18n/README.md b/src/packages/frontend/i18n/README.md index 2bd6a0ae43..431bcedc68 100644 --- a/src/packages/frontend/i18n/README.md +++ b/src/packages/frontend/i18n/README.md @@ -41,15 +41,17 @@ After introducing new messages, these are the steps to get all translations into 1. `pnpm i18n:download` - Will grab the updated files like `zh_CN.json` and save them in the `i18n` folder. + Will grab the updated files containing the translated strings (e.g. `zh_CN.json`) and save them in the `./i18n/trans/` folder. 
+ The source of truth for these files is always the remotely stored data – hence do not ever edit these files directly. 1. `pnpm i18n:compile` - This transforms the `[locale].json` files to `[locale].compiles.json`. + This transforms the `[locale].json` translation files from the step before to `[locale].compiled.json`. This could also reveal problems, when conditional ICU messages aren't properly formatted. - E.g. `"Sí, cerrar sesión{en todas partes, seleccionar, verdadero { en todas partes} otro {}}" with ID "account.sign-out.button.ok" in file "./i18n/es_ES.json"`: In the brackets, it has to start according to the syntax: `{everywhere, select, true {..} other {}}`. + E.g. `"Sí, cerrar sesión{en todas partes, seleccionar, verdadero { en todas partes} otro {}}" with ID "account.sign-out.button.ok" in file "./i18n/es_ES.json"`: + In the brackets, it has to start according to the syntax: `{everywhere, select, true {..} other {}}`, i.e. the variable `everywhere` must stay in English. -1. Reload the `frontend` after a compile, such that `await import...` will load the updated translation file for the set locale. +1. Reload the `frontend` after a compile, such that `await import...` will load the updated compiled translation file for the configured locale. Note: if just a translation has been updated, you only need to do the `i18n:download` & `i18n:compile` steps. 
diff --git a/src/packages/frontend/project/warnings/deleted.tsx b/src/packages/frontend/project/warnings/deleted.tsx index 1a7a9ecf04..a7d119518f 100644 --- a/src/packages/frontend/project/warnings/deleted.tsx +++ b/src/packages/frontend/project/warnings/deleted.tsx @@ -3,21 +3,25 @@ * License: MS-RSL – see LICENSE.md for details */ -import { Alert } from "../../antd-bootstrap"; -import { Icon } from "../../components"; +import { FormattedMessage } from "react-intl"; + +import { Alert } from "@cocalc/frontend/antd-bootstrap"; +import { Icon } from "@cocalc/frontend/components"; // A warning to put on pages when the project is deleted export const DeletedProjectWarning: React.FC = () => { return ( - -

- Warning: this project is{" "} - deleted! -

-

- If you intend to use this project, you should{" "} - undelete it in project settings. -

+ + {icon} Warning: this project is deleted! + If you intend to use this project, you should undelete it in project settings.`} + values={{ + icon: , + strong: (c) => {c}, + h4: (c) =>

{c}

, + }} + />
  );
};
diff --git a/src/packages/hub/run/delete-projects.js b/src/packages/hub/run/delete-projects.js
index 0b0d9e06e8..9d9372ad1f 100755
--- a/src/packages/hub/run/delete-projects.js
+++ b/src/packages/hub/run/delete-projects.js
@@ -1,9 +1,14 @@
 #!/usr/bin/env node
+
 /*
 Periodically delete projects.
 
-TODO: For now, this just calls the unlink function. Later on it
-should do more (actually delete data, etc.).
+STATUS:
+For now, this just calls the unlink function and deletes all associated syncstrings and data.
+In "onprem" mode, this also deletes entries in various tables, which contain data specific to the deleted projects.
+
+TESTING: to run this in development and see logging, call it like that:
+./src/packages/hub$ env DEBUG_CONSOLE=yes DEBUG=cocalc:debug:db:* pnpm cocalc-hub-delete-projects
 */
 
 const postgres = require("@cocalc/database");
@@ -16,6 +21,9 @@ async function update() {
   console.log("unlinking old deleted projects...");
   try {
     await db.unlink_old_deleted_projects();
+    // limit the max runtime to half the interval time
+    const max_run_m = (INTERVAL_MS / 2) / (1000 * 60)
+    await db.cleanup_old_projects_data(max_run_m);
   } catch (err) {
     if (err !== null) {
       throw Error(`failed to unlink projects -- ${err}`);
diff --git a/src/packages/next/lib/share/get-contents.ts b/src/packages/next/lib/share/get-contents.ts
index 002a09b10a..47470f047d 100644
--- a/src/packages/next/lib/share/get-contents.ts
+++ b/src/packages/next/lib/share/get-contents.ts
@@ -3,10 +3,11 @@
  * License: MS-RSL – see LICENSE.md for details
  */
 
-import pathToFiles from "./path-to-files";
 import { promises as fs } from "fs";
-import { join } from "path";
 import { sortBy } from "lodash";
+import { join } from "path";
+
+import { pathToFiles } from "@cocalc/backend/files/path-to-files";
 import { hasSpecialViewer } from "@cocalc/frontend/file-extensions";
 import { getExtension } from "./util";
 
@@ -22,7 +23,7 @@ const LISTED_LIMITS = {
   html: 3 * MB, // no special viewer
 
   generic: 2 * MB,
-};
+} as const;
const UNLISTED_LIMITS = { ...LISTED_LIMITS, diff --git a/src/packages/next/lib/share/path-to-files.ts b/src/packages/next/lib/share/path-to-files.ts index 943d539038..83c2d6c5dc 100644 --- a/src/packages/next/lib/share/path-to-files.ts +++ b/src/packages/next/lib/share/path-to-files.ts @@ -3,24 +3,17 @@ * License: MS-RSL – see LICENSE.md for details */ -import { join } from "path"; +import { pathToFiles } from "@cocalc/backend/files/path-to-files"; import getPool from "@cocalc/database/pool"; -import { projects } from "@cocalc/backend/data"; - -// Given a project_id/path, return the directory on the file system where -// that path should be located. -export default function pathToFiles(project_id: string, path: string): string { - return join(projects.replace("[project_id]", project_id), path); -} export async function pathFromID( - id: string + id: string, ): Promise<{ projectPath: string; fsPath: string }> { // 'infinite' since actually result can't change since id determines the path (it's a reverse sha1 hash computation) const pool = getPool("infinite"); const { rows } = await pool.query( "SELECT project_id, path FROM public_paths WHERE id=$1 AND disabled IS NOT TRUE", - [id] + [id], ); if (rows.length == 0) { throw Error(`no such public path: ${id}`); diff --git a/src/packages/next/lib/share/virtual-hosts.ts b/src/packages/next/lib/share/virtual-hosts.ts index f5d6f01fe9..bc19cf137e 100644 --- a/src/packages/next/lib/share/virtual-hosts.ts +++ b/src/packages/next/lib/share/virtual-hosts.ts @@ -10,11 +10,11 @@ Support for virtual hosts. 
import type { Request, Response } from "express"; import basePath from "@cocalc/backend/base-path"; +import { pathToFiles } from "@cocalc/backend/files/path-to-files"; import { getLogger } from "@cocalc/backend/logger"; import isAuthenticated from "./authenticate"; import getVirtualHostInfo from "./get-vhost-info"; import { staticHandler } from "./handle-raw"; -import pathToFiles from "./path-to-files"; const logger = getLogger("virtual-hosts"); diff --git a/src/packages/next/package.json b/src/packages/next/package.json index 44ec639758..18014585fb 100644 --- a/src/packages/next/package.json +++ b/src/packages/next/package.json @@ -62,6 +62,7 @@ "@cocalc/frontend": "workspace:*", "@cocalc/server": "workspace:*", "@cocalc/util": "workspace:*", + "@types/pg": "^8.11.10", "@openapitools/openapi-generator-cli": "^2.19.1", "@types/react": "^18.3.10", "@types/react-dom": "^18.3.0", @@ -83,7 +84,7 @@ "next-rest-framework": "6.0.0-beta.4", "next-translate": "^2.6.2", "password-hash": "^1.2.2", - "pg": "^8.7.1", + "pg": "^8.13.0", "react": "^18.3.1", "react-dom": "^18.3.1", "react-google-recaptcha": "^2.1.0", diff --git a/src/packages/pnpm-lock.yaml b/src/packages/pnpm-lock.yaml index e7a02aac72..4f923b0ab1 100644 --- a/src/packages/pnpm-lock.yaml +++ b/src/packages/pnpm-lock.yaml @@ -221,7 +221,7 @@ importers: specifier: 2.6.7 version: 2.6.7(encoding@0.1.13) pg: - specifier: ^8.7.1 + specifier: ^8.13.0 version: 8.14.1 random-key: specifier: ^0.3.2 @@ -246,7 +246,7 @@ importers: specifier: ^18.16.14 version: 18.19.86 '@types/pg': - specifier: ^8.6.1 + specifier: ^8.11.10 version: 8.11.11 '@types/uuid': specifier: ^8.3.1 @@ -1177,6 +1177,9 @@ importers: '@openapitools/openapi-generator-cli': specifier: ^2.19.1 version: 2.19.1(encoding@0.1.13) + '@types/pg': + specifier: ^8.11.10 + version: 8.11.11 '@types/react': specifier: ^18.3.10 version: 18.3.10 @@ -1238,7 +1241,7 @@ importers: specifier: ^1.2.2 version: 1.2.2 pg: - specifier: ^8.7.1 + specifier: ^8.13.0 version: 8.14.1 
react: specifier: ^18.3.1 diff --git a/src/packages/project/project-status/server.ts b/src/packages/project/project-status/server.ts index 53a7c44703..838ec2bd7f 100644 --- a/src/packages/project/project-status/server.ts +++ b/src/packages/project/project-status/server.ts @@ -15,14 +15,10 @@ status updates. Hence in particular, information like cpu, memory and disk are smoothed out and throttled. */ -import { getLogger } from "@cocalc/project/logger"; -import { how_long_ago_m, round1 } from "@cocalc/util/misc"; -import { version as smcVersion } from "@cocalc/util/smc-version"; import { delay } from "awaiting"; import { EventEmitter } from "events"; import { isEqual } from "lodash"; -import { get_ProjectInfoServer, ProjectInfoServer } from "../project-info"; -import { ProjectInfo } from "@cocalc/util/types/project-info/types"; + import { ALERT_DISK_FREE, ALERT_HIGH_PCT /* ALERT_MEDIUM_PCT */, @@ -36,6 +32,11 @@ import { ProjectStatus, } from "@cocalc/comm/project-status/types"; import { cgroup_stats } from "@cocalc/comm/project-status/utils"; +import { getLogger } from "@cocalc/project/logger"; +import { how_long_ago_m, round1 } from "@cocalc/util/misc"; +import { version as smcVersion } from "@cocalc/util/smc-version"; +import { ProjectInfo } from "@cocalc/util/types/project-info/types"; +import { get_ProjectInfoServer, ProjectInfoServer } from "../project-info"; // TODO: only return the "next" value, if it is significantly different from "prev" //function threshold(prev?: number, next?: number): number | undefined { @@ -83,7 +84,7 @@ export class ProjectStatusServer extends EventEmitter { constructor(testing = false) { super(); this.testing = testing; - this.dbg = (...msg) => winston.debug(...msg); + this.dbg = (...msg) => winston.debug(msg[0], ...msg.slice(1)); this.project_info = get_ProjectInfoServer(); } diff --git a/src/packages/project/usage-info/server.ts b/src/packages/project/usage-info/server.ts index 9f0c4ff9bb..07f328c604 100644 --- 
a/src/packages/project/usage-info/server.ts +++ b/src/packages/project/usage-info/server.ts @@ -12,17 +12,17 @@ from the ProjectInfoServer (which collects data about everything) */ import { delay } from "awaiting"; +import { throttle } from "lodash"; import { EventEmitter } from "node:events"; -import { getLogger } from "../logger"; -import { ProjectInfoServer, get_ProjectInfoServer } from "../project-info"; import { Process, ProjectInfo } from "@cocalc/util/types/project-info/types"; import type { UsageInfo } from "@cocalc/util/types/project-usage-info"; -import { throttle } from "lodash"; +import { getLogger } from "../logger"; +import { ProjectInfoServer, get_ProjectInfoServer } from "../project-info"; const L = getLogger("usage-info:server").debug; -const throttled_dbg = throttle((...args) => L(...args), 10000); +const throttled_dbg = throttle(L, 10000); function is_diff(prev: UsageInfo, next: UsageInfo, key: keyof UsageInfo) { // we assume a,b >= 0, hence we leave out Math.abs operations diff --git a/src/packages/server/nats/tiered-storage/clean.ts b/src/packages/server/nats/tiered-storage/clean.ts index 1e50f2ec18..14cf442b87 100644 --- a/src/packages/server/nats/tiered-storage/clean.ts +++ b/src/packages/server/nats/tiered-storage/clean.ts @@ -10,7 +10,7 @@ import { archiveProject, archiveAccount } from "./archive"; const logger = getLogger("tiered-storage:clean"); const log = (...args) => { - logger.debug(...args); + logger.debug("", ...args); console.log("tiered-storage:clean: ", ...args); }; diff --git a/src/packages/server/projects/control/multi-user.ts b/src/packages/server/projects/control/multi-user.ts index bf39e6617b..7e3b983eb3 100644 --- a/src/packages/server/projects/control/multi-user.ts +++ b/src/packages/server/projects/control/multi-user.ts @@ -16,6 +16,15 @@ This code is very similar to single-user.ts, except with some small modifications due to having to create and delete Linux users. 
*/ +import getLogger from "@cocalc/backend/logger"; +import { getUid, homePath } from "@cocalc/backend/misc"; +import { + BaseProject, + CopyOptions, + getProject, + ProjectState, + ProjectStatus, +} from "./base"; import { chown, copyPath, @@ -25,22 +34,12 @@ import { getEnvironment, getState, getStatus, - homePath, isProjectRunning, launchProjectDaemon, mkdir, setupDataPath, stopProjectProcesses, } from "./util"; -import { - BaseProject, - CopyOptions, - getProject, - ProjectStatus, - ProjectState, -} from "./base"; -import getLogger from "@cocalc/backend/logger"; -import { getUid } from "@cocalc/backend/misc"; const winston = getLogger("project-control:multi-user"); @@ -71,7 +70,7 @@ class Project extends BaseProject { const status = await getStatus(this.HOME); // TODO: don't include secret token in log message. winston.debug( - `got status of ${this.project_id} = ${JSON.stringify(status)}` + `got status of ${this.project_id} = ${JSON.stringify(status)}`, ); this.saveStatusToDatabase(status); return status; @@ -155,7 +154,7 @@ class Project extends BaseProject { await copyPath( opts, this.project_id, - opts.target_project_id ? getUid(opts.target_project_id) : undefined + opts.target_project_id ? 
getUid(opts.target_project_id) : undefined, ); return ""; } diff --git a/src/packages/server/projects/control/single-user.ts b/src/packages/server/projects/control/single-user.ts index 7a0fcd8754..9b0844142a 100644 --- a/src/packages/server/projects/control/single-user.ts +++ b/src/packages/server/projects/control/single-user.ts @@ -19,6 +19,7 @@ This is useful for: import { kill } from "process"; import getLogger from "@cocalc/backend/logger"; +import { homePath } from "@cocalc/backend/misc"; import { BaseProject, CopyOptions, @@ -33,7 +34,6 @@ import { getProjectPID, getState, getStatus, - homePath, isProjectRunning, launchProjectDaemon, mkdir, diff --git a/src/packages/server/projects/control/util.ts b/src/packages/server/projects/control/util.ts index 19a5fc2f04..0016682841 100644 --- a/src/packages/server/projects/control/util.ts +++ b/src/packages/server/projects/control/util.ts @@ -1,21 +1,21 @@ -import { promisify } from "util"; -import { dirname, join, resolve } from "path"; -import { exec as exec0, spawn } from "child_process"; import spawnAsync from "await-spawn"; -import * as fs from "fs"; -import { writeFile } from "fs/promises"; -import { projects, root } from "@cocalc/backend/data"; -import { is_valid_uuid_string } from "@cocalc/util/misc"; -import { callback2 } from "@cocalc/util/async-utils"; -import getLogger from "@cocalc/backend/logger"; -import { CopyOptions, ProjectState, ProjectStatus } from "./base"; -import { getUid } from "@cocalc/backend/misc"; +import { exec as exec0, spawn } from "node:child_process"; +import * as fs from "node:fs"; +import { writeFile } from "node:fs/promises"; +import { dirname, join, resolve } from "node:path"; +import { promisify } from "node:util"; + import base_path from "@cocalc/backend/base-path"; +import { natsPorts, natsServer, root } from "@cocalc/backend/data"; +import getLogger from "@cocalc/backend/logger"; +import { getUid, homePath } from "@cocalc/backend/misc"; import { db } from "@cocalc/database"; 
-import { getProject } from "."; -import { pidFilename, pidUpdateIntervalMs } from "@cocalc/util/project-info"; import { getServerSettings } from "@cocalc/database/settings/server-settings"; -import { natsPorts, natsServer } from "@cocalc/backend/data"; +import { callback2 } from "@cocalc/util/async-utils"; +import { is_valid_uuid_string } from "@cocalc/util/misc"; +import { pidFilename, pidUpdateIntervalMs } from "@cocalc/util/project-info"; +import { getProject } from "."; +import { CopyOptions, ProjectState, ProjectStatus } from "./base"; const logger = getLogger("project-control:util"); @@ -33,10 +33,6 @@ export function dataPath(HOME: string): string { return join(HOME, ".smc"); } -export function homePath(project_id: string): string { - return projects.replace("[project_id]", project_id); -} - export function getUsername(project_id: string): string { return project_id.split("-").join(""); } @@ -109,12 +105,7 @@ export async function launchProjectDaemon(env, uid?: number): Promise { logger.debug(`launching project daemon at "${env.HOME}"...`); const cwd = join(root, "packages/project"); const cmd = "pnpm"; - const args = [ - "cocalc-project", - "--daemon", - "--init", - "project_init.sh", - ]; + const args = ["cocalc-project", "--daemon", "--init", "project_init.sh"]; logger.debug( `"${cmd} ${args.join(" ")} from "${cwd}" as user with uid=${uid}`, ); diff --git a/src/packages/server/shopping/cart/add.ts b/src/packages/server/shopping/cart/add.ts index eb4718ada3..e5720b0e96 100644 --- a/src/packages/server/shopping/cart/add.ts +++ b/src/packages/server/shopping/cart/add.ts @@ -15,12 +15,12 @@ any value to a spammer so it's very unlikely to be exploited maliciously. I did add throttling to the api handler. 
*/ -import { isValidUUID } from "@cocalc/util/misc"; import getPool from "@cocalc/database/pool"; import { - ProductType, ProductDescription, + ProductType, } from "@cocalc/util/db-schema/shopping-cart-items"; +import { isValidUUID } from "@cocalc/util/misc"; import { getItem } from "./get"; import dayjs from "dayjs"; //import { getLogger } from "@cocalc/backend/logger"; diff --git a/src/packages/server/shopping/cart/recent-purchases.ts b/src/packages/server/shopping/cart/recent-purchases.ts index f89994d65d..d0b3a7705f 100644 --- a/src/packages/server/shopping/cart/recent-purchases.ts +++ b/src/packages/server/shopping/cart/recent-purchases.ts @@ -29,7 +29,7 @@ export default async function getRecentPurchases({ const pool = getPool(); const { rows } = await pool.query( `SELECT * FROM shopping_cart_items WHERE account_id=$1 AND purchased IS NOT NULL AND (purchased#>>'{time}')::timestamptz >= NOW() - $2::interval AND purchased#>>'{voucher_id}' IS NULL`, - [account_id, recent ?? "1 week"] + [account_id, recent ?? "1 week"], ); rows.sort((a, b) => -cmp(a.purchased?.time, b.purchased?.time)); return rows; diff --git a/src/packages/server/shopping/cart/remove.ts b/src/packages/server/shopping/cart/remove.ts index 231a423309..d36c89f5f2 100644 --- a/src/packages/server/shopping/cart/remove.ts +++ b/src/packages/server/shopping/cart/remove.ts @@ -15,7 +15,7 @@ import getPool from "@cocalc/database/pool"; // You can't remove an item more than once from a cart. export default async function removeFromCart( account_id: string, - id: number + id: number, ): Promise { if (!isValidUUID(account_id)) { throw Error("account_id is invalid"); @@ -23,8 +23,7 @@ export default async function removeFromCart( const pool = getPool(); const { rowCount } = await pool.query( "UPDATE shopping_cart_items SET removed=NOW() WHERE account_id=$1 AND id=$2 AND removed IS NULL AND purchased IS NULL", - [account_id, id] + [account_id, id], ); return rowCount ?? 
0; } - diff --git a/src/packages/server/software-envs.ts b/src/packages/server/software-envs.ts index f0f0e1a6fb..ee0a759d2b 100644 --- a/src/packages/server/software-envs.ts +++ b/src/packages/server/software-envs.ts @@ -65,8 +65,7 @@ async function readConfig(purpose: Purpose): Promise { // parse the content of softwareFn as json try { const software = JSON.parse((await readFile(softwareFn)).toString()); - const dbg = (...msg) => L(...msg); - const sanitized = sanitizeSoftwareEnv({ software, registry, purpose }, dbg); + const sanitized = sanitizeSoftwareEnv({ software, registry, purpose }, L); return sanitized; } catch (err) { W(`WARNING: ${softwareFn} is not a valid JSON file -- ${err}`); diff --git a/src/packages/util/compute-states.ts b/src/packages/util/compute-states.ts index 947d00d9aa..628ea2fb1a 100644 --- a/src/packages/util/compute-states.ts +++ b/src/packages/util/compute-states.ts @@ -32,7 +32,8 @@ export type State = | "running" | "starting" | "stopping" - | "unarchiving"; + | "unarchiving" + | "deleted"; // @hsy: completely unclear what this is for. 
type Operation = @@ -300,4 +301,20 @@ export const COMPUTE_STATES: ComputeStates = { "migrate_live", ], }, + + // projects are deleted in hub -> postgres.delete-projects and this is a one-way operation + deleted: { + desc: defineMessage({ + id: "util.compute-states.deleted.desc", + defaultMessage: "Project is deleted", + }), + icon: "trash", + display: defineMessage({ + id: "util.compute-states.deleted.display", + defaultMessage: "Deleted", + }), + stable: true, + to: {}, + commands: [], + }, } as const; diff --git a/src/packages/util/db-schema/site-defaults.ts b/src/packages/util/db-schema/site-defaults.ts index b233259fc4..b533d61a78 100644 --- a/src/packages/util/db-schema/site-defaults.ts +++ b/src/packages/util/db-schema/site-defaults.ts @@ -88,6 +88,7 @@ export type SiteSettingsKeys = | "unlicensed_project_timetravel_limit" | "google_analytics" | "kucalc" + | "delete_project_data" | "i18n" | "dns" | "datastore" @@ -714,6 +715,13 @@ export const site_settings_conf: SiteSettings = { to_val: split_iframe_comm_hosts, to_display: num_dns_hosts, }, + delete_project_data: { + name: "Delete Project Data", + desc: "When a project has been marked as deleted, also actually delete associated data from the database and – for OnPrem and single-user dev mode only – also its files.", + default: "no", + valid: only_booleans, + to_val: to_bool, + }, email_enabled: { name: "Email sending enabled", desc: "Controls visibility of UI elements and if any emails are sent. This is independent of any particular email configuration!",