Skip to content

Commit 61dcba1

Browse files
committed
Use memcached to track where we left off pruning
1 parent 84b139f commit 61dcba1

1 file changed

Lines changed: 24 additions & 3 deletions

File tree

crontab/PrunePageData.inc

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,33 +7,48 @@ class PrunePageData extends BackgroundJob
77
{
88
$database = SiteConfig::get()->archive_db_name ? SiteConfig::get()->archive_db_name : SiteConfig::get()->db_name;
99

10+
// See if there's a milestone marker in memcache for where we left off
11+
$memcache = new Memcached();
12+
$memcache->addServer('localhost', 11211);
13+
$last_modifieddate = $memcache->get("PrunePageData:last_modifieddate");
14+
if ($last_modifieddate !== false and is_numeric($last_modifieddate)) {
15+
echo "Starting pruning at milestone $last_modifieddate\n";
16+
} else {
17+
echo "No last milestone found, starting at the beginning\n";
18+
$last_modifieddate = 0;
19+
}
20+
1021
// We need to iteratively delete data in batches by projectid because:
1122
// 1. we base the pruning on the project's posted time
1223
// 2. there are indexes for these tables based on projectid
1324
// 3. stopping between projects lets us limit our runtime
1425
$sql = sprintf(
1526
"
16-
SELECT projectid
27+
SELECT projectid, modifieddate
1728
FROM projects
1829
WHERE
1930
modifieddate <= UNIX_TIMESTAMP() - (24 * 60 * 60) * %d
31+
AND modifieddate > %d
2032
AND state = '%s'
2133
ORDER BY modifieddate
2234
",
2335
SiteConfig::get()->days_to_retain_page_data_after_posting,
36+
$last_modifieddate,
2437
DPDatabase::escape(PROJ_SUBMIT_PG_POSTED)
2538
);
2639
$result = DPDatabase::query($sql);
2740
$num_projects = mysqli_num_rows($result);
2841

29-
echo "Pruning data for $num_projects projects...\n";
42+
echo "Pruning page data for $num_projects projects...\n";
3043

3144
$num_projects_pruned = 0;
32-
while ([$project_id] = mysqli_fetch_row($result)) {
45+
while ([$project_id, $modifieddate] = mysqli_fetch_row($result)) {
3346
if ($this->watch->read() >= $this->web_context_max_runtime_s) {
3447
break;
3548
}
3649

50+
echo "Pruning page data for $project_id ($modifieddate)...\n";
51+
3752
// first wordcheck_events
3853
$sql = sprintf(
3954
"
@@ -55,6 +70,12 @@ class PrunePageData extends BackgroundJob
5570
DPDatabase::query($sql);
5671

5772
$num_projects_pruned += 1;
73+
74+
$last_modifieddate = $modifieddate;
75+
}
76+
77+
if ($memcache->set("PrunePageData:last_modifieddate", $last_modifieddate) !== false) {
78+
echo "Set last milestone to $last_modifieddate\n";
5879
}
5980

6081
$leftover_projects = $num_projects - $num_projects_pruned;

0 commit comments

Comments
 (0)