Skip to content

Commit 99124e1

Browse files
authored
fix(memory): archive runaway memory before AI compaction (#1704)
1 parent a2c0aba commit 99124e1

2 files changed

Lines changed: 279 additions & 0 deletions

File tree

inc/Engine/AI/System/Tasks/DailyMemoryTask.php

Lines changed: 170 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,17 @@ public function executeTask( int $jobId, array $params ): void {
101101
$memory_content = $result['content'];
102102
$original_size = strlen( $memory_content );
103103

104+
$overflow_result = $this->maybeHandleDeterministicOverflow( $jobId, $memory, $daily, $memory_content, $original_size, $date );
105+
if ( null !== $overflow_result ) {
106+
if ( empty( $overflow_result['success'] ) ) {
107+
$this->failJob( $jobId, $overflow_result['message'] ?? 'Daily memory overflow split failed.' );
108+
return;
109+
}
110+
111+
$this->completeJob( $jobId, $overflow_result );
112+
return;
113+
}
114+
104115
// Skip if MEMORY.md is within the recommended threshold and no activity context.
105116
$context = $this->gatherContext( $params );
106117
if ( $original_size <= AgentMemory::MAX_FILE_SIZE && empty( $context ) ) {
@@ -366,6 +377,165 @@ public function executeTask( int $jobId, array $params ): void {
366377
);
367378
}
368379

380+
/**
 * Deterministically split very large MEMORY.md files before invoking AI.
 *
 * Extremely large memory files can exceed the practical request envelope for
 * non-streaming provider calls. This path moves whole tail sections to the
 * daily archive and leaves a small persistent file with an archive pointer,
 * without asking the model to process the entire oversized file.
 *
 * Ordering matters: the archived sections are appended to the daily file
 * BEFORE MEMORY.md is rewritten. If the append fails, MEMORY.md is left
 * untouched and nothing is lost. If the later rewrite fails, the sections
 * exist in both files, which is recoverable; the reverse order could drop
 * the archived sections entirely.
 *
 * @param int         $jobId          Job ID.
 * @param AgentMemory $memory         Agent memory facade.
 * @param DailyMemory $daily          Daily memory facade.
 * @param string      $memory_content Current MEMORY.md content.
 * @param int         $original_size  Original byte size.
 * @param string      $date           Archive date, expected as three dash-separated parts (e.g. YYYY-MM-DD).
 * @return array|null Result array when handled (`success` is false on failure and
 *                    `message` describes it), null when normal AI compaction should proceed.
 */
private function maybeHandleDeterministicOverflow( int $jobId, AgentMemory $memory, DailyMemory $daily, string $memory_content, int $original_size, string $date ): ?array {
	// Both filters receive the same context payload.
	$filter_context = array(
		'job_id'        => $jobId,
		'date'          => $date,
		'original_size' => $original_size,
	);

	$threshold = (int) apply_filters(
		'datamachine_daily_memory_overflow_threshold',
		AgentMemory::MAX_FILE_SIZE * 4,
		$filter_context
	);

	// A non-positive threshold disables this path; otherwise only act above it.
	if ( $threshold <= 0 || $original_size <= $threshold ) {
		return null;
	}

	$target_size = (int) apply_filters(
		'datamachine_daily_memory_overflow_target_size',
		AgentMemory::MAX_FILE_SIZE,
		$filter_context
	);
	// Never aim below 1 KiB, whatever a filter returns.
	$target_size = max( 1024, $target_size );

	$split = self::splitMemorySectionsForOverflow( $memory_content, $target_size, $date );
	if ( empty( $split['archived'] ) ) {
		// Nothing could be split off; let the regular AI compaction handle it.
		return null;
	}

	// Validate the date before touching any file so a malformed value cannot
	// leave MEMORY.md truncated with nowhere to archive the removed sections.
	$parts = explode( '-', $date );
	if ( 3 !== count( $parts ) ) {
		return array(
			'success' => false,
			'message' => sprintf( 'Daily memory overflow split failed: invalid archive date "%s".', $date ),
		);
	}

	// Step 1: persist the archived sections to the daily file.
	$archive_body = "\n### Archived from oversized MEMORY.md\n\n" . $split['archived'] . "\n";
	$append       = $daily->append( $parts[0], $parts[1], $parts[2], $archive_body );
	if ( empty( $append['success'] ) ) {
		return array(
			'success' => false,
			'message' => $append['message'] ?? 'Daily memory overflow split failed: could not append archived sections to daily memory.',
		);
	}

	// Step 2: only now shrink MEMORY.md to the persistent head plus pointer.
	$write_result = $memory->replace_all( $split['persistent'] );
	if ( empty( $write_result['success'] ) ) {
		return array(
			'success' => false,
			'message' => $write_result['message'] ?? 'Daily memory overflow split failed: could not rewrite MEMORY.md.',
		);
	}

	$archived_size = strlen( $split['archived'] );
	$new_size      = strlen( $split['persistent'] );

	do_action(
		'datamachine_log',
		'info',
		sprintf(
			'Daily memory overflow split complete: %s -> %s (%s archived verbatim to daily/%s)',
			size_format( $original_size ),
			size_format( $new_size ),
			size_format( $archived_size ),
			$date
		),
		array(
			'date'              => $date,
			'original_size'     => $original_size,
			'new_size'          => $new_size,
			'archived_size'     => $archived_size,
			'archived_blocks'   => $split['archived_blocks'],
			'persistent_blocks' => $split['persistent_blocks'],
		)
	);

	return array(
		'success'           => true,
		'date'              => $date,
		'original_size'     => $original_size,
		'new_size'          => $new_size,
		'archived_size'     => $archived_size,
		'overflow_split'    => true,
		'archived_blocks'   => $split['archived_blocks'],
		'persistent_blocks' => $split['persistent_blocks'],
	);
}
479+
480+
/**
 * Split markdown into persistent head sections and archived tail sections.
 *
 * The content is cut at every level-2 heading (`## ...`). Blocks are kept in
 * the persistent file, in order, while the joined result plus the archive
 * pointer still fits in `$target_size`. The first block is always kept, even
 * when it alone exceeds the target. As soon as one block does not fit, that
 * block and every block after it are archived, so the persistent file is a
 * contiguous head and the archive a contiguous tail: section order is never
 * changed and a small late section is never pulled ahead of a large one.
 *
 * When nothing is archived the original `$content` is returned untouched as
 * `persistent` and `archived` is an empty string.
 *
 * @param string $content     Full MEMORY.md content.
 * @param int    $target_size Target persistent size in bytes.
 * @param string $date        Archive date; dashes are turned into slashes for the pointer path.
 * @return array{persistent: string, archived: string, persistent_blocks: int, archived_blocks: int}
 */
private static function splitMemorySectionsForOverflow( string $content, int $target_size, string $date ): array {
	// Zero-width lookahead keeps each "## " heading attached to its own block.
	$blocks = preg_split( '/(?=^## .+$)/m', trim( $content ), -1, PREG_SPLIT_NO_EMPTY );
	if ( ! is_array( $blocks ) || count( $blocks ) < 2 ) {
		// preg_split failure or a single block: there is nothing to split off.
		return array(
			'persistent'        => $content,
			'archived'          => '',
			'persistent_blocks' => count( is_array( $blocks ) ? $blocks : array() ),
			'archived_blocks'   => 0,
		);
	}

	$persistent = array();
	$archived   = array();
	$pointer    = sprintf(
		"\n## Archived Memory Overflow\n\nOn %s, Daily Memory archived older MEMORY.md sections verbatim to `daily/%s`. Use daily memory search/read when those details are needed.\n",
		$date,
		str_replace( '-', '/', $date ) . '.md'
	);

	// Flips to true at the first block that does not fit; from then on every
	// remaining block goes to the archive so the tail stays contiguous.
	$overflowed = false;

	foreach ( $blocks as $index => $block ) {
		$block = trim( $block );
		if ( '' === $block ) {
			continue;
		}

		if ( ! $overflowed ) {
			// Size the file as it would be written: kept blocks + this one + pointer.
			$candidate = implode( "\n\n", array_merge( $persistent, array( $block ) ) ) . $pointer;
			if ( 0 === $index || strlen( $candidate ) <= $target_size ) {
				$persistent[] = $block;
				continue;
			}

			$overflowed = true;
		}

		$archived[] = $block;
	}

	if ( empty( $archived ) ) {
		return array(
			'persistent'        => $content,
			'archived'          => '',
			'persistent_blocks' => count( $persistent ),
			'archived_blocks'   => 0,
		);
	}

	return array(
		'persistent'        => rtrim( implode( "\n\n", $persistent ) . $pointer ) . "\n",
		'archived'          => implode( "\n\n", $archived ),
		'persistent_blocks' => count( $persistent ),
		'archived_blocks'   => count( $archived ),
	);
}
538+
369539
/**
370540
* {@inheritDoc}
371541
*/
Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
<?php
2+
/**
3+
* Pure-PHP smoke for the Daily Memory deterministic overflow split.
4+
*
5+
* Run with: php tests/daily-memory-overflow-split-smoke.php
6+
*
7+
* @package DataMachine\Tests
8+
*/
9+
10+
declare(strict_types=1);
11+
12+
$failures = array();
13+
$passes = 0;
14+
15+
/**
 * Record and print the outcome of one smoke-test check.
 *
 * Prints "PASS: <label>" and bumps the pass counter when the condition holds;
 * otherwise prints "FAIL: <label>" and appends the label to the failure list.
 *
 * @param bool   $condition Result of the check.
 * @param string $label     Human-readable name of the check.
 * @param array  $failures  Failure labels collected so far (modified in place).
 * @param int    $passes    Number of passed checks so far (modified in place).
 */
function dm_overflow_assert( bool $condition, string $label, array &$failures, int &$passes ): void {
	if ( ! $condition ) {
		array_push( $failures, $label );
		print 'FAIL: ' . $label . "\n";
		return;
	}

	$passes += 1;
	print 'PASS: ' . $label . "\n";
}
25+
26+
/**
 * Mirrors DailyMemoryTask::splitMemorySectionsForOverflow(); keep the two in sync.
 *
 * Cuts the content at every level-2 heading (`## ...`) and keeps blocks, in
 * order, while the joined result plus the archive pointer fits in
 * `$target_size`. The first block is always kept. Once a block does not fit,
 * it and every later block are archived, so the persistent part is a
 * contiguous head and the archive a contiguous tail (section order is never
 * changed).
 *
 * @param string $content     Full MEMORY.md content.
 * @param int    $target_size Target persistent size in bytes.
 * @param string $date        Archive date; dashes are turned into slashes for the pointer path.
 * @return array{persistent: string, archived: string, persistent_blocks: int, archived_blocks: int}
 */
function dm_overflow_split( string $content, int $target_size, string $date ): array {
	// Zero-width lookahead keeps each "## " heading attached to its own block.
	$blocks = preg_split( '/(?=^## .+$)/m', trim( $content ), -1, PREG_SPLIT_NO_EMPTY );
	if ( ! is_array( $blocks ) || count( $blocks ) < 2 ) {
		// preg_split failure or a single block: there is nothing to split off.
		return array(
			'persistent'        => $content,
			'archived'          => '',
			'persistent_blocks' => count( is_array( $blocks ) ? $blocks : array() ),
			'archived_blocks'   => 0,
		);
	}

	$persistent = array();
	$archived   = array();
	$pointer    = sprintf(
		"\n## Archived Memory Overflow\n\nOn %s, Daily Memory archived older MEMORY.md sections verbatim to `daily/%s`. Use daily memory search/read when those details are needed.\n",
		$date,
		str_replace( '-', '/', $date ) . '.md'
	);

	// Flips to true at the first block that does not fit; from then on every
	// remaining block goes to the archive so the tail stays contiguous.
	$overflowed = false;

	foreach ( $blocks as $index => $block ) {
		$block = trim( $block );
		if ( '' === $block ) {
			continue;
		}

		if ( ! $overflowed ) {
			// Size the file as it would be written: kept blocks + this one + pointer.
			$candidate = implode( "\n\n", array_merge( $persistent, array( $block ) ) ) . $pointer;
			if ( 0 === $index || strlen( $candidate ) <= $target_size ) {
				$persistent[] = $block;
				continue;
			}

			$overflowed = true;
		}

		$archived[] = $block;
	}

	if ( empty( $archived ) ) {
		return array(
			'persistent'        => $content,
			'archived'          => '',
			'persistent_blocks' => count( $persistent ),
			'archived_blocks'   => 0,
		);
	}

	return array(
		'persistent'        => rtrim( implode( "\n\n", $persistent ) . $pointer ) . "\n",
		'archived'          => implode( "\n\n", $archived ),
		'persistent_blocks' => count( $persistent ),
		'archived_blocks'   => count( $archived ),
	);
}
81+
82+
// --- Static checks: the production task file must expose the overflow hooks. ---
$task_source      = (string) file_get_contents( __DIR__ . '/../inc/Engine/AI/System/Tasks/DailyMemoryTask.php' );
$expected_markers = array(
	'maybeHandleDeterministicOverflow'              => 'production task has deterministic overflow hook',
	'splitMemorySectionsForOverflow'                => 'production task has section-split helper',
	'datamachine_daily_memory_overflow_threshold'   => 'overflow threshold is filterable',
	'datamachine_daily_memory_overflow_target_size' => 'overflow target size is filterable',
);
foreach ( $expected_markers as $needle => $label ) {
	dm_overflow_assert( str_contains( $task_source, $needle ), $label, $failures, $passes );
}

// --- Behavioural checks: an intro plus eight equally sized sections, split at 1400 bytes. ---
$sections = array_map(
	static function ( int $n ): string {
		return "## Section {$n}\n\n" . str_repeat( "Line {$n} persistent or session detail.\n", 12 ) . "\n";
	},
	range( 1, 8 )
);
$fixture  = "# Agent Memory\n\nIntro stays.\n\n" . implode( '', $sections );

$result = dm_overflow_split( $fixture, 1400, '2026-05-01' );
$checks = array(
	array( '' !== $result['archived'], 'oversized input produces archive content' ),
	array( str_contains( $result['persistent'], 'Archived Memory Overflow' ), 'persistent output includes archive pointer' ),
	array( str_contains( $result['persistent'], 'daily/2026/05/01.md' ), 'archive pointer names daily file path' ),
	array( str_contains( $result['persistent'], '## Section 1' ), 'persistent output keeps early sections' ),
	array( str_contains( $result['archived'], '## Section 8' ), 'archive output keeps later sections verbatim' ),
	array( ! str_contains( $result['persistent'], '## Section 8' ), 'archived sections are removed from persistent output' ),
	array( $result['persistent_blocks'] > 0, 'persistent block count reported' ),
	array( $result['archived_blocks'] > 0, 'archived block count reported' ),
);
foreach ( $checks as $check ) {
	dm_overflow_assert( $check[0], $check[1], $failures, $passes );
}

// A single small section must be left alone.
$single = dm_overflow_split( "## Only\n\nSmall file.\n", 1400, '2026-05-01' );
dm_overflow_assert( '' === $single['archived'], 'single-section small input does not split', $failures, $passes );

// --- Summary; a non-zero exit code signals failure to the caller. ---
$failed_count = count( $failures );
printf( "\n%d passed, %d failed\n", $passes, $failed_count );
if ( $failed_count > 0 ) {
	exit( 1 );
}

0 commit comments

Comments
 (0)