Skip to content

Commit 8257f84

Browse files
committed
feat(c2pa-monitor): L5 wire add_attachment and integration tests
- capture_for_attachment, sidecar, Record persistence - C2pa_MonitorTest; README: full flow, DIF schema cross-links, out of scope Made-with: Cursor
1 parent 70040b2 commit 8257f84

3 files changed

Lines changed: 605 additions & 41 deletions

File tree

includes/Experiments/C2pa_Monitor/C2pa_Monitor.php

Lines changed: 185 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,9 @@
2323
/**
2424
* C2PA Monitor experiment class.
2525
*
26-
* Layer 1: experiment metadata and registration only; no capture hook.
26+
* Hooks into add_attachment and captures a structured `_wpai_monitor_record`
27+
* for every uploaded image of a supported MIME type. The capture is read-only, fail-open, and never
28+
* blocks the upload pipeline.
2729
*
2830
* @since 0.7.0
2931
*/
@@ -78,7 +80,142 @@ protected function load_metadata(): array {
7880
* {@inheritDoc}
7981
*/
8082
public function register(): void {
	// Run the capture handler on add_attachment at priority 20, receiving
	// a single argument (the attachment ID).
	$handler = array( $this, 'capture_for_attachment' );

	add_action( 'add_attachment', $handler, 20, 1 );
}
85+
86+
/**
87+
* Captures C2PA presence and raw manifest for a freshly created attachment.
88+
*
89+
* Wrapped in a fail-open boundary: issues are recorded in the `errors`
90+
* array inside the persisted postmeta (when this experiment applies to the
91+
* attachment) alongside whatever partial data was collected. This handler
92+
* never throws, never returns an error, and never blocks the upload.
93+
* Unsupported MIME types are left untouched: no postmeta is written.
94+
*
95+
* @since 0.7.0
96+
*
97+
* @param int $attachment_id The newly created attachment ID.
98+
* @return void
99+
*/
100+
public function capture_for_attachment( int $attachment_id ): void {
	$started_at     = microtime( true );
	$should_persist = true;
	$errors         = array();
	$source         = array(
		'attachment_id'          => $attachment_id,
		'original_path_relative' => '',
		'size_bytes'             => 0,
		'mime'                   => '',
	);
	$c2pa           = array(
		'present' => false,
		'format'  => null,
	);

	try {
		$mime           = (string) get_post_mime_type( $attachment_id );
		$source['mime'] = $mime;

		// Unsupported MIME types are left untouched: no record is written.
		if ( ! self::is_supported_mime( $mime ) ) {
			$should_persist = false;
			return;
		}

		$path = self::get_original_path( $attachment_id );
		if ( '' === $path || ! is_readable( $path ) ) {
			$errors[] = array(
				'stage'   => 'resolve_path',
				'message' => 'Attachment file is not readable.',
			);
			return;
		}

		$size = filesize( $path );
		if ( false === $size ) {
			$errors[] = array(
				'stage'   => 'stat',
				'message' => 'filesize() returned false.',
			);
			return;
		}

		$source['size_bytes']             = (int) $size;
		$source['original_path_relative'] = self::relative_to_uploads( $path );

		// Oversized files are recorded (with an error) but never scanned.
		if ( $size > self::MAX_SCAN_BYTES ) {
			$errors[] = array(
				'stage'   => 'size_cap',
				'message' => sprintf( 'File exceeds MAX_SCAN_BYTES (%d).', self::MAX_SCAN_BYTES ),
			);
			return;
		}

		$detector       = new Format_Detector();
		$format         = $detector->detect_format( $path );
		$c2pa['format'] = $format;

		// Unknown container: the record keeps present=false, format=null.
		if ( null === $format ) {
			return;
		}

		// Known container without a manifest: not an error.
		$location = $detector->find_manifest_location( $path, $format );
		if ( null === $location ) {
			return;
		}

		$reader   = new Manifest_Reader();
		$manifest = $reader->read( $path, $location );
		if ( null === $manifest ) {
			$errors[] = array(
				'stage'   => 'read_manifest',
				'message' => 'Manifest_Reader returned null.',
			);
			return;
		}

		// Only a RuntimeException raised while creating or writing the sidecar
		// is attributed to the `sidecar_write` stage. The same exception type
		// from any earlier stage falls through to the `unexpected` handler
		// below instead of being mislabelled as a sidecar failure.
		try {
			$writer = new Sidecar_Writer();
			$rel    = $writer->write( $attachment_id, $manifest );
		} catch ( \RuntimeException $e ) {
			$errors[] = array(
				'stage'   => 'sidecar_write',
				'message' => $e->getMessage(),
			);
			return;
		}

		$c2pa = array(
			'present'               => true,
			'format'                => $manifest->format,
			'container'             => $manifest->container,
			'manifest_sha256'       => $manifest->sha256,
			'manifest_length'       => $manifest->bytes_length,
			'sidecar_path_relative' => $rel,
			'decoded'               => null,
		);
	} catch ( \Throwable $e ) {
		$errors[] = array(
			'stage'   => 'unexpected',
			'message' => $e->getMessage(),
		);
	} finally {
		// Runs for every exit path above, including the early returns.
		if ( $should_persist ) {
			// Persisting the record is itself inside the fail-open boundary:
			// an exception escaping from `finally` would propagate out of the
			// add_attachment hook and could break the upload.
			try {
				$duration_ms = (int) round( ( microtime( true ) - $started_at ) * 1000 );
				Record::store(
					$attachment_id,
					array(
						'schema_version' => self::SCHEMA_VERSION,
						'captured_at'    => gmdate( 'Y-m-d\TH:i:s\Z' ),
						'duration_ms'    => $duration_ms,
						'source'         => $source,
						'traditional'    => array(
							'exif' => array(),
							'iptc' => array(),
							'xmp'  => array(),
						),
						'c2pa'           => $c2pa,
						'errors'         => $errors,
					)
				);
			} catch ( \Throwable $e ) {
				// The record could not be stored, so the failure cannot be
				// captured in postmeta; surface it in the debug log only.
				if ( defined( 'WP_DEBUG' ) && WP_DEBUG ) {
					// phpcs:ignore WordPress.PHP.DevelopmentFunctions.error_log_error_log -- Debug-only diagnostics.
					error_log(
						sprintf(
							'C2PA Monitor: failed to persist record for attachment %d: %s',
							$attachment_id,
							$e->getMessage()
						)
					);
				}
			}
		}
	}
}
83220

84221
/**
@@ -96,4 +233,50 @@ public static function is_supported_mime( string $mime ): bool {
96233
true
97234
);
98235
}
236+
237+
/**
238+
* Resolves the absolute path to the original uploaded file.
239+
*
240+
* Falls back to get_attached_file() when wp_get_original_image_path() does
241+
* not return a usable path (non-image attachments, edited media, etc.).
242+
*
243+
* @since 0.7.0
244+
*
245+
* @param int $attachment_id Attachment ID.
246+
* @return string Absolute filesystem path, or empty string when unresolved.
247+
*/
248+
private static function get_original_path( int $attachment_id ): string {
	// Prefer the original image path when the helper function is available.
	$original = function_exists( 'wp_get_original_image_path' )
		? wp_get_original_image_path( $attachment_id )
		: false;

	if ( is_string( $original ) && '' !== $original ) {
		return $original;
	}

	// Otherwise fall back to the attached file; any non-string result maps to ''.
	$attached = get_attached_file( $attachment_id );
	if ( ! is_string( $attached ) ) {
		return '';
	}

	return $attached;
}
259+
260+
/**
261+
* Returns the path relative to the uploads basedir, or the absolute path
262+
* if it lives outside uploads.
263+
*
264+
* @since 0.7.0
265+
*
266+
* @param string $absolute Absolute path.
267+
* @return string Relative path or original absolute path.
268+
*/
269+
private static function relative_to_uploads( string $absolute ): string {
	$upload_info = wp_upload_dir( null, false );
	$has_basedir = is_array( $upload_info ) && ! empty( $upload_info['basedir'] );

	if ( $has_basedir ) {
		// Strip the uploads basedir (with trailing slash) when it prefixes the path.
		$prefix = trailingslashit( (string) $upload_info['basedir'] );
		if ( 0 === strpos( $absolute, $prefix ) ) {
			return substr( $absolute, strlen( $prefix ) );
		}
	}

	// No usable basedir, or the file lives outside uploads: keep the absolute path.
	return $absolute;
}
99282
}
Lines changed: 78 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,35 @@
11
# C2PA Monitor
22

3-
Read-only experiment for [C2PA Content Credentials](https://c2pa.org/) in uploaded
4-
images. Layers land incrementally on the integration branch; this README grows with
5-
each layer.
3+
Read-only experiment that detects [C2PA Content Credentials](https://c2pa.org/) in
4+
freshly uploaded images and captures the raw manifest store before WordPress's
5+
image processing pipeline destroys it.
66

77
## Status
88

9-
Experimental. Scaffolding, record format, and parsing are delivered in separate
10-
iterations. See the integration branch commit messages for the layer map.
9+
Experimental. No UI, no cryptographic verification, no JUMBF / CBOR decoding in
10+
this pass; richer claim summaries and UI are deferred. The feature was
11+
assembled in reviewable layers on the integration branch (register → record →
12+
detection → reader/sidecar → hook).
13+
14+
## What it does
15+
16+
On every successful image upload (`add_attachment` priority 20):
17+
18+
1. Resolve the original on-disk file via `wp_get_original_image_path()`, falling back to `get_attached_file()`.
19+
2. Sniff magic bytes for JPEG, PNG, or WebP. Other MIME types are skipped.
20+
3. Walk the container looking for the C2PA storage segment:
21+
- JPEG: contiguous `APP11` (0xFFEB) markers carrying a JUMBF payload tagged
22+
with the literal `c2pa` (or `jumb`) byte sequence.
23+
- PNG: a `caBX` chunk.
24+
- WebP: a top-level RIFF chunk of type `C2PA`.
25+
4. If found, stream the raw manifest bytes once, computing SHA-256 in flight,
26+
and persist the bytes to a sidecar file under `wp-content/uploads/ai-c2pa/`.
27+
5. Write a structured `_wpai_monitor_record` postmeta entry pointing at the
28+
sidecar.
29+
30+
The handler is wrapped in a `try / catch ( Throwable )` boundary and writes a
31+
record on every supported MIME type even if every stage fails. The upload
32+
itself never blocks.
1133

1234
## Postmeta record
1335

@@ -19,37 +41,44 @@ repository
1941
([`wpai-monitor-record/schema.json`](https://raw.githubusercontent.com/decentralized-identity/credential-schemas/main/community-schemas/WordPress/schemas/wpai-monitor-record/schema.json)),
2042
extending
2143
[`media-provenance-capture`](https://raw.githubusercontent.com/decentralized-identity/credential-schemas/main/community-schemas/OpenVerifiable/schemas/media-provenance-capture/schema.json)
22-
(CMS-agnostic base). Open a DIF pull request in parallel; Layer 2+ PRs in this
23-
repo reference that schema PR.
24-
25-
For human-readable shape while iterating:
44+
(CMS-agnostic base). Cross-link the DIF pull request in PR descriptions.
2645

2746
```jsonc
28-
// $schema (optional) — for validators once the DIF files are on main
2947
{
30-
"schema_version": 1,
31-
"captured_at": "2026-04-22T19:30:00Z",
32-
"duration_ms": 47,
33-
"source": {
34-
"attachment_id": 1234,
35-
"original_path_relative": "2026/04/photo.jpg",
36-
"size_bytes": 2841093,
37-
"mime": "image/jpeg"
38-
},
39-
"traditional": { "exif": {}, "iptc": {}, "xmp": {} },
40-
"c2pa": {
41-
"present": true,
42-
"format": "jpeg",
43-
"container": "APP11/JUMBF",
44-
"manifest_sha256": "ab12...",
45-
"manifest_length": 184213,
46-
"sidecar_path_relative": "ai-c2pa/1234.jpeg.c2pa",
47-
"decoded": null
48-
},
49-
"errors": []
48+
"schema_version": 1,
49+
"captured_at": "2026-04-22T19:30:00Z",
50+
"duration_ms": 47,
51+
"source": {
52+
"attachment_id": 1234,
53+
"original_path_relative": "2026/04/photo.jpg",
54+
"size_bytes": 2841093,
55+
"mime": "image/jpeg"
56+
},
57+
"traditional": {
58+
"exif": {},
59+
"iptc": {},
60+
"xmp": {}
61+
},
62+
"c2pa": {
63+
"present": true,
64+
"format": "jpeg",
65+
"container": "APP11/JUMBF",
66+
"manifest_sha256": "ab12...",
67+
"manifest_length": 184213,
68+
"sidecar_path_relative": "ai-c2pa/1234.jpeg.c2pa",
69+
"decoded": null
70+
},
71+
"errors": []
5072
}
5173
```
5274

75+
When no manifest is found, `c2pa` collapses to
76+
`{ "present": false, "format": <detected or null> }` and no sidecar is written.
77+
78+
`c2pa.decoded` is reserved for a follow-up (claim generator, `digitalSourceType`,
79+
action history). `traditional.*` are reserved for a future pass that promotes
80+
WordPress's existing EXIF / IPTC / XMP extraction into the same record.
81+
5382
## Sidecar layout
5483

5584
```
@@ -63,7 +92,7 @@ wp-content/uploads/ai-c2pa/
6392

6493
```nginx
6594
location ^~ /wp-content/uploads/ai-c2pa/ {
66-
deny all;
95+
deny all;
6796
}
6897
```
6998

@@ -79,6 +108,17 @@ meta. Sidecars are reversible, cheap, and mirror how core treats
79108
`wp_get_original_image_path()` (data lives next to the image, the database
80109
holds a reference).
81110

111+
## Constraints
112+
113+
- **Read-only** — never mutates images, manifests, or core attachment fields.
114+
- **Fail-open** — every error path writes a record and returns; the upload
115+
always succeeds.
116+
- **No external dependencies** — no Composer additions, no outbound HTTP, no
117+
shell-outs. Pure PHP byte parsing.
118+
- **Bounded scan** — files larger than `C2pa_Monitor::MAX_SCAN_BYTES` (64 MiB) are
119+
not scanned (a `size_cap` error is recorded); individual manifest payloads are capped at
120+
`Manifest_Reader::MAX_MANIFEST_BYTES` (16 MiB).
121+
82122
## Test fixtures
83123

84124
Synthetic fixtures are generated at runtime by
@@ -87,12 +127,11 @@ just well-formed enough at the container level to drive the detector and are
87127
**not** valid signed C2PA assets. Generating them at runtime keeps binary
88128
blobs out of the repo and avoids any third-party fixture licensing.
89129

90-
## Constraints
130+
## Out of scope (this release)
91131

92-
- **Read-only** — never mutates images, manifests, or core attachment fields.
93-
- **Fail-open** — every error path should write a record and return; the upload
94-
must not block.
95-
- **No external dependencies** — no Composer additions, no outbound HTTP, no
96-
shell-outs for the capture path. Pure PHP byte parsing.
97-
- **Bounded scan** — files larger than `C2pa_Monitor::MAX_SCAN_BYTES` (64 MiB)
98-
are skipped; individual manifest payloads are capped in `Manifest_Reader`.
132+
- JUMBF box reader and CBOR decoder.
133+
- Populating `c2pa.decoded` with claim generator / digital source type / action
134+
history.
135+
- Admin UI, media library badge, icon overlay.
136+
- Cryptographic verification of manifests.
137+
- Preserving manifests through WordPress's GD / Imagick subsize pipeline.

0 commit comments

Comments
 (0)