contentful · ryunsong-contentful · Nov 21, 2025 · Nov 21, 2025 · Nov 21, 2025 · mgoudy91
@@ -1,5 +1,5 @@
 /**
- * INTEG-3263: Document Parser Agent
+ * Document Parser Agent
  *
  * Agent that takes a Google Doc URL and content type definitions,
  * then uses OpenAI to extract structured entries from the document
@@ -45,6 +45,7 @@ export async function createDocument(config: DocumentParserConfig): Promise<Fina
   });
 
   const prompt = buildExtractionPrompt({ contentTypes, googleDocContent, locale });
+
   const result = await generateObject({
     model: openaiClient(modelVersion),
     schema: FinalEntriesResultSchema,
@@ -56,6 +57,7 @@ export async function createDocument(config: DocumentParserConfig): Promise<Fina
   return result.object as FinalEntriesResult;
 }
 
+// TODO: Improve these prompts by adding few-shot examples on top of the current zero-shot prompt
 function buildSystemPrompt(): string {
   return `You are an expert content extraction AI that analyzes documents and extracts structured content based on Contentful content type definitions.
 
@@ -65,15 +67,40 @@ Your role is to:
 3. Extract relevant information from the document that matches the content type structure
 4. Create properly formatted entries that are ready to be created in Contentful via the CMA API
 
-Important guidelines:
+CRITICAL FIELD TYPE RULES - READ CAREFULLY:
+- Symbol: Short text (max 256 characters) - use for titles, names, IDs ✓
+- Text: Long text (any length) - use for descriptions, content ✓
+- Number: Integer or decimal values only ✓
+- Boolean: true or false only ✓
+- Date: ISO 8601 format (YYYY-MM-DD or YYYY-MM-DDTHH:mm:ss.sssZ) ✓
+- Location: { lat: number, lon: number } ✓
+- Object: JSON object (use sparingly, check validations) ✓
+- Array (of Symbol/Text/Number): Array of PRIMITIVE values ONLY ✓
+  Example: ["value1", "value2"] or [1, 2, 3]
+- Array (of Link): ❌ NEVER USE - these reference other entries, skip entirely
+  Example: DO NOT create [{ title: "x", content: "y" }] - this will FAIL
+- Link/Reference: ❌ NEVER USE - skip these fields (they reference other entries)
+- RichText: ❌ NEVER USE - complex format not supported
+
+FIELD FORMAT RULES:
 - Each entry must have a contentTypeId that matches one of the provided content types
-- Fields must be in the correct format: { "fieldId": { "locale": value } }
-- Respect field types (Text, Symbol, RichText, Number, Boolean, Date, Reference, etc.)
+- Fields must be in the format: { "fieldId": { "locale": value } }
 - Only include fields that exist in the content type definition
+- NEVER include Reference/Link fields (type: "Link")
+- NEVER include fields with type "Array" if items.type is "Link"
+- NEVER create arrays of objects like [{ title: "x", content: "y" }] - this will FAIL
+- If a field type is unclear or complex, SKIP it rather than guess
+
+COMMON MISTAKES TO AVOID:
+❌ WRONG: { "sections": { "en-US": [{ "title": "...", "content": "..." }] } }
+✓ CORRECT: Skip "sections" field entirely if it's an Array of Links
+✓ CORRECT: { "tags": { "en-US": ["tag1", "tag2", "tag3"] } } (if tags is Array of Symbol)
+
+EXTRACTION GUIDELINES:
 - Extract all relevant content from the document - don't skip entries
-- If a field is required in the content type, ensure it's populated
-- For rich text fields, extract formatted content when possible
-- Be thorough and extract as many valid entries as you can find in the document`;
+- If a required field cannot be populated from the document, use a sensible default or placeholder
+- Be thorough and extract as many valid entries as you can find
+- Focus on simple fields: Symbol, Text, Number, Boolean, Date`;
 }
 
 function buildExtractionPrompt({
@@ -88,21 +115,42 @@ function buildExtractionPrompt({
   const contentTypeList = contentTypes.map((ct) => `${ct.name} (ID: ${ct.sys.id})`).join(', ');
   const totalFields = contentTypes.reduce((sum, ct) => sum + (ct.fields?.length || 0), 0);
 
-  // Create a simplified view of content types for the prompt
-  const contentTypeDefinitions = contentTypes.map((ct) => ({
-    id: ct.sys.id,
-    name: ct.name,
-    description: ct.description,
-    fields:
-      ct.fields?.map((field) => ({
-        id: field.id,
-        name: field.name,
-        type: field.type,
-        required: field.required,
-        localized: field.localized,
-        validations: field.validations,
-      })) || [],
-  }));
+  // Create a detailed view of content types, filtering out unsupported field types
+  const contentTypeDefinitions = contentTypes.map((ct) => {
+    const fields =
+      ct.fields?.map((field) => {
+        const isLinkType = field.type === 'Link';
+        const isArrayOfLinks = field.type === 'Array' && (field.items as any)?.type === 'Link';
+        const isRichText = field.type === 'RichText';
+        const shouldSkip = isLinkType || isArrayOfLinks || isRichText;
+
+        return {
+          id: field.id,
+          name: field.name,
+          type: field.type,
+          linkType: (field as any).linkType,
+          items: field.type === 'Array' ? (field.items as any) : undefined,
+          required: field.required,
+          localized: field.localized,
+          validations: field.validations,
+          SKIP: shouldSkip,
+          SKIP_REASON: shouldSkip
+            ? isLinkType
+              ? 'Link/Reference field - cannot be populated without entry IDs'
+              : isArrayOfLinks
+              ? 'Array of Links - cannot be populated without entry IDs'
+              : 'RichText field - complex format not supported'
+            : undefined,
+        };
+      }) || [];
+
+    return {
+      id: ct.sys.id,
+      name: ct.name,
+      description: ct.description,
+      fields,
+    };
+  });
 
   return `Extract structured entries from the following document based on the provided Contentful content type definitions.
 
@@ -117,13 +165,25 @@ ${JSON.stringify(contentTypeDefinitions, null, 2)}
 DOCUMENT CONTENT:
 ${googleDocContent}
 
-INSTRUCTIONS:
-1. Analyze the document and identify content that matches the provided content type structures
-2. Extract all relevant entries from the document
-3. For each entry, use the contentTypeId that best matches the content
-4. Format fields correctly: { "fieldId": { "${locale}": value } }
-5. Ensure all required fields are populated
-6. Be thorough - extract all valid content from the document
+CRITICAL INSTRUCTIONS:
+1. **SKIP ALL FIELDS WHERE "SKIP": true** - Do NOT include these fields in your output
+2. Look at each field definition - if it has "SKIP": true, completely ignore that field
+3. Only include fields where "SKIP" is false or not present
+4. Analyze the document and identify content that matches the provided content type structures
+5. Extract all relevant entries from the document
+6. For each entry, use the contentTypeId that best matches the content
+7. Format fields correctly: { "fieldId": { "${locale}": value } }
+8. Match field types exactly:
+   - Symbol: string (max 256 chars)
+   - Text: string (any length)
+   - Number: number
+   - Boolean: boolean
+   - Date: ISO 8601 string
+   - Array: array of primitives (strings or numbers ONLY)
+   - Object: JSON object
+9. For required fields (required: true) that are NOT marked SKIP: true, ensure they are populated
+10. If you cannot populate a required field from the document, use a sensible default or placeholder
+11. Be thorough - extract all valid content from the document
 
 Return the extracted entries in the specified JSON schema format.`;
 }
@@ -9,6 +9,7 @@ import { createDocument } from './agents/documentParserAgent/documentParser.agen
 import { fetchContentTypes } from './service/contentTypeService';
 import { initContentfulManagementClient } from './service/initCMAClient';
 import { fetchGoogleDoc } from './service/googleDriveService';
+import { createEntries } from './service/entryService';
 
 export type AppActionParameters = {
   contentTypeIds: string[];
@@ -32,18 +33,15 @@ export const handler: FunctionEventHandler<
 ) => {
   const { contentTypeIds, googleDocUrl } = event.body;
   const { openAiApiKey } = context.appInstallationParameters as AppInstallationParameters;
-  // INTEG-3262 and INTEG-3263: Take in Content Type, Prompt, and Upload File from user
   const cma = initContentfulManagementClient(context);
   const contentTypes = await fetchContentTypes(cma, new Set<string>(contentTypeIds));
 
-  const contentTypeParserAgentResult = await analyzeContentTypes({ contentTypes, openAiApiKey });
-  // console.log('contentTypeParserAgentResult', contentTypeParserAgentResult);
+  // Commented out to preserve as much time as possible due to the 30-second limit for App functions
+  // const contentTypeParserAgentResult = await analyzeContentTypes({ contentTypes, openAiApiKey });
 
   // INTEG-3261: Pass the ai content type response to the observer for analysis
   // createContentTypeObservationsFromLLMResponse()
 
-  // INTEG-3263: Implement the document parser agent
-  // Pass the content types to the document parser so it can extract entries based on the structure
   const aiDocumentResponse = await createDocument({
     googleDocUrl,
     openAiApiKey,
@@ -55,16 +53,26 @@ export const handler: FunctionEventHandler<
 
   // INTEG-3264: Create the entries in Contentful using the entry service
-  // INTEG-3264: Create the entries in Contentful using the entry service
-  // INTEG-3264: Create the entries in Contentful using the entry service
   // The aiDocumentResponse.entries is now ready to be passed to the CMA client
-  // await createEntries(aiDocumentResponse.entries, { spaceId, environmentId, accessToken });
+  const creationResult = await createEntries(cma, aiDocumentResponse.entries, {
+    spaceId: context.spaceId,
+    environmentId: context.environmentId,
+  });
 
   // INTEG-3265: Create the assets in Contentful using the asset service
   // await createAssets()
 
   return {
     success: true,
     response: {
-      contentTypeParserAgentResult,
-      entriesReadyForCreation: aiDocumentResponse.entries,
+      // contentTypeParserAgentResult,
+      summary: aiDocumentResponse.summary,
+      totalEntriesExtracted: aiDocumentResponse.totalEntries,
+      createdEntries: creationResult.createdEntries.map((entry) => ({
+        id: entry.sys.id,
+        contentType: entry.sys.contentType.sys.id,
+      })),
+      errors: creationResult.errors,
+      successRate: `${creationResult.createdEntries.length}/${aiDocumentResponse.totalEntries}`,
     },
   };
 };
@@ -8,43 +8,62 @@ import { EntryToCreate } from '../agents/documentParserAgent/schema';
  * and creates them in Contentful using the CMA client.
  */
 
-/**
- * Creates a single entry in Contentful
- *
- * @param cma - Contentful Management API client
- * @param entry - Entry data from Document Parser Agent (matches EntryToCreate schema)
- * @returns Promise resolving to the created entry
- */
-export async function createEntry(cma: PlainClientAPI, entry: EntryToCreate): Promise<EntryProps> {
-  // TODO: Implement entry creation using the CMA client
-  // Example implementation:
-  // const createdEntry = await cma.entry.create(
-  //   { spaceId, environmentId },
-  //   {
-  //     contentTypeId: entry.contentTypeId,
-  //     fields: entry.fields
-  //   }
-  // );
-  // return createdEntry;
-  throw new Error('Not implemented');
+export interface EntryCreationResult {
+  createdEntries: EntryProps[];
+  errors: Array<{
+    contentTypeId: string;
+    error: string;
+    details?: any;
+  }>;
 }
 
 /**
  * Creates multiple entries in Contentful
  *
  * @param cma - Contentful Management API client
  * @param entries - Array of entries from Document Parser Agent output
- * @returns Promise resolving to array of created entries
+ * @param config - Space and environment configuration
+ * @returns Promise resolving to creation results with entries and errors
  */
 export async function createEntries(
   cma: PlainClientAPI,
-  entries: EntryToCreate[]
-): Promise<EntryProps[]> {
-  // TODO: Implement batch entry creation
-  // Consider implementing with proper error handling and rate limiting
-  // const createdEntries = await Promise.all(
-  //   entries.map(entry => createEntry(cma, entry))
-  // );
-  // return createdEntries;
-  throw new Error('Not implemented');
+  entries: EntryToCreate[],
+  config: { spaceId: string; environmentId: string }
+): Promise<EntryCreationResult> {
+  const { spaceId, environmentId } = config;
+  const createdEntries: EntryProps[] = [];
+  const errors: Array<{ contentTypeId: string; error: string; details?: any }> = [];
+
+  // Create entries sequentially to avoid rate limiting issues
+  // In production, you may want to implement batching and retry logic
+  for (let i = 0; i < entries.length; i++) {
+    const entry = entries[i];
+
+    try {
+      const createdEntry = await cma.entry.create(
+        { spaceId, environmentId, contentTypeId: entry.contentTypeId },
+        {
+          fields: entry.fields,
+        }
+      );
+
+      // Optionally publish the entry immediately
+      // const publishedEntry = await cma.entry.publish(
+      //   { spaceId, environmentId, entryId: createdEntry.sys.id },
+      //   createdEntry
+      // );
-      // Optionally publish the entry immediately
-      // const publishedEntry = await cma.entry.publish(
-      //   { spaceId, environmentId, entryId: createdEntry.sys.id },
-      //   createdEntry
-      // );
-      // Optionally publish the entry immediately
-      // const publishedEntry = await cma.entry.publish(
-      //   { spaceId, environmentId, entryId: createdEntry.sys.id },
-      //   createdEntry
-      // );
+
+      createdEntries.push(createdEntry);
+    } catch (error) {
+      const errorMessage = error instanceof Error ? error.message : String(error);
+      console.error(`✗ Failed to create entry of type ${entry.contentTypeId}:`, error);
+      errors.push({
+        contentTypeId: entry.contentTypeId,
+        error: errorMessage,
+        details: error,
+      });
+    }
+  }
+
+  return { createdEntries, errors };
 }