Merge pull request #27 from shinpr/feature/prompt-enhancement-improvements

shinpr · web-flow · commit 68ca72a0c431 · 2025-11-29T13:53:37.000+09:00
feat: improve prompt generation with purpose parameter and Subject-Context-Style structure
diff --git a/README.md b/README.md
@@ -178,6 +178,7 @@ The MCP server exposes a single tool for all image operations. Internally, it us
 | `maintainCharacterConsistency` | boolean | - | Maintain character appearance consistency across different poses and scenes |
 | `useWorldKnowledge` | boolean | - | Use real-world knowledge for accurate context (recommended for historical figures, landmarks, or factual scenarios) |
 | `useGoogleSearch` | boolean | - | Enable Google Search grounding to access real-time web information for factually accurate image generation. Use when prompt requires current or time-sensitive data that may have changed since the model's knowledge cutoff. Leave disabled for creative, fictional, historical, or timeless content. |
+| `purpose` | string | - | Intended use for the image (e.g., "cookbook cover", "social media post", "presentation slide"). Helps tailor visual style, quality level, and details to match the purpose. |
 
 #### Response
 
diff --git a/package-lock.json b/package-lock.json
diff --git a/package.json b/package.json
@@ -1,7 +1,7 @@
 {
   "name": "mcp-image",
   "mcpName": "io.github.shinpr/mcp-image",
-  "version": "0.4.2",
+  "version": "0.5.0",
   "description": "MCP server for AI image generation",
   "main": "dist/index.js",
   "bin": {
diff --git a/server.json b/server.json
@@ -8,13 +8,13 @@
     "url": "https://github.com/shinpr/mcp-image",
     "source": "github"
   },
-  "version": "0.4.2",
+  "version": "0.5.0",
   "packages": [
     {
       "registryType": "npm",
       "registryBaseUrl": "https://registry.npmjs.org",
       "identifier": "mcp-image",
-      "version": "0.4.2",
+      "version": "0.5.0",
       "transport": {
         "type": "stdio"
       },
diff --git a/src/business/__tests__/structuredPromptGenerator.test.ts b/src/business/__tests__/structuredPromptGenerator.test.ts
@@ -104,5 +104,37 @@ describe('StructuredPromptGenerator', () => {
         expect(result.data.selectedPractices).toContain('Camera Control Terminology')
       }
     })
+
+    it('should include purpose context when purpose is provided', async () => {
+      const generator = new StructuredPromptGeneratorImpl(mockGeminiTextClient)
+      const userPrompt = 'Delicious pasta dish'
+      const purpose = 'high-end Italian restaurant menu'
+
+      vi.mocked(mockGeminiTextClient.generateText).mockResolvedValue(
+        Ok('Professional food photography of artfully plated pasta')
+      )
+
+      const result = await generator.generateStructuredPrompt(userPrompt, {}, undefined, purpose)
+
+      expect(result.success).toBe(true)
+      const call = vi.mocked(mockGeminiTextClient.generateText).mock.calls[0]
+      expect(call[0]).toContain('INTENDED USE:')
+      expect(call[0]).toContain(purpose)
+    })
+
+    it('should not include purpose context when purpose is not provided', async () => {
+      const generator = new StructuredPromptGeneratorImpl(mockGeminiTextClient)
+      const userPrompt = 'A simple cat'
+
+      vi.mocked(mockGeminiTextClient.generateText).mockResolvedValue(
+        Ok('A fluffy cat with soft lighting')
+      )
+
+      const result = await generator.generateStructuredPrompt(userPrompt)
+
+      expect(result.success).toBe(true)
+      const call = vi.mocked(mockGeminiTextClient.generateText).mock.calls[0]
+      expect(call[0]).not.toContain('INTENDED USE:')
+    })
   })
 })
diff --git a/src/business/structuredPromptGenerator.ts b/src/business/structuredPromptGenerator.ts
@@ -11,24 +11,34 @@ import { GeminiAPIError } from '../utils/errors'
 
 /**
  * System prompt for structured prompt generation optimized for image generation
+ * Follows Google's recommended Subject-Context-Style structure
  */
 const SYSTEM_PROMPT = `You are an expert at crafting prompts for image generation models. Your role is to transform user requests into rich, detailed prompts that maximize image generation quality.
 
+Structure your enhancement around three core elements:
+
+1. SUBJECT (What): The main focus of the image
+   - Physical characteristics: textures, materials, colors, scale
+   - Actions, poses, expressions if applicable
+   - Distinctive features that define the subject
+
+2. CONTEXT (Where/When): The environment and conditions
+   - Setting, background, spatial relationships (foreground, midground, background)
+   - Time of day, weather, atmospheric conditions
+   - Mood and emotional tone of the scene
+
+3. STYLE (How): The visual treatment
+   - Artistic or photographic approach
+   - Lighting design: direction, quality, color temperature, shadows
+   - Camera/lens choices if relevant (focal length, depth of field, angle)
+
 Core principles:
-- Add specific details about lighting, materials, composition, and atmosphere
-- Include photographic or artistic terminology when appropriate  
-- Maintain clarity while adding richness and specificity
 - Preserve the user's original intent while enhancing detail
 - Focus on what should be present rather than what should be absent
+- Include photographic or artistic terminology when appropriate
+- Maintain clarity while adding richness and specificity
 
-When describing scenes or subjects:
-- Physical characteristics: textures, materials, colors, scale
-- Lighting: direction, quality, color temperature, shadows
-- Spatial relationships: foreground, midground, background, composition
-- Atmosphere: mood, weather, time of day, environmental conditions
-- Style: artistic direction, photographic techniques, visual treatment
-
-Your output should be a single, vivid, coherent description that an image generation model can interpret unambiguously. Make it engaging, specific, and clear.`
+Your output should weave these elements into a single, natural flowing description - not a structured list. Make it vivid, engaging, and unambiguous.`
 
 /**
  * Additional system prompt for image editing mode (when input image is provided)
@@ -68,7 +78,8 @@ export interface StructuredPromptGenerator {
   generateStructuredPrompt(
     userPrompt: string,
     features?: FeatureFlags,
-    inputImageData?: string // Optional base64-encoded image for context
+    inputImageData?: string, // Optional base64-encoded image for context
+    purpose?: string // Optional intended use for the image
   ): Promise<Result<StructuredPromptResult, Error>>
 }
 
@@ -81,7 +92,8 @@ export class StructuredPromptGeneratorImpl implements StructuredPromptGenerator
   async generateStructuredPrompt(
     userPrompt: string,
     features: FeatureFlags = {},
-    inputImageData?: string
+    inputImageData?: string,
+    purpose?: string
   ): Promise<Result<StructuredPromptResult, Error>> {
     try {
       // Validate input
@@ -90,7 +102,12 @@ export class StructuredPromptGeneratorImpl implements StructuredPromptGenerator
       }
 
       // Build complete prompt with system instruction and meta-prompt
-      const completePrompt = this.buildCompletePrompt(userPrompt, features, !!inputImageData)
+      const completePrompt = this.buildCompletePrompt(
+        userPrompt,
+        features,
+        !!inputImageData,
+        purpose
+      )
 
       // Combine system prompts for image editing mode
       const systemInstruction = inputImageData
@@ -131,7 +148,8 @@ export class StructuredPromptGeneratorImpl implements StructuredPromptGenerator
   private buildCompletePrompt(
     userPrompt: string,
     features: FeatureFlags,
-    hasInputImage: boolean
+    hasInputImage: boolean,
+    purpose?: string
   ): string {
     const featureContext = this.buildEnhancedFeatureContext(features)
 
@@ -140,10 +158,16 @@ export class StructuredPromptGeneratorImpl implements StructuredPromptGenerator
       ? `\nNOTE: An input image has been provided. Focus on preserving its original characteristics while applying the requested modifications. Maintain consistency with the source image's style, colors, and atmosphere.\n`
       : ''
 
+    // Add purpose context if provided
+    const purposeContext = purpose
+      ? `\nINTENDED USE: ${purpose}\nTailor the visual style, quality level, and details to match this purpose.\n`
+      : ''
+
     return `Transform this image generation request into a detailed, vivid prompt that will produce high-quality results:
 
 "${userPrompt}"
 ${imageEditingInstruction}
+${purposeContext}
 ${featureContext}
 
 Consider these aspects as you enhance the prompt:
diff --git a/src/server/mcpServer.ts b/src/server/mcpServer.ts
@@ -135,6 +135,11 @@ export class MCPServerImpl {
                   'Image resolution for high-quality output. Specify "2K" or "4K" when you need higher resolution images with better text rendering and fine details. Leave unspecified for standard quality.',
                 enum: ['2K', '4K'],
               },
+              purpose: {
+                type: 'string' as const,
+                description:
+                  'Intended use for the image (e.g., cookbook cover, social media post, presentation slide). Helps tailor visual style, quality level, and details to match the purpose.',
+              },
             },
             required: ['prompt'],
           },
@@ -242,7 +247,8 @@ export class MCPServerImpl {
         const promptResult = await this.structuredPromptGenerator.generateStructuredPrompt(
           params.prompt,
           features,
-          inputImageData // Pass image data for context-aware prompt generation
+          inputImageData, // Pass image data for context-aware prompt generation
+          params.purpose // Pass intended use for purpose-aware prompt generation
         )
 
         if (promptResult.success) {
diff --git a/src/types/mcp.ts b/src/types/mcp.ts
@@ -53,6 +53,8 @@ export interface GenerateImageParams {
   aspectRatio?: AspectRatio
   /** Image resolution for high-quality output (e.g., "2K", "4K"). Leave unspecified for standard quality */
   imageSize?: ImageSize
+  /** Intended use for the image (e.g., cookbook cover, social media post). Helps tailor visual style and quality */
+  purpose?: string
 }
 
 /**

Rendered diff view of `package.json`:

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
| | | `@@ -1,7 +1,7 @@` |
| `1` | `1` | `{` |
| `2` | `2` | `"name": "mcp-image",` |
| `3` | `3` | `"mcpName": "io.github.shinpr/mcp-image",` |
| `4` | | `- "version": "0.4.2",` |
| | `4` | `+ "version": "0.5.0",` |
| `5` | `5` | `"description": "MCP server for AI image generation",` |
| `6` | `6` | `"main": "dist/index.js",` |
| `7` | `7` | `"bin": {` |

Rendered diff view of `src/types/mcp.ts`:

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
| | | `@@ -53,6 +53,8 @@ export interface GenerateImageParams {` |
| `53` | `53` | `aspectRatio?: AspectRatio` |
| `54` | `54` | `/** Image resolution for high-quality output (e.g., "2K", "4K"). Leave unspecified for standard quality */` |
| `55` | `55` | `imageSize?: ImageSize` |
| | `56` | `+ /** Intended use for the image (e.g., cookbook cover, social media post). Helps tailor visual style and quality */` |
| | `57` | `+ purpose?: string` |
| `56` | `58` | `}` |
| `57` | `59` | |
| `58` | `60` | `/**` |