11import Foundation
22import FoundationModels
3+ import Vision
4+ import CoreImage
35
46@available ( iOS 26 , * )
57public actor IntelligenceService {
@@ -17,6 +19,66 @@ public actor IntelligenceService {
1719
/// Creates a service instance. The initializer takes no arguments and its body is empty.
1820 public init ( ) { }
1921
/// Analyzes an image with the Vision framework and summarizes what was found.
///
/// Three Vision requests are performed by a single request handler: scene
/// classification, animal recognition, and fast text recognition. Every
/// category that produces at least one usable result contributes one segment
/// to the returned summary.
///
/// - Parameter cgImage: The image to analyze.
/// - Returns: The non-empty segments joined with `"; "` (shaped like
///   `"Scenes: <id> (<percent>%); Animals: <id>; Text: <string>"`), or an empty
///   string when no category produced a usable result.
/// - Throws: Any error thrown by `VNImageRequestHandler.perform(_:)`.
public func analyzeImage(_ cgImage: CGImage) async throws -> String {
    let startTime = CFAbsoluteTimeGetCurrent()

    let sceneRequest = VNClassifyImageRequest()
    let animalRequest = VNRecognizeAnimalsRequest()
    let textRequest = VNRecognizeTextRequest()
    textRequest.recognitionLevel = .fast

    // NOTE(review): `perform(_:)` is a synchronous call, so the actor is occupied
    // until all three requests finish. Consider moving it off the actor if that
    // becomes a bottleneck.
    let handler = VNImageRequestHandler(cgImage: cgImage, options: [:])
    try handler.perform([sceneRequest, animalRequest, textRequest])

    var segments: [String] = []

    // Scenes: look at the first three classifications only, then keep those
    // above 30% confidence. The percentage is truncated, not rounded.
    let scenes = (sceneRequest.results ?? [])
        .prefix(3)
        .filter { $0.confidence > 0.3 }
        .map { "\($0.identifier) (\(Int($0.confidence * 100))%)" }
    if !scenes.isEmpty {
        segments.append("Scenes: \(scenes.joined(separator: ", "))")
    }

    // Animals: keep observations above 50% confidence and report the first
    // label of each one.
    let animals = (animalRequest.results ?? [])
        .filter { $0.confidence > 0.5 }
        .compactMap { $0.labels.first?.identifier }
    if !animals.isEmpty {
        segments.append("Animals: \(animals.joined(separator: ", "))")
    }

    // Text: take the top candidate of the first five observations, skipping
    // empty strings.
    let recognizedText = (textRequest.results ?? [])
        .prefix(5)
        .compactMap { $0.topCandidates(1).first?.string }
        .filter { !$0.isEmpty }
    if !recognizedText.isEmpty {
        segments.append("Text: \(recognizedText.joined(separator: ", "))")
    }

    WPLogInfo("IntelligenceService.analyzeImage executed in \((CFAbsoluteTimeGetCurrent() - startTime) * 1000) ms")

    // Joining an empty array already yields "", so no special case is needed.
    return segments.joined(separator: "; ")
}
81+
2082 /// Suggests tags for a WordPress post.
2183 ///
2284 /// - Parameters:
@@ -155,6 +217,195 @@ public actor IntelligenceService {
155217 let postSizeLimit = Double ( IntelligenceService . contextSizeLimit) * ratio
156218 return String ( ( extract ?? post) . prefix ( Int ( postSizeLimit) ) )
157219 }
220+
/// Metadata for generating alt text and captions.
///
/// Every field is optional and defaults to `nil`. The type is `Sendable` so a
/// value can be handed to the actor's methods from another isolation domain;
/// all stored properties are immutable optional strings.
public struct MediaMetadata: Sendable {
    /// The image filename.
    public let filename: String?
    /// The image title.
    public let title: String?
    /// The image caption.
    public let caption: String?
    /// The image description.
    public let description: String?
    /// The image alt text.
    public let altText: String?
    /// The file type or extension.
    public let fileType: String?
    /// The image dimensions, as preformatted text.
    public let dimensions: String?
    /// Visual analysis of the image content.
    /// NOTE(review): presumably the output of `analyzeImage(_:)` — confirm against callers.
    public let imageAnalysis: String?

    /// Creates a metadata value; any field left out is `nil`.
    public init(
        filename: String? = nil,
        title: String? = nil,
        caption: String? = nil,
        description: String? = nil,
        altText: String? = nil,
        fileType: String? = nil,
        dimensions: String? = nil,
        imageAnalysis: String? = nil
    ) {
        self.filename = filename
        self.title = title
        self.caption = caption
        self.description = description
        self.altText = altText
        self.fileType = fileType
        self.dimensions = dimensions
        self.imageAnalysis = imageAnalysis
    }

    /// Whether at least one field holds a non-empty string.
    ///
    /// `nil` and `""` both count as "no content"; whitespace-only strings count
    /// as content.
    var hasContent: Bool {
        let fields = [filename, title, caption, description, altText, fileType, dimensions, imageAnalysis]
        return fields.contains { $0?.isEmpty == false }
    }
}
248+
/// Generates alt text for a media item based on available metadata.
///
/// The non-empty metadata fields are sent to an on-device language model as
/// labeled lines (`LABEL: 'value'`), together with instructions asking for
/// concise alt text.
///
/// - Parameter metadata: The media metadata to use for generation.
/// - Returns: The model's response with surrounding whitespace and newlines trimmed.
/// - Throws: An `NSError` (domain `"IntelligenceService"`, code `-1`) when none of the
///   fields used for alt text is non-empty, or any error thrown by the language
///   model session.
public func generateAltText(metadata: MediaMetadata) async throws -> String {
    // Fields forwarded to the model, in prompt order. The existing `altText` is
    // not sent, so it must not count as usable input either.
    let labeledFields: [(label: String, value: String?)] = [
        ("IMAGE_ANALYSIS", metadata.imageAnalysis),
        ("FILENAME", metadata.filename),
        ("FILE_TYPE", metadata.fileType),
        ("DIMENSIONS", metadata.dimensions),
        ("TITLE", metadata.title),
        ("CAPTION", metadata.caption),
        ("DESCRIPTION", metadata.description)
    ]
    let contextParts = labeledFields.compactMap { field -> String? in
        guard let value = field.value, !value.isEmpty else { return nil }
        return "\(field.label): '\(value)'"
    }

    // Checking the built context (rather than `metadata.hasContent`) prevents
    // sending an empty prompt when only `altText` is populated.
    guard !contextParts.isEmpty else {
        throw NSError(domain: "IntelligenceService", code: -1, userInfo: [
            NSLocalizedDescriptionKey: "Insufficient metadata to generate alt text. Please add a filename, title, or description first."
        ])
    }

    let startTime = CFAbsoluteTimeGetCurrent()

    let instructions = """
    You are helping a WordPress user generate alt text for an image.
    Alt text should be concise, descriptive, and accessible for screen readers.

    **Parameters**
    - IMAGE_ANALYSIS: Visual analysis of the actual image content (MOST IMPORTANT)
    - FILENAME: the image filename
    - FILE_TYPE: the file type/extension
    - DIMENSIONS: the image dimensions
    - TITLE: the image title (if available)
    - CAPTION: the image caption (if available)
    - DESCRIPTION: the image description (if available)

    **Requirements**
    - Generate concise alt text (1-2 sentences, max 125 characters)
    - Prioritize IMAGE_ANALYSIS when describing what's in the image
    - Focus on what the image depicts, not decorative elements
    - Use simple, clear language
    - Do not include phrases like "image of" or "picture of"
    - Only output the alt text, nothing else
    """

    let session = LanguageModelSession(
        model: .init(guardrails: .permissiveContentTransformations),
        instructions: instructions
    )

    let prompt = """
    Generate alt text for an image with the following information:

    \(contextParts.joined(separator: "\n"))
    """

    WPLogInfo("IntelligenceService.generateAltText prompt:\n\(prompt)")

    let response = try await session.respond(
        to: prompt,
        options: GenerationOptions(temperature: 0.7)
    )

    WPLogInfo("IntelligenceService.generateAltText executed in \((CFAbsoluteTimeGetCurrent() - startTime) * 1000) ms")

    return response.content.trimmingCharacters(in: .whitespacesAndNewlines)
}
329+
/// Generates a caption for a media item based on available metadata.
///
/// The non-empty metadata fields are sent to an on-device language model as
/// labeled lines (`LABEL: 'value'`), together with instructions asking for an
/// engaging one- to two-sentence caption.
///
/// - Parameter metadata: The media metadata to use for generation.
/// - Returns: The model's response with surrounding whitespace and newlines trimmed.
/// - Throws: An `NSError` (domain `"IntelligenceService"`, code `-1`) when none of the
///   fields used for captions is non-empty, or any error thrown by the language
///   model session.
public func generateCaption(metadata: MediaMetadata) async throws -> String {
    // Fields forwarded to the model, in prompt order. The existing `caption` is
    // not sent, so it must not count as usable input either.
    let labeledFields: [(label: String, value: String?)] = [
        ("IMAGE_ANALYSIS", metadata.imageAnalysis),
        ("FILENAME", metadata.filename),
        ("FILE_TYPE", metadata.fileType),
        ("DIMENSIONS", metadata.dimensions),
        ("TITLE", metadata.title),
        ("ALT_TEXT", metadata.altText),
        ("DESCRIPTION", metadata.description)
    ]
    let contextParts = labeledFields.compactMap { field -> String? in
        guard let value = field.value, !value.isEmpty else { return nil }
        return "\(field.label): '\(value)'"
    }

    // Checking the built context (rather than `metadata.hasContent`) prevents
    // sending an empty prompt when only `caption` is populated.
    guard !contextParts.isEmpty else {
        throw NSError(domain: "IntelligenceService", code: -1, userInfo: [
            NSLocalizedDescriptionKey: "Insufficient metadata to generate caption. Please add a filename, title, or description first."
        ])
    }

    let startTime = CFAbsoluteTimeGetCurrent()

    let instructions = """
    You are helping a WordPress user generate a caption for an image.
    Captions should be engaging, informative, and complement the image.

    **Parameters**
    - IMAGE_ANALYSIS: Visual analysis of the actual image content (MOST IMPORTANT)
    - FILENAME: the image filename
    - FILE_TYPE: the file type/extension
    - DIMENSIONS: the image dimensions
    - TITLE: the image title (if available)
    - ALT_TEXT: the image alt text (if available)
    - DESCRIPTION: the image description (if available)

    **Requirements**
    - Generate an engaging caption (1-2 sentences)
    - Prioritize IMAGE_ANALYSIS to understand what's actually in the image
    - Can be more creative and conversational than alt text
    - May include context, emotion, or storytelling elements
    - Only output the caption, nothing else
    """

    let session = LanguageModelSession(
        model: .init(guardrails: .permissiveContentTransformations),
        instructions: instructions
    )

    let prompt = """
    Generate a caption for an image with the following information:

    \(contextParts.joined(separator: "\n"))
    """

    WPLogInfo("IntelligenceService.generateCaption prompt:\n\(prompt)")

    let response = try await session.respond(
        to: prompt,
        options: GenerationOptions(temperature: 0.8)
    )

    WPLogInfo("IntelligenceService.generateCaption executed in \((CFAbsoluteTimeGetCurrent() - startTime) * 1000) ms")

    return response.content.trimmingCharacters(in: .whitespacesAndNewlines)
}
158409}
159410
160411private extension Array where Element: Hashable {
0 commit comments