wordpress-mobile · kean · Nov 28, 2025
diff --git a/Modules/Sources/WordPressShared/Intelligence/IntelligenceService.swift b/Modules/Sources/WordPressShared/Intelligence/IntelligenceService.swift
@@ -1,5 +1,7 @@
 import Foundation
 import FoundationModels
+import Vision
+import CoreImage
 
 @available(iOS 26, *)
 public actor IntelligenceService {
@@ -17,6 +19,66 @@
 
     public init() {}
 
+    /// Analyzes an image using Vision framework to extract visual information.
+    ///
+    /// - Parameter cgImage: The image to analyze.
+    /// - Returns: The detected scenes, animals, and text joined by "; ", or an empty string when nothing is found.
+    /// - Throws: Any error thrown while performing the Vision requests.
+    public func analyzeImage(_ cgImage: CGImage) async throws -> String {
+        let startTime = CFAbsoluteTimeGetCurrent()
+        var analysisResults: [String] = []
+
+        // 1. Scene classification
+        let sceneRequest = VNClassifyImageRequest()
+
+        // 2. Animal recognition
+        let objectRequest = VNRecognizeAnimalsRequest()
+
+        // 3. Text detection
+        let textRequest = VNRecognizeTextRequest()
+        textRequest.recognitionLevel = .fast
+
+        // Perform all requests
+        let handler = VNImageRequestHandler(cgImage: cgImage, options: [:])
+        try handler.perform([sceneRequest, objectRequest, textRequest])
+
+        // Scenes: of the first three classifications, keep those above 30% confidence
+        if let sceneResults = sceneRequest.results {
+            let topScenes = sceneResults
+                .prefix(3)
+                .filter { $0.confidence > 0.3 }
+                .map { "\($0.identifier) (\(Int($0.confidence * 100))%)" }
+            if !topScenes.isEmpty {
+                analysisResults.append("Scenes: \(topScenes.joined(separator: ", "))")
+            }
+        }
+
+        // Animals: keep the first label of every observation above 50% confidence
+        if let animalResults = objectRequest.results {
+            let animals = animalResults
+                .filter { $0.confidence > 0.5 }
+                .compactMap { $0.labels.first?.identifier }
+            if !animals.isEmpty {
+                analysisResults.append("Animals: \(animals.joined(separator: ", "))")
+            }
+        }
+
+        // Text: take the best candidate string of the first five observations
+        if let textResults = textRequest.results {
+            let recognizedText = textResults
+                .prefix(5)
+                .compactMap { $0.topCandidates(1).first?.string }
+                .filter { !$0.isEmpty }
+            if !recognizedText.isEmpty {
+                analysisResults.append("Text: \(recognizedText.joined(separator: ", "))")
+            }
+        }
+
+        WPLogInfo("IntelligenceService.analyzeImage executed in \((CFAbsoluteTimeGetCurrent() - startTime) * 1000) ms")
+
+        return analysisResults.joined(separator: "; ")
+    }
+
     /// Suggests tags for a WordPress post.
     ///
     /// - Parameters:
@@ -155,6 +217,195 @@
         let postSizeLimit = Double(IntelligenceService.contextSizeLimit) * ratio
         return String((extract ?? post).prefix(Int(postSizeLimit)))
     }
+
+    /// Metadata for generating alt text and captions; `Sendable` so values can be passed into the actor.
+    public struct MediaMetadata: Sendable {
+        public let filename: String?
+        public let title: String?
+        public let caption: String?
+        public let description: String?
+        public let altText: String?
+        public let fileType: String?
+        public let dimensions: String?
+        public let imageAnalysis: String?
+
+        public init(filename: String? = nil, title: String? = nil, caption: String? = nil, description: String? = nil, altText: String? = nil, fileType: String? = nil, dimensions: String? = nil, imageAnalysis: String? = nil) {
+            self.filename = filename
+            self.title = title
+            self.caption = caption
+            self.description = description
+            self.altText = altText
+            self.fileType = fileType
+            self.dimensions = dimensions
+            self.imageAnalysis = imageAnalysis
+        }
+
+        /// Whether at least one field holds a non-empty string.
+        var hasContent: Bool {
+            [filename, title, caption, description, altText, fileType, dimensions, imageAnalysis].contains { $0?.isEmpty == false }
+        }
+    }
+
+    /// Generates alt text for a media item based on available metadata.
+    ///
+    /// Only the fields listed in the prompt are sent to the model; the existing
+    /// `altText` value is not one of them, so it does not count as usable input.
+    ///
+    /// - Parameter metadata: The media metadata to use for generation.
+    /// - Returns: Generated alt text, trimmed of surrounding whitespace and newlines.
+    /// - Throws: An `NSError` (domain `IntelligenceService`, code `-1`) when none of
+    ///   the fields used in the prompt are set, or any error thrown by the language
+    ///   model session.
+    public func generateAltText(metadata: MediaMetadata) async throws -> String {
+        // Labelled inputs, in the order they appear in the prompt.
+        let fields: [(label: String, value: String?)] = [
+            ("IMAGE_ANALYSIS", metadata.imageAnalysis),
+            ("FILENAME", metadata.filename),
+            ("FILE_TYPE", metadata.fileType),
+            ("DIMENSIONS", metadata.dimensions),
+            ("TITLE", metadata.title),
+            ("CAPTION", metadata.caption),
+            ("DESCRIPTION", metadata.description)
+        ]
+        let contextParts = fields.compactMap { field -> String? in
+            guard let value = field.value, !value.isEmpty else { return nil }
+            return "\(field.label): '\(value)'"
+        }
+
+        // Validate against what is actually sent: `metadata.hasContent` also counts
+        // `altText`, which would let a prompt with no context through.
+        guard !contextParts.isEmpty else {
+            throw NSError(domain: "IntelligenceService", code: -1, userInfo: [
+                NSLocalizedDescriptionKey: "Insufficient metadata to generate alt text. Please add a filename, title, or description first."
+            ])
+        }
+
+        let startTime = CFAbsoluteTimeGetCurrent()
+
+        let instructions = """
+        You are helping a WordPress user generate alt text for an image.
+        Alt text should be concise, descriptive, and accessible for screen readers.
+
+        **Parameters**
+        - IMAGE_ANALYSIS: Visual analysis of the actual image content (MOST IMPORTANT)
+        - FILENAME: the image filename
+        - FILE_TYPE: the file type/extension
+        - DIMENSIONS: the image dimensions
+        - TITLE: the image title (if available)
+        - CAPTION: the image caption (if available)
+        - DESCRIPTION: the image description (if available)
+
+        **Requirements**
+        - Generate concise alt text (1-2 sentences, max 125 characters)
+        - Prioritize IMAGE_ANALYSIS when describing what's in the image
+        - Focus on what the image depicts, not decorative elements
+        - Use simple, clear language
+        - Do not include phrases like "image of" or "picture of"
+        - Only output the alt text, nothing else
+        """
+
+        let session = LanguageModelSession(
+            model: .init(guardrails: .permissiveContentTransformations),
+            instructions: instructions
+        )
+
+        let prompt = """
+        Generate alt text for an image with the following information:
+
+        \(contextParts.joined(separator: "\n"))
+        """
+
+        WPLogInfo("IntelligenceService.generateAltText prompt:\n\(prompt)")
+
+        let response = try await session.respond(
+            to: prompt,
+            options: GenerationOptions(temperature: 0.7)
+        )
+
+        WPLogInfo("IntelligenceService.generateAltText executed in \((CFAbsoluteTimeGetCurrent() - startTime) * 1000) ms")
+
+        return response.content.trimmingCharacters(in: .whitespacesAndNewlines)
+    }
+
+    /// Generates a caption for a media item based on available metadata.
+    ///
+    /// Only the fields listed in the prompt are sent to the model; the existing
+    /// `caption` value is not one of them, so it does not count as usable input.
+    ///
+    /// - Parameter metadata: The media metadata to use for generation.
+    /// - Returns: Generated caption, trimmed of surrounding whitespace and newlines.
+    /// - Throws: An `NSError` (domain `IntelligenceService`, code `-1`) when none of
+    ///   the fields used in the prompt are set, or any error thrown by the language
+    ///   model session.
+    public func generateCaption(metadata: MediaMetadata) async throws -> String {
+        // Labelled inputs, in the order they appear in the prompt.
+        let fields: [(label: String, value: String?)] = [
+            ("IMAGE_ANALYSIS", metadata.imageAnalysis),
+            ("FILENAME", metadata.filename),
+            ("FILE_TYPE", metadata.fileType),
+            ("DIMENSIONS", metadata.dimensions),
+            ("TITLE", metadata.title),
+            ("ALT_TEXT", metadata.altText),
+            ("DESCRIPTION", metadata.description)
+        ]
+        let contextParts = fields.compactMap { field -> String? in
+            guard let value = field.value, !value.isEmpty else { return nil }
+            return "\(field.label): '\(value)'"
+        }
+
+        // Validate against what is actually sent: `metadata.hasContent` also counts
+        // `caption`, which would let a prompt with no context through.
+        guard !contextParts.isEmpty else {
+            throw NSError(domain: "IntelligenceService", code: -1, userInfo: [
+                NSLocalizedDescriptionKey: "Insufficient metadata to generate caption. Please add a filename, title, or description first."
+            ])
+        }
+
+        let startTime = CFAbsoluteTimeGetCurrent()
+
+        let instructions = """
+        You are helping a WordPress user generate a caption for an image.
+        Captions should be engaging, informative, and complement the image.
+
+        **Parameters**
+        - IMAGE_ANALYSIS: Visual analysis of the actual image content (MOST IMPORTANT)
+        - FILENAME: the image filename
+        - FILE_TYPE: the file type/extension
+        - DIMENSIONS: the image dimensions
+        - TITLE: the image title (if available)
+        - ALT_TEXT: the image alt text (if available)
+        - DESCRIPTION: the image description (if available)
+
+        **Requirements**
+        - Generate an engaging caption (1-2 sentences)
+        - Prioritize IMAGE_ANALYSIS to understand what's actually in the image
+        - Can be more creative and conversational than alt text
+        - May include context, emotion, or storytelling elements
+        - Only output the caption, nothing else
+        """
+
+        let session = LanguageModelSession(
+            model: .init(guardrails: .permissiveContentTransformations),
+            instructions: instructions
+        )
+
+        let prompt = """
+        Generate a caption for an image with the following information:
+
+        \(contextParts.joined(separator: "\n"))
+        """
+
+        WPLogInfo("IntelligenceService.generateCaption prompt:\n\(prompt)")
+
+        let response = try await session.respond(
+            to: prompt,
+            options: GenerationOptions(temperature: 0.8)
+        )
+
+        WPLogInfo("IntelligenceService.generateCaption executed in \((CFAbsoluteTimeGetCurrent() - startTime) * 1000) ms")
+
+        return response.content.trimmingCharacters(in: .whitespacesAndNewlines)
+    }
 }
 
 private extension Array where Element: Hashable {

diff --git a/WordPress/Classes/ViewRelated/Media/MediaItemViewController.swift b/WordPress/Classes/ViewRelated/Media/MediaItemViewController.swift
@@ -22,6 +22,24 @@
 
     private let headerView = MediaItemHeaderView()
     private lazy var headerMaxHeightConstraint = headerView.heightAnchor.constraint(lessThanOrEqualToConstant: 320)
+    private var _textGenerationController: AnyObject? // Kept as `AnyObject` because stored properties cannot be marked `@available`
+
+    @available(iOS 26, *)
+    private var textGenerationController: MediaTextGenerationController {
+        if let existing = _textGenerationController as? MediaTextGenerationController {
+            return existing
+        }
+        let controller = MediaTextGenerationController(media: media) { [weak self] type, generatedText in
+            guard let self else { return }
+            switch type {
+            case .altText: self.mediaMetadata.alt = generatedText
+            case .caption: self.mediaMetadata.caption = generatedText
+            }
+            self.reloadViewModel()
+        }
+        _textGenerationController = controller
+        return controller
+    }
 
     init(media: Media) {
         self.media = media
@@ -327,11 +345,14 @@
     private func editCaption() -> ((ImmuTableRow) -> ()) {
         return { [weak self] row in
             let editableRow = row as! EditableTextRow
-            self?.pushSettingsController(for: editableRow, hint: Strings.Hints.imageCaption,
+            let controller = self?.pushSettingsController(for: editableRow, hint: Strings.Hints.imageCaption,
                                         onValueChanged: { value in
                 self?.mediaMetadata.caption = value
                 self?.reloadViewModel()
             })
+            if #available(iOS 26, *), let self, let controller { // iOS 26+ only; `controller` is nil once `self` is gone
+                self.textGenerationController.configure(controller, for: .caption)
+            }
         }
     }
 
@@ -349,15 +370,19 @@
     private func editAlt() -> ((ImmuTableRow) -> ()) {
         return { [weak self] row in
             let editableRow = row as! EditableTextRow
-            self?.pushSettingsController(for: editableRow, hint: Strings.Hints.imageAlt,
+            let controller = self?.pushSettingsController(for: editableRow, hint: Strings.Hints.imageAlt,
                                          onValueChanged: { value in
                                             self?.mediaMetadata.alt = value
                                             self?.reloadViewModel()
             })
+            if #available(iOS 26, *), let self, let controller { // iOS 26+ only; `controller` is nil once `self` is gone
+                self.textGenerationController.configure(controller, for: .altText)
+            }
         }
     }
 
-    private func pushSettingsController(for row: EditableTextRow, hint: String? = nil, onValueChanged: @escaping SettingsTextChanged) {
+    @discardableResult
+    private func pushSettingsController(for row: EditableTextRow, hint: String? = nil, onValueChanged: @escaping SettingsTextChanged) -> SettingsTextViewController {
         let title = row.title
         let value = row.value
         let controller = SettingsTextViewController(text: value, placeholder: "\(title)...", hint: hint)
@@ -366,6 +391,7 @@
         controller.onValueChanged = onValueChanged
 
         navigationController?.pushViewController(controller, animated: true)
+        return controller
     }
 
     // MARK: - Sharing Logic
@@ -417,7 +443,7 @@
 /// Provides some extra formatting for a Media asset's metadata, used
 /// to present it in the MediaItemViewController
 ///
-private struct MediaMetadataPresenter {
+struct MediaMetadataPresenter {
     let media: Media
 
     /// A String containing the pixel size of the asset (width X height)