Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
251 changes: 251 additions & 0 deletions Modules/Sources/WordPressShared/Intelligence/IntelligenceService.swift
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import Foundation
import FoundationModels
import Vision
import CoreImage

@available(iOS 26, *)
public actor IntelligenceService {
Expand All @@ -17,6 +19,66 @@

/// Creates a service instance. The initializer takes no arguments and performs no work.
public init() {}

/// Analyzes an image using the Vision framework and summarizes what was detected.
///
/// Three requests run in a single pass: scene classification, animal
/// recognition, and fast text recognition. Each non-empty finding becomes one
/// labeled segment (`Scenes: …`, `Animals: …`, `Text: …`) and the segments are
/// joined with `"; "`.
///
/// - Parameter cgImage: The image to analyze.
/// - Returns: A description of what's in the image, or an empty string when no
///   observation passes the thresholds below.
/// - Throws: Any error thrown by `VNImageRequestHandler.perform(_:)`.
public func analyzeImage(_ cgImage: CGImage) async throws -> String {
    let startTime = CFAbsoluteTimeGetCurrent()

    // 1. Scene classification
    let sceneRequest = VNClassifyImageRequest()

    // 2. Animal recognition (this request only reports animals, not arbitrary objects)
    let animalRequest = VNRecognizeAnimalsRequest()

    // 3. Text detection
    let textRequest = VNRecognizeTextRequest()
    textRequest.recognitionLevel = .fast

    // Perform all requests against the same image in one call.
    let handler = VNImageRequestHandler(cgImage: cgImage, options: [:])
    try handler.perform([sceneRequest, animalRequest, textRequest])

    var analysisResults: [String] = []

    // `results` is already typed per request, so no conditional cast is needed;
    // a request that produced nothing is treated as an empty list.
    // Scenes: look at the first three observations and keep those above 30% confidence.
    let topScenes = (sceneRequest.results ?? [])
        .prefix(3)
        .filter { $0.confidence > 0.3 }
        .map { "\($0.identifier) (\(Int($0.confidence * 100))%)" }
    if !topScenes.isEmpty {
        analysisResults.append("Scenes: \(topScenes.joined(separator: ", "))")
    }

    // Animals: keep observations above 50% confidence and report their first label.
    let animals = (animalRequest.results ?? [])
        .filter { $0.confidence > 0.5 }
        .compactMap { $0.labels.first?.identifier }
    if !animals.isEmpty {
        analysisResults.append("Animals: \(animals.joined(separator: ", "))")
    }

    // Text: take the best candidate of the first five observations, dropping empty strings.
    let recognizedText = (textRequest.results ?? [])
        .prefix(5)
        .compactMap { $0.topCandidates(1).first?.string }
        .filter { !$0.isEmpty }
    if !recognizedText.isEmpty {
        analysisResults.append("Text: \(recognizedText.joined(separator: ", "))")
    }

    WPLogInfo("IntelligenceService.analyzeImage executed in \((CFAbsoluteTimeGetCurrent() - startTime) * 1000) ms")

    // Joining an empty array already yields "", so no special case is required.
    return analysisResults.joined(separator: "; ")
}

/// Suggests tags for a WordPress post.
///
/// - Parameters:
Expand Down Expand Up @@ -155,6 +217,195 @@
let postSizeLimit = Double(IntelligenceService.contextSizeLimit) * ratio
return String((extract ?? post).prefix(Int(postSizeLimit)))
}

/// Metadata for generating alt text and captions.
///
/// Every field is optional and defaults to `nil`. The type is `Sendable`
/// (all members are immutable optional strings) so values can be handed to the
/// `IntelligenceService` actor from another isolation domain.
public struct MediaMetadata: Sendable {
    public let filename: String?
    public let title: String?
    public let caption: String?
    public let description: String?
    public let altText: String?
    public let fileType: String?
    public let dimensions: String?
    public let imageAnalysis: String?

    // NOTE: SonarCloud flags this initializer for having 8 parameters (more than
    // the 7 authorized). The signature is kept unchanged so existing call sites
    // continue to compile.
    public init(
        filename: String? = nil,
        title: String? = nil,
        caption: String? = nil,
        description: String? = nil,
        altText: String? = nil,
        fileType: String? = nil,
        dimensions: String? = nil,
        imageAnalysis: String? = nil
    ) {
        self.filename = filename
        self.title = title
        self.caption = caption
        self.description = description
        self.altText = altText
        self.fileType = fileType
        self.dimensions = dimensions
        self.imageAnalysis = imageAnalysis
    }

    /// Whether at least one field holds a non-empty string.
    var hasContent: Bool {
        [filename, title, caption, description, altText, fileType, dimensions, imageAnalysis]
            .contains { $0?.isEmpty == false }
    }
}

/// Generates alt text for a media item based on available metadata.
///
/// Only the fields that are actually sent to the model count as usable input:
/// image analysis, filename, file type, dimensions, title, caption, and
/// description. Existing alt text is not part of this prompt, so metadata that
/// contains nothing but `altText` is rejected instead of producing an empty
/// prompt.
///
/// - Parameter metadata: The media metadata to use for generation.
/// - Returns: Generated alt text, trimmed of surrounding whitespace and newlines.
/// - Throws: An `NSError` (domain `IntelligenceService`, code `-1`) when no
///   usable metadata is present, or any error thrown by the language model session.
public func generateAltText(metadata: MediaMetadata) async throws -> String {
    // Prompt fields in the order they are presented to the model.
    let labeledFields: [(label: String, value: String?)] = [
        ("IMAGE_ANALYSIS", metadata.imageAnalysis),
        ("FILENAME", metadata.filename),
        ("FILE_TYPE", metadata.fileType),
        ("DIMENSIONS", metadata.dimensions),
        ("TITLE", metadata.title),
        ("CAPTION", metadata.caption),
        ("DESCRIPTION", metadata.description)
    ]
    let contextParts = labeledFields.compactMap { field -> String? in
        guard let value = field.value, !value.isEmpty else { return nil }
        return "\(field.label): '\(value)'"
    }

    // A non-empty context implies the metadata has content, so this single check
    // covers both "no metadata at all" and "only fields this prompt ignores".
    guard !contextParts.isEmpty else {
        throw NSError(domain: "IntelligenceService", code: -1, userInfo: [
            NSLocalizedDescriptionKey: "Insufficient metadata to generate alt text. Please add a filename, title, or description first."
        ])
    }

    let startTime = CFAbsoluteTimeGetCurrent()

    let instructions = """
    You are helping a WordPress user generate alt text for an image.
    Alt text should be concise, descriptive, and accessible for screen readers.

    **Parameters**
    - IMAGE_ANALYSIS: Visual analysis of the actual image content (MOST IMPORTANT)
    - FILENAME: the image filename
    - FILE_TYPE: the file type/extension
    - DIMENSIONS: the image dimensions
    - TITLE: the image title (if available)
    - CAPTION: the image caption (if available)
    - DESCRIPTION: the image description (if available)

    **Requirements**
    - Generate concise alt text (1-2 sentences, max 125 characters)
    - Prioritize IMAGE_ANALYSIS when describing what's in the image
    - Focus on what the image depicts, not decorative elements
    - Use simple, clear language
    - Do not include phrases like "image of" or "picture of"
    - Only output the alt text, nothing else
    """

    let session = LanguageModelSession(
        model: .init(guardrails: .permissiveContentTransformations),
        instructions: instructions
    )

    let prompt = """
    Generate alt text for an image with the following information:

    \(contextParts.joined(separator: "\n"))
    """

    WPLogInfo("IntelligenceService.generateAltText prompt:\n\(prompt)")

    let response = try await session.respond(
        to: prompt,
        options: GenerationOptions(temperature: 0.7)
    )

    WPLogInfo("IntelligenceService.generateAltText executed in \((CFAbsoluteTimeGetCurrent() - startTime) * 1000) ms")

    return response.content.trimmingCharacters(in: .whitespacesAndNewlines)
}

/// Generates a caption for a media item based on available metadata.
///
/// Only the fields that are actually sent to the model count as usable input:
/// image analysis, filename, file type, dimensions, title, alt text, and
/// description. An existing caption is not part of this prompt, so metadata that
/// contains nothing but `caption` is rejected instead of producing an empty
/// prompt.
///
/// - Parameter metadata: The media metadata to use for generation.
/// - Returns: Generated caption, trimmed of surrounding whitespace and newlines.
/// - Throws: An `NSError` (domain `IntelligenceService`, code `-1`) when no
///   usable metadata is present, or any error thrown by the language model session.
public func generateCaption(metadata: MediaMetadata) async throws -> String {
    // Prompt fields in the order they are presented to the model.
    let labeledFields: [(label: String, value: String?)] = [
        ("IMAGE_ANALYSIS", metadata.imageAnalysis),
        ("FILENAME", metadata.filename),
        ("FILE_TYPE", metadata.fileType),
        ("DIMENSIONS", metadata.dimensions),
        ("TITLE", metadata.title),
        ("ALT_TEXT", metadata.altText),
        ("DESCRIPTION", metadata.description)
    ]
    let contextParts = labeledFields.compactMap { field -> String? in
        guard let value = field.value, !value.isEmpty else { return nil }
        return "\(field.label): '\(value)'"
    }

    // A non-empty context implies the metadata has content, so this single check
    // covers both "no metadata at all" and "only fields this prompt ignores".
    guard !contextParts.isEmpty else {
        throw NSError(domain: "IntelligenceService", code: -1, userInfo: [
            NSLocalizedDescriptionKey: "Insufficient metadata to generate caption. Please add a filename, title, or description first."
        ])
    }

    let startTime = CFAbsoluteTimeGetCurrent()

    let instructions = """
    You are helping a WordPress user generate a caption for an image.
    Captions should be engaging, informative, and complement the image.

    **Parameters**
    - IMAGE_ANALYSIS: Visual analysis of the actual image content (MOST IMPORTANT)
    - FILENAME: the image filename
    - FILE_TYPE: the file type/extension
    - DIMENSIONS: the image dimensions
    - TITLE: the image title (if available)
    - ALT_TEXT: the image alt text (if available)
    - DESCRIPTION: the image description (if available)

    **Requirements**
    - Generate an engaging caption (1-2 sentences)
    - Prioritize IMAGE_ANALYSIS to understand what's actually in the image
    - Can be more creative and conversational than alt text
    - May include context, emotion, or storytelling elements
    - Only output the caption, nothing else
    """

    let session = LanguageModelSession(
        model: .init(guardrails: .permissiveContentTransformations),
        instructions: instructions
    )

    let prompt = """
    Generate a caption for an image with the following information:

    \(contextParts.joined(separator: "\n"))
    """

    WPLogInfo("IntelligenceService.generateCaption prompt:\n\(prompt)")

    let response = try await session.respond(
        to: prompt,
        options: GenerationOptions(temperature: 0.8)
    )

    WPLogInfo("IntelligenceService.generateCaption executed in \((CFAbsoluteTimeGetCurrent() - startTime) * 1000) ms")

    return response.content.trimmingCharacters(in: .whitespacesAndNewlines)
}
}

private extension Array where Element: Hashable {
Expand Down
34 changes: 30 additions & 4 deletions WordPress/Classes/ViewRelated/Media/MediaItemViewController.swift
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,24 @@

private let headerView = MediaItemHeaderView()
/// Caps the header view's height at 320; created lazily on first access.
private lazy var headerMaxHeightConstraint = headerView.heightAnchor.constraint(lessThanOrEqualToConstant: 320)
/// Type-erased backing storage for the `textGenerationController` accessor, which casts it back.
/// NOTE(review): presumably stored as `AnyObject` because a stored property cannot carry
/// `@available(iOS 26, *)` while the controller type requires it — confirm.
private var _textGenerationController: AnyObject?

Check warning on line 25 in WordPress/Classes/ViewRelated/Media/MediaItemViewController.swift

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Rename this field "_textGenerationController" to match the regular expression ^[a-z][a-zA-Z0-9]*$.

See more on https://sonarcloud.io/project/issues?id=wordpress-mobile_WordPress-iOS&issues=AZrMikCM97Rg6UUugA-U&open=AZrMikCM97Rg6UUugA-U&pullRequest=25035

/// The text-generation controller for this media item, created on first access
/// and cached in `_textGenerationController`.
///
/// Its completion handler writes the generated text into `mediaMetadata`
/// (alt text or caption, depending on the reported type) and reloads the view model.
@available(iOS 26, *)
private var textGenerationController: MediaTextGenerationController {
    // Conditional cast instead of `as!`: an empty or unexpected cache entry falls
    // through to creation rather than crashing.
    if let existing = _textGenerationController as? MediaTextGenerationController {
        return existing
    }

    let controller = MediaTextGenerationController(media: media) { [weak self] type, generatedText in
        guard let self else { return }
        switch type {
        case .altText:
            self.mediaMetadata.alt = generatedText
        case .caption:
            self.mediaMetadata.caption = generatedText
        }
        self.reloadViewModel()
    }
    _textGenerationController = controller
    return controller
}

init(media: Media) {
self.media = media
Expand Down Expand Up @@ -327,11 +345,14 @@
/// Returns the row action that pushes the caption editor.
///
/// Edits made in the pushed controller are written to `mediaMetadata.caption`
/// and the view model is reloaded. On iOS 26 and later the pushed controller is
/// also configured for caption generation via `textGenerationController`.
private func editCaption() -> ((ImmuTableRow) -> ()) {
    return { [weak self] row in
        // Unwrap once so the rest of the action does not mix `self?.` and `self.`.
        guard let self else { return }
        // NOTE(review): the force cast assumes this action is only attached to
        // `EditableTextRow` rows — confirm against the view model.
        let editableRow = row as! EditableTextRow
        let controller = self.pushSettingsController(
            for: editableRow,
            hint: Strings.Hints.imageCaption,
            onValueChanged: { [weak self] value in
                // The pushed controller stores this closure, so capture weakly.
                self?.mediaMetadata.caption = value
                self?.reloadViewModel()
            }
        )
        if #available(iOS 26, *) {
            self.textGenerationController.configure(controller, for: .caption)
        }
    }
}

Expand All @@ -349,15 +370,19 @@
/// Returns the row action that pushes the alt text editor.
///
/// Edits made in the pushed controller are written to `mediaMetadata.alt` and
/// the view model is reloaded. On iOS 26 and later the pushed controller is also
/// configured for alt text generation via `textGenerationController`.
private func editAlt() -> ((ImmuTableRow) -> ()) {
    return { [weak self] row in
        // Unwrap once so the rest of the action does not mix `self?.` and `self.`.
        guard let self else { return }
        // NOTE(review): the force cast assumes this action is only attached to
        // `EditableTextRow` rows — confirm against the view model.
        let editableRow = row as! EditableTextRow
        let controller = self.pushSettingsController(
            for: editableRow,
            hint: Strings.Hints.imageAlt,
            onValueChanged: { [weak self] value in
                // The pushed controller stores this closure, so capture weakly.
                self?.mediaMetadata.alt = value
                self?.reloadViewModel()
            }
        )
        if #available(iOS 26, *) {
            self.textGenerationController.configure(controller, for: .altText)
        }
    }
}

private func pushSettingsController(for row: EditableTextRow, hint: String? = nil, onValueChanged: @escaping SettingsTextChanged) {
@discardableResult
private func pushSettingsController(for row: EditableTextRow, hint: String? = nil, onValueChanged: @escaping SettingsTextChanged) -> SettingsTextViewController {
let title = row.title
let value = row.value
let controller = SettingsTextViewController(text: value, placeholder: "\(title)...", hint: hint)
Expand All @@ -366,6 +391,7 @@
controller.onValueChanged = onValueChanged

navigationController?.pushViewController(controller, animated: true)
return controller
}

// MARK: - Sharing Logic
Expand Down Expand Up @@ -417,7 +443,7 @@
/// Provides some extra formatting for a Media asset's metadata, used
/// to present it in the MediaItemViewController
///
private struct MediaMetadataPresenter {
struct MediaMetadataPresenter {
let media: Media

/// A String containing the pixel size of the asset (width X height)
Expand Down
Loading