diff --git a/FirebaseAI/CHANGELOG.md b/FirebaseAI/CHANGELOG.md index b1750ed5ce1..e2b120d2e19 100644 --- a/FirebaseAI/CHANGELOG.md +++ b/FirebaseAI/CHANGELOG.md @@ -1,3 +1,8 @@ +# Unreleased +- [feature] Adds support for configuring image generation properties, + such as aspect ratio and image size, through the new `ImageConfig` struct + and its integration with `GenerationConfig`. + # 12.11.0 - [feature] **Public Preview**: Introduces `GenerativeModelSession` providing APIs for generating structured data from Gemini via the same `@Generable` and diff --git a/FirebaseAI/Sources/GenerationConfig.swift b/FirebaseAI/Sources/GenerationConfig.swift index f420e7fee44..1d39f0e3545 100644 --- a/FirebaseAI/Sources/GenerationConfig.swift +++ b/FirebaseAI/Sources/GenerationConfig.swift @@ -59,6 +59,9 @@ public struct GenerationConfig: Sendable { /// Configuration for controlling the "thinking" behavior of compatible Gemini models. var thinkingConfig: ThinkingConfig? + /// Configuration for the aspect ratio and size of generated images. + var imageConfig: ImageConfig? + /// Creates a new `GenerationConfig` value. /// /// See the @@ -162,12 +165,13 @@ public struct GenerationConfig: Sendable { /// > backwards-incompatible ways. /// - thinkingConfig: Configuration for controlling the "thinking" behavior of compatible Gemini /// models; see ``ThinkingConfig`` for more details. + /// - imageConfig: Configure the aspect ratio and size of generated images. public init(temperature: Float? = nil, topP: Float? = nil, topK: Int? = nil, candidateCount: Int? = nil, maxOutputTokens: Int? = nil, presencePenalty: Float? = nil, frequencyPenalty: Float? = nil, stopSequences: [String]? = nil, responseMIMEType: String? = nil, responseSchema: Schema? = nil, responseModalities: [ResponseModality]? = nil, - thinkingConfig: ThinkingConfig? = nil) { + thinkingConfig: ThinkingConfig? = nil, imageConfig: ImageConfig? 
= nil) { // Explicit init because otherwise if we re-arrange the above variables it changes the API // surface. self.temperature = temperature @@ -183,12 +187,14 @@ public struct GenerationConfig: Sendable { responseJSONSchema = nil self.responseModalities = responseModalities self.thinkingConfig = thinkingConfig + self.imageConfig = imageConfig } init(temperature: Float? = nil, topP: Float? = nil, topK: Int? = nil, candidateCount: Int? = nil, maxOutputTokens: Int? = nil, presencePenalty: Float? = nil, frequencyPenalty: Float? = nil, stopSequences: [String]? = nil, responseMIMEType: String, responseJSONSchema: JSONObject, - responseModalities: [ResponseModality]? = nil, thinkingConfig: ThinkingConfig? = nil) { + responseModalities: [ResponseModality]? = nil, thinkingConfig: ThinkingConfig? = nil, + imageConfig: ImageConfig? = nil) { self.temperature = temperature self.topP = topP self.topK = topK @@ -202,6 +208,7 @@ public struct GenerationConfig: Sendable { self.responseJSONSchema = responseJSONSchema self.responseModalities = responseModalities self.thinkingConfig = thinkingConfig + self.imageConfig = imageConfig } /// Merges two configurations, giving precedence to values found in the `overrides` parameter. @@ -239,6 +246,7 @@ public struct GenerationConfig: Sendable { config.responseMIMEType = overrideConfig.responseMIMEType ?? config.responseMIMEType config.responseModalities = overrideConfig.responseModalities ?? config.responseModalities config.thinkingConfig = overrideConfig.thinkingConfig ?? config.thinkingConfig + config.imageConfig = overrideConfig.imageConfig ?? config.imageConfig // 5. Handle Schema mutual exclusivity with precedence for `responseJSONSchema`. 
if let responseJSONSchema = overrideConfig.responseJSONSchema { @@ -271,5 +279,6 @@ extension GenerationConfig: Encodable { case responseJSONSchema = "responseJsonSchema" case responseModalities case thinkingConfig + case imageConfig } } diff --git a/FirebaseAI/Sources/Types/Internal/Imagen/ImageGenerationParameters.swift b/FirebaseAI/Sources/Types/Internal/Imagen/ImageGenerationParameters.swift index aa1e1b085c8..d1e93d106c0 100644 --- a/FirebaseAI/Sources/Types/Internal/Imagen/ImageGenerationParameters.swift +++ b/FirebaseAI/Sources/Types/Internal/Imagen/ImageGenerationParameters.swift @@ -18,6 +18,7 @@ struct ImageGenerationParameters { let storageURI: String? let negativePrompt: String? let aspectRatio: String? + let sampleImageSize: String? let safetyFilterLevel: String? let personGeneration: String? let outputOptions: ImageGenerationOutputOptions? @@ -38,6 +39,7 @@ extension ImageGenerationParameters: Encodable { case storageURI = "storageUri" case negativePrompt case aspectRatio + case sampleImageSize case safetyFilterLevel = "safetySetting" case personGeneration case outputOptions @@ -52,6 +54,7 @@ extension ImageGenerationParameters: Encodable { try container.encodeIfPresent(storageURI, forKey: .storageURI) try container.encodeIfPresent(negativePrompt, forKey: .negativePrompt) try container.encodeIfPresent(aspectRatio, forKey: .aspectRatio) + try container.encodeIfPresent(sampleImageSize, forKey: .sampleImageSize) try container.encodeIfPresent(safetyFilterLevel, forKey: .safetyFilterLevel) try container.encodeIfPresent(personGeneration, forKey: .personGeneration) try container.encodeIfPresent(outputOptions, forKey: .outputOptions) diff --git a/FirebaseAI/Sources/Types/Public/AGENTS.md b/FirebaseAI/Sources/Types/Public/AGENTS.md index fe6e22a1362..a9d1c13e0f6 100644 --- a/FirebaseAI/Sources/Types/Public/AGENTS.md +++ b/FirebaseAI/Sources/Types/Public/AGENTS.md @@ -14,6 +14,7 @@ Any changes to these types must be done carefully to avoid breaking changes for 
### Files - **`Backend.swift`**: Defines the `Backend` struct, which is used to configure the backend API for the Firebase AI SDK. It provides static methods `vertexAI(location:)` and `googleAI()` to create instances for the respective backends. +- **`ImageConfig.swift`**: Defines the `ImageConfig` struct, used for configuring generated image properties like aspect ratio and size. - **`Part.swift`**: Defines the `Part` protocol and several conforming structs: `TextPart`, `InlineDataPart`, `FileDataPart`, `FunctionCallPart`, `FunctionResponsePart`, `ExecutableCodePart`, and `CodeExecutionResultPart`. A `Part` represents a discrete piece of data in a media format that can be interpreted by the model. - **`ResponseModality.swift`**: Defines the `ResponseModality` struct, which represents the different types of data that a model can produce as output (e.g., `text`, `image`, `audio`). - **`Schema.swift`**: Defines the `Schema` class, which allows the definition of input and output data types for function calling. It supports various data types like string, number, integer, boolean, array, and object. diff --git a/FirebaseAI/Sources/Types/Public/ImageConfig.swift b/FirebaseAI/Sources/Types/Public/ImageConfig.swift new file mode 100644 index 00000000000..8ae4ce66690 --- /dev/null +++ b/FirebaseAI/Sources/Types/Public/ImageConfig.swift @@ -0,0 +1,172 @@ +// Copyright 2026 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +/// Configuration options for generating images with Gemini models. +/// +/// See the [documentation](https://ai.google.dev/gemini-api/docs/image-generation#aspect_ratios_and_image_size) +/// to learn about parameters available for use with Gemini image models. +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) +public struct ImageConfig: Sendable { + /// The aspect ratio of generated images. + public let aspectRatio: AspectRatio? + + /// The size of the generated images. + public let imageSize: ImageSize? + + /// Initializes configuration options for generating images with Gemini. + /// + /// - Parameters: + /// - aspectRatio: The aspect ratio of generated images. + /// - imageSize: The size of the generated images. + public init(aspectRatio: AspectRatio? = nil, imageSize: ImageSize? = nil) { + self.aspectRatio = aspectRatio + self.imageSize = imageSize + } +} + +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) +public extension ImageConfig { + /// An aspect ratio for generated images. + struct AspectRatio: Sendable { + /// Square (1:1) aspect ratio. + /// + /// Common uses for this aspect ratio include social media posts. + public static let square1x1 = AspectRatio(kind: .square1x1) + + /// Portrait widescreen (9:16) aspect ratio. + /// + /// This is the ``landscape16x9`` aspect ratio rotated 90 degrees. This is a relatively new + /// aspect ratio that has been popularized by short form video apps (for example, YouTube + /// shorts). Use this for tall objects with strong vertical orientations such as buildings, + /// trees, waterfalls, or other similar objects. + public static let portrait9x16 = AspectRatio(kind: .portrait9x16) + + /// Widescreen (16:9) aspect ratio. + /// + /// This ratio has replaced ``landscape4x3`` as the most common aspect ratio for TVs, monitors, + /// and mobile phone screens (landscape). 
Use this aspect ratio when you want to capture more of + /// the background (for example, scenic landscapes). + public static let landscape16x9 = AspectRatio(kind: .landscape16x9) + + /// Portrait full screen (3:4) aspect ratio. + /// + /// This is the ``landscape4x3`` aspect ratio rotated 90 degrees. This allows you to capture + /// more of the scene vertically compared to the ``square1x1`` aspect ratio. + public static let portrait3x4 = AspectRatio(kind: .portrait3x4) + + /// Fullscreen (4:3) aspect ratio. + /// + /// This aspect ratio is commonly used in media or film. It is also the dimensions of most old + /// (non-widescreen) TVs and medium format cameras. It captures more of the scene horizontally + /// (compared to ``square1x1``), making it a preferred aspect ratio for photography. + public static let landscape4x3 = AspectRatio(kind: .landscape4x3) + + /// Portrait (2:3) aspect ratio. + public static let portrait2x3 = AspectRatio(kind: .portrait2x3) + + /// Landscape (3:2) aspect ratio. + public static let landscape3x2 = AspectRatio(kind: .landscape3x2) + + /// Portrait (4:5) aspect ratio. + public static let portrait4x5 = AspectRatio(kind: .portrait4x5) + + /// Landscape (5:4) aspect ratio. + public static let landscape5x4 = AspectRatio(kind: .landscape5x4) + + /// Portrait (1:4) aspect ratio. + public static let portrait1x4 = AspectRatio(kind: .portrait1x4) + + /// Landscape (4:1) aspect ratio. + public static let landscape4x1 = AspectRatio(kind: .landscape4x1) + + /// Portrait (1:8) aspect ratio. + public static let portrait1x8 = AspectRatio(kind: .portrait1x8) + + /// Landscape (8:1) aspect ratio. + public static let landscape8x1 = AspectRatio(kind: .landscape8x1) + + /// Ultrawide (21:9) aspect ratio. + public static let ultrawide21x9 = AspectRatio(kind: .ultrawide21x9) + + let rawValue: String + } + + /// The size of images to generate. + struct ImageSize: Sendable { + /// 512px (0.5K) image size. 
+ /// + /// This corresponds to 512x512 pixel images in a ``ImageConfig/AspectRatio/square1x1`` aspect + /// ratio. See the [documentation](https://ai.google.dev/gemini-api/docs/image-generation#aspect_ratios_and_image_size) + /// for specific sizes in other aspect ratios. + public static let size512 = ImageSize(kind: .size512) + + /// 1K image size. + /// + /// This corresponds to 1024x1024 pixel images in a ``ImageConfig/AspectRatio/square1x1`` aspect + /// ratio. See the [documentation](https://ai.google.dev/gemini-api/docs/image-generation#aspect_ratios_and_image_size) + /// for specific sizes in other aspect ratios. + public static let size1K = ImageSize(kind: .size1K) + + /// 2K image size. + /// + /// This corresponds to 2048x2048 pixel images in a ``ImageConfig/AspectRatio/square1x1`` aspect + /// ratio. See the [documentation](https://ai.google.dev/gemini-api/docs/image-generation#aspect_ratios_and_image_size) + /// for specific sizes in other aspect ratios. + public static let size2K = ImageSize(kind: .size2K) + + /// 4K image size. + /// + /// This corresponds to 4096x4096 pixel images in a ``ImageConfig/AspectRatio/square1x1`` aspect + /// ratio. See the [documentation](https://ai.google.dev/gemini-api/docs/image-generation#aspect_ratios_and_image_size) + /// for specific sizes in other aspect ratios. 
+ public static let size4K = ImageSize(kind: .size4K) + + let rawValue: String + } +} + +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) +extension ImageConfig.AspectRatio: EncodableProtoEnum { + enum Kind: String { + case square1x1 = "1:1" + case portrait9x16 = "9:16" + case landscape16x9 = "16:9" + case portrait3x4 = "3:4" + case landscape4x3 = "4:3" + case portrait2x3 = "2:3" + case landscape3x2 = "3:2" + case portrait4x5 = "4:5" + case landscape5x4 = "5:4" + case portrait1x4 = "1:4" + case landscape4x1 = "4:1" + case portrait1x8 = "1:8" + case landscape8x1 = "8:1" + case ultrawide21x9 = "21:9" + } +} + +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) +extension ImageConfig.ImageSize: EncodableProtoEnum { + enum Kind: String { + case size512 = "512" + case size1K = "1K" + case size2K = "2K" + case size4K = "4K" + } +} + +// MARK: - Codable Conformances + +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) +extension ImageConfig: Encodable {} diff --git a/FirebaseAI/Sources/Types/Public/Imagen/AGENTS.md b/FirebaseAI/Sources/Types/Public/Imagen/AGENTS.md index 54aaebf5f6c..09244519bc0 100644 --- a/FirebaseAI/Sources/Types/Public/Imagen/AGENTS.md +++ b/FirebaseAI/Sources/Types/Public/Imagen/AGENTS.md @@ -15,6 +15,8 @@ These types are part of the public API and are used by developers to interact wi - **`ImagenImagesBlockedError.swift`**: Defines the `ImagenImagesBlockedError` error, which occurs when image generation fails due to all generated images being blocked. +- **`ImagenImageSize.swift`**: Defines the `ImagenImageSize` struct, which represents the size of images generated by Imagen. It provides static properties for common image sizes like `size1K`, `size2K`, etc. + - **`ImagenInlineImage.swift`**: Defines the `ImagenInlineImage` struct, which represents an image generated by Imagen as inline data. It contains the `mimeType` and the image `data`. 
- **`ImagenModel.swift`**: Defines the `ImagenModel` class, which is the main entry point for generating images. It has methods like `generateImages(prompt:)` to generate images from a text prompt. diff --git a/FirebaseAI/Sources/Types/Public/Imagen/ImagenAspectRatio.swift b/FirebaseAI/Sources/Types/Public/Imagen/ImagenAspectRatio.swift index a7e9fd905f0..d150b949df3 100644 --- a/FirebaseAI/Sources/Types/Public/Imagen/ImagenAspectRatio.swift +++ b/FirebaseAI/Sources/Types/Public/Imagen/ImagenAspectRatio.swift @@ -12,59 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. -import Foundation - -/// An aspect ratio for images generated by Imagen. +/// The aspect ratio of images generated by Imagen. /// /// To specify an aspect ratio for generated images, set ``ImagenGenerationConfig/aspectRatio`` in /// your ``ImagenGenerationConfig``. See the [Cloud /// documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/image/generate-images#aspect-ratio) /// for more details and examples of the supported aspect ratios. @available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) -public struct ImagenAspectRatio: Sendable { - /// Square (1:1) aspect ratio. - /// - /// Common uses for this aspect ratio include social media posts. - public static let square1x1 = ImagenAspectRatio(kind: .square1x1) - - /// Portrait widescreen (9:16) aspect ratio. - /// - /// This is the ``landscape16x9`` aspect ratio rotated 90 degrees. This a relatively new aspect - /// ratio that has been popularized by short form video apps (for example, YouTube shorts). Use - /// this for tall objects with strong vertical orientations such as buildings, trees, waterfalls, - /// or other similar objects. - public static let portrait9x16 = ImagenAspectRatio(kind: .portrait9x16) - - /// Widescreen (16:9) aspect ratio. 
- /// - /// This ratio has replaced ``landscape4x3`` as the most common aspect ratio for TVs, monitors, - /// and mobile phone screens (landscape). Use this aspect ratio when you want to capture more of - /// the background (for example, scenic landscapes). - public static let landscape16x9 = ImagenAspectRatio(kind: .landscape16x9) - - /// Portrait full screen (3:4) aspect ratio. - /// - /// This is the ``landscape4x3`` aspect ratio rotated 90 degrees. This lets to capture more of - /// the scene vertically compared to the ``square1x1`` aspect ratio. - public static let portrait3x4 = ImagenAspectRatio(kind: .portrait3x4) - - /// Fullscreen (4:3) aspect ratio. - /// - /// This aspect ratio is commonly used in media or film. It is also the dimensions of most old - /// (non-widescreen) TVs and medium format cameras. It captures more of the scene horizontally - /// (compared to ``square1x1``), making it a preferred aspect ratio for photography. - public static let landscape4x3 = ImagenAspectRatio(kind: .landscape4x3) - - let rawValue: String -} - -@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) -extension ImagenAspectRatio: ProtoEnum { - enum Kind: String { - case square1x1 = "1:1" - case portrait9x16 = "9:16" - case landscape16x9 = "16:9" - case portrait3x4 = "3:4" - case landscape4x3 = "4:3" - } -} +public typealias ImagenAspectRatio = ImageConfig.AspectRatio diff --git a/FirebaseAI/Sources/Types/Public/Imagen/ImagenGenerationConfig.swift b/FirebaseAI/Sources/Types/Public/Imagen/ImagenGenerationConfig.swift index 66623aeedf1..95e6923e1b6 100644 --- a/FirebaseAI/Sources/Types/Public/Imagen/ImagenGenerationConfig.swift +++ b/FirebaseAI/Sources/Types/Public/Imagen/ImagenGenerationConfig.swift @@ -43,6 +43,12 @@ public struct ImagenGenerationConfig { /// ``ImagenAspectRatio`` for more details. public var aspectRatio: ImagenAspectRatio? + /// The size of generated images. + /// + /// Defaults to 1K. 
Supported image sizes depend on the model; see ``ImagenImageSize`` for more + /// details. + public var imageSize: ImagenImageSize? + /// The image format of generated images. /// /// Defaults to PNG. See ``ImagenImageFormat`` for more details. @@ -65,18 +71,20 @@ public struct ImagenGenerationConfig { /// specified. See ``negativePrompt``. /// - numberOfImages: The number of image samples to generate; defaults to 1 if not specified. /// See ``numberOfImages``. - /// - aspectRatio: The aspect ratio of generated images; defaults to to square, 1:1. See + /// - aspectRatio: The aspect ratio of generated images; defaults to square, 1:1. See /// ``aspectRatio``. + /// - imageSize: The size of generated images; defaults to 1K. See ``imageSize``. /// - imageFormat: The image format of generated images; defaults to PNG. See ``imageFormat``. /// - addWatermark: Whether to add an invisible watermark to generated images; the default value /// depends on the model. See ``addWatermark``. public init(negativePrompt: String? = nil, numberOfImages: Int? = nil, - aspectRatio: ImagenAspectRatio? = nil, imageFormat: ImagenImageFormat? = nil, - addWatermark: Bool? = nil) { - self.numberOfImages = numberOfImages + aspectRatio: ImagenAspectRatio? = nil, imageSize: ImagenImageSize? = nil, + imageFormat: ImagenImageFormat? = nil, addWatermark: Bool? 
= nil) { self.negativePrompt = negativePrompt - self.imageFormat = imageFormat + self.numberOfImages = numberOfImages self.aspectRatio = aspectRatio + self.imageSize = imageSize + self.imageFormat = imageFormat self.addWatermark = addWatermark } } diff --git a/FirebaseAI/Sources/Types/Public/Imagen/ImagenImageSize.swift b/FirebaseAI/Sources/Types/Public/Imagen/ImagenImageSize.swift new file mode 100644 index 00000000000..549e16215d8 --- /dev/null +++ b/FirebaseAI/Sources/Types/Public/Imagen/ImagenImageSize.swift @@ -0,0 +1,22 @@ +// Copyright 2026 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/// The size of images generated by Imagen. +/// +/// To specify the size of generated images, set ``ImagenGenerationConfig/imageSize`` in +/// your ``ImagenGenerationConfig``. See the [Cloud +/// documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/image/generate-images) +/// for more details and examples of the supported image sizes. 
+@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) +public typealias ImagenImageSize = ImageConfig.ImageSize diff --git a/FirebaseAI/Sources/Types/Public/Imagen/ImagenModel.swift b/FirebaseAI/Sources/Types/Public/Imagen/ImagenModel.swift index 51a59796de6..17111cd4dbf 100644 --- a/FirebaseAI/Sources/Types/Public/Imagen/ImagenModel.swift +++ b/FirebaseAI/Sources/Types/Public/Imagen/ImagenModel.swift @@ -137,6 +137,7 @@ public final class ImagenModel { storageURI: storageURI, negativePrompt: generationConfig?.negativePrompt, aspectRatio: generationConfig?.aspectRatio?.rawValue, + sampleImageSize: generationConfig?.imageSize?.rawValue, safetyFilterLevel: safetySettings?.safetyFilterLevel?.rawValue, personGeneration: safetySettings?.personFilterLevel?.rawValue, outputOptions: generationConfig?.imageFormat.map { diff --git a/FirebaseAI/Tests/TestApp/Sources/Constants.swift b/FirebaseAI/Tests/TestApp/Sources/Constants.swift index a7687ab20ed..691f1eb70ff 100644 --- a/FirebaseAI/Tests/TestApp/Sources/Constants.swift +++ b/FirebaseAI/Tests/TestApp/Sources/Constants.swift @@ -30,5 +30,6 @@ public enum ModelNames { public static let gemini2_5_FlashLivePreview = "gemini-2.5-flash-native-audio-preview-12-2025" public static let gemini2_5_Pro = "gemini-2.5-pro" public static let gemini3_1_FlashLitePreview = "gemini-3.1-flash-lite-preview" + public static let gemini3_1_FlashImagePreview = "gemini-3.1-flash-image-preview" public static let gemma3_4B = "gemma-3-4b-it" } diff --git a/FirebaseAI/Tests/TestApp/Tests/Integration/GenerateContentIntegrationTests.swift b/FirebaseAI/Tests/TestApp/Tests/Integration/GenerateContentIntegrationTests.swift index 87f4d3dc590..455db0226fe 100644 --- a/FirebaseAI/Tests/TestApp/Tests/Integration/GenerateContentIntegrationTests.swift +++ b/FirebaseAI/Tests/TestApp/Tests/Integration/GenerateContentIntegrationTests.swift @@ -385,6 +385,93 @@ struct GenerateContentIntegrationTests { #expect(finalText.contains("25")) } + 
@Test(arguments: [ + (InstanceConfig.vertexAI_v1beta, ModelNames.gemini2_5_FlashImage), + (InstanceConfig.vertexAI_v1beta_global, ModelNames.gemini2_5_FlashImage), + (InstanceConfig.googleAI_v1beta, ModelNames.gemini2_5_FlashImage), + (InstanceConfig.googleAI_v1beta, ModelNames.gemini3_1_FlashImagePreview), + (InstanceConfig.vertexAI_v1beta_global, ModelNames.gemini3_1_FlashImagePreview), + ]) + func generateImageWithAspectRatio(_ config: InstanceConfig, modelName: String) async throws { + let imageConfig = ImageConfig(aspectRatio: .landscape16x9) + let generationConfig = GenerationConfig( + temperature: 0.0, + topP: 0.0, + topK: 1, + responseModalities: [.image], + imageConfig: imageConfig + ) + let model = FirebaseAI.componentInstance(config).generativeModel( + modelName: modelName, + generationConfig: generationConfig, + safetySettings: safetySettings + ) + let prompt = "Generate an image of a cute cartoon kitten playing with a ball of yarn." + + let response = try await model.generateContent(prompt) + + let candidate = try #require(response.candidates.first) + let inlineDataPart = try #require(candidate.content.parts + .first { $0 is InlineDataPart } as? 
InlineDataPart) + let inlineDataPartsViaAccessor = response.inlineDataParts + #expect(inlineDataPartsViaAccessor.count == 1) + let inlineDataPartViaAccessor = try #require(inlineDataPartsViaAccessor.first) + #expect(inlineDataPart == inlineDataPartViaAccessor) + #expect(inlineDataPart.mimeType.starts(with: "image/")) + #expect(inlineDataPart.data.count > 0) + #if canImport(UIKit) + let uiImage = try #require(UIImage(data: inlineDataPart.data)) + // Note: Images are not exactly 16:9 but align with the documented sizes + // (https://ai.google.dev/gemini-api/docs/image-generation#aspect_ratios_and_image_size) + #expect(uiImage.size.width >= 1344) // Gemini 2.5 produces images slightly narrower than 16:9 + #expect(uiImage.size.width <= 1376) // Gemini 3 produces images slightly wider than 16:9 + #expect(uiImage.size.height == 768) + #endif // canImport(UIKit) + } + + @Test(arguments: [ + (InstanceConfig.googleAI_v1beta, ModelNames.gemini3_1_FlashImagePreview), + (InstanceConfig.vertexAI_v1beta_global, ModelNames.gemini3_1_FlashImagePreview), + ]) + func generateImageWithCustomSize(_ config: InstanceConfig, modelName: String) async throws { + let imageConfig = ImageConfig( + // If not specified, images are generated in a random aspect ratio. + // Note: The documentation states that "1:1 squares" are the default. + aspectRatio: .square1x1, + imageSize: .size2K + ) + let generationConfig = GenerationConfig( + temperature: 0.0, + topP: 0.0, + topK: 1, + responseModalities: [.image], + imageConfig: imageConfig + ) + let model = FirebaseAI.componentInstance(config).generativeModel( + modelName: modelName, + generationConfig: generationConfig, + safetySettings: safetySettings + ) + let prompt = "Generate an image of a cute cartoon puppy catching a ball in the air." 
+ + let response = try await model.generateContent(prompt) + + let candidate = try #require(response.candidates.first) + let inlineDataPart = try #require(candidate.content.parts + .first { $0 is InlineDataPart } as? InlineDataPart) + let inlineDataPartsViaAccessor = response.inlineDataParts + #expect(inlineDataPartsViaAccessor.count == 1) + let inlineDataPartViaAccessor = try #require(inlineDataPartsViaAccessor.first) + #expect(inlineDataPart == inlineDataPartViaAccessor) + #expect(inlineDataPart.mimeType.starts(with: "image/")) + #expect(inlineDataPart.data.count > 0) + #if canImport(UIKit) + let uiImage = try #require(UIImage(data: inlineDataPart.data)) + #expect(uiImage.size.width == 2048) + #expect(uiImage.size.height == 2048) + #endif // canImport(UIKit) + } + @Test(arguments: [ (InstanceConfig.vertexAI_v1beta, ModelNames.gemini2_5_FlashImage), (InstanceConfig.vertexAI_v1beta_global, ModelNames.gemini2_5_FlashImage), @@ -399,11 +486,6 @@ struct GenerateContentIntegrationTests { topK: 1, responseModalities: [.text, .image] ) - let safetySettings = safetySettings.filter { - // HARM_CATEGORY_CIVIC_INTEGRITY is deprecated in Vertex AI but only rejected when using the - // 'gemini-2.0-flash-preview-image-generation' model. 
- $0.harmCategory != .civicIntegrity - } let model = FirebaseAI.componentInstance(config).generativeModel( modelName: modelName, generationConfig: generationConfig, diff --git a/FirebaseAI/Tests/TestApp/Tests/Integration/ImagenIntegrationTests.swift b/FirebaseAI/Tests/TestApp/Tests/Integration/ImagenIntegrationTests.swift index 95a4f04ff2b..4d9fce956e2 100644 --- a/FirebaseAI/Tests/TestApp/Tests/Integration/ImagenIntegrationTests.swift +++ b/FirebaseAI/Tests/TestApp/Tests/Integration/ImagenIntegrationTests.swift @@ -75,6 +75,39 @@ struct ImagenIntegrationTests { #endif // canImport(UIKit) } + @Test func generateImage_imageSize() async throws { + let generationConfig = ImagenGenerationConfig( + aspectRatio: .square1x1, + imageSize: .size2K, // Test with size2K + imageFormat: .png(), + addWatermark: false + ) + let model = vertex.imagenModel( + modelName: "imagen-4.0-generate-001", + generationConfig: generationConfig, + safetySettings: ImagenSafetySettings( + safetyFilterLevel: .blockLowAndAbove, + personFilterLevel: .allowAdult + ) + ) + let imagePrompt = "A futuristic city skyline at sunset" + + let response = try await model.generateImages(prompt: imagePrompt) + + #expect(response.filteredReason == nil) + #expect(response.images.count == 1) + let image = try #require(response.images.first) + #expect(image.mimeType == "image/png") + #expect(image.data.isEmpty == false) + #if canImport(UIKit) + let uiImage = try #require(UIImage(data: image.data)) + #expect(uiImage.size.width == 2048.0) // 2K square image + #expect(uiImage.size.height == 2048.0) + #endif // canImport(UIKit) + } + + // TODO: Add integration tests for image sizes and update to Imagen 4. 
+ @Test func generateImages_gcsImages() async throws { let generationConfig = ImagenGenerationConfig( numberOfImages: 3, diff --git a/FirebaseAI/Tests/Unit/Types/Imagen/ImageGenerationParametersTests.swift b/FirebaseAI/Tests/Unit/Types/Imagen/ImageGenerationParametersTests.swift index 0d398738111..ec90377c5eb 100644 --- a/FirebaseAI/Tests/Unit/Types/Imagen/ImageGenerationParametersTests.swift +++ b/FirebaseAI/Tests/Unit/Types/Imagen/ImageGenerationParametersTests.swift @@ -30,6 +30,7 @@ final class ImageGenerationParametersTests: XCTestCase { storageURI: nil, negativePrompt: nil, aspectRatio: nil, + sampleImageSize: nil, safetyFilterLevel: nil, personGeneration: nil, outputOptions: nil, @@ -54,6 +55,7 @@ final class ImageGenerationParametersTests: XCTestCase { storageURI: storageURI, negativePrompt: nil, aspectRatio: nil, + sampleImageSize: nil, safetyFilterLevel: nil, personGeneration: nil, outputOptions: nil, @@ -77,11 +79,13 @@ final class ImageGenerationParametersTests: XCTestCase { let compressionQuality = 80 let imageFormat = ImagenImageFormat.jpeg(compressionQuality: compressionQuality) let aspectRatio = ImagenAspectRatio.landscape16x9 + let imageSize = ImagenImageSize.size1K let addWatermark = true let generationConfig = ImagenGenerationConfig( negativePrompt: negativePrompt, numberOfImages: sampleCount, aspectRatio: aspectRatio, + imageSize: imageSize, imageFormat: imageFormat, addWatermark: addWatermark ) @@ -90,6 +94,7 @@ final class ImageGenerationParametersTests: XCTestCase { storageURI: nil, negativePrompt: negativePrompt, aspectRatio: aspectRatio.rawValue, + sampleImageSize: imageSize.rawValue, safetyFilterLevel: nil, personGeneration: nil, outputOptions: ImageGenerationOutputOptions( @@ -123,6 +128,7 @@ final class ImageGenerationParametersTests: XCTestCase { storageURI: nil, negativePrompt: nil, aspectRatio: nil, + sampleImageSize: nil, safetyFilterLevel: safetyFilterLevel.rawValue, personGeneration: personFilterLevel.rawValue, outputOptions: nil, 
@@ -148,11 +154,13 @@ final class ImageGenerationParametersTests: XCTestCase { let negativePrompt = "test-negative-prompt" let imageFormat = ImagenImageFormat.png() let aspectRatio = ImagenAspectRatio.portrait3x4 + let imageSize = ImagenImageSize.size4K let addWatermark = false let generationConfig = ImagenGenerationConfig( negativePrompt: negativePrompt, numberOfImages: sampleCount, aspectRatio: aspectRatio, + imageSize: imageSize, imageFormat: imageFormat, addWatermark: addWatermark ) @@ -167,6 +175,7 @@ final class ImageGenerationParametersTests: XCTestCase { storageURI: storageURI, negativePrompt: negativePrompt, aspectRatio: aspectRatio.rawValue, + sampleImageSize: imageSize.rawValue, safetyFilterLevel: safetyFilterLevel.rawValue, personGeneration: personFilterLevel.rawValue, outputOptions: ImageGenerationOutputOptions( @@ -192,6 +201,90 @@ final class ImageGenerationParametersTests: XCTestCase { XCTAssertNil(parameters.outputOptions?.compressionQuality) } + func testParameters_includeImageSize() throws { + let sampleCount = 2 + let negativePrompt = "test-negative-prompt-image-size" + let imageFormat = ImagenImageFormat.png() + let imageSize = ImageConfig.ImageSize.size2K + let addWatermark = true + let generationConfig = ImagenGenerationConfig( + negativePrompt: negativePrompt, + numberOfImages: sampleCount, + imageSize: imageSize, + imageFormat: imageFormat, + addWatermark: addWatermark + ) + let expectedParameters = ImageGenerationParameters( + sampleCount: sampleCount, + storageURI: nil, + negativePrompt: negativePrompt, + aspectRatio: nil, + sampleImageSize: imageSize.rawValue, + safetyFilterLevel: nil, + personGeneration: nil, + outputOptions: ImageGenerationOutputOptions( + mimeType: imageFormat.mimeType, + compressionQuality: imageFormat.compressionQuality + ), + addWatermark: addWatermark, + includeResponsibleAIFilterReason: true, + includeSafetyAttributes: true + ) + + let parameters = ImagenModel.imageGenerationParameters( + storageURI: nil, + 
generationConfig: generationConfig, + safetySettings: nil + ) + + XCTAssertEqual(parameters, expectedParameters) + XCTAssertEqual(parameters.sampleImageSize, "2K") + XCTAssertNil(parameters.aspectRatio) + } + + func testParameters_includeAspectRatioAndImageSize() throws { + let sampleCount = 3 + let negativePrompt = "test-negative-prompt-aspect-ratio-and-image-size" + let imageFormat = ImagenImageFormat.jpeg(compressionQuality: 70) + let aspectRatio = ImageConfig.AspectRatio.portrait9x16 + let imageSize = ImageConfig.ImageSize.size4K + let addWatermark = false + let generationConfig = ImagenGenerationConfig( + negativePrompt: negativePrompt, + numberOfImages: sampleCount, + aspectRatio: aspectRatio, + imageSize: imageSize, + imageFormat: imageFormat, + addWatermark: addWatermark + ) + let expectedParameters = ImageGenerationParameters( + sampleCount: sampleCount, + storageURI: nil, + negativePrompt: negativePrompt, + aspectRatio: aspectRatio.rawValue, + sampleImageSize: imageSize.rawValue, + safetyFilterLevel: nil, + personGeneration: nil, + outputOptions: ImageGenerationOutputOptions( + mimeType: imageFormat.mimeType, + compressionQuality: imageFormat.compressionQuality + ), + addWatermark: addWatermark, + includeResponsibleAIFilterReason: true, + includeSafetyAttributes: true + ) + + let parameters = ImagenModel.imageGenerationParameters( + storageURI: nil, + generationConfig: generationConfig, + safetySettings: nil + ) + + XCTAssertEqual(parameters, expectedParameters) + XCTAssertEqual(parameters.aspectRatio, "9:16") + XCTAssertEqual(parameters.sampleImageSize, "4K") + } + // MARK: - Encoding Tests func testEncodeParameters_allSpecified() throws { @@ -199,6 +292,7 @@ final class ImageGenerationParametersTests: XCTestCase { let storageURI = "gs://bucket/folder" let negativePrompt = "test-negative-prompt" let aspectRatio = "16:9" + let imageSize = "4K" let safetyFilterLevel = "block_low_and_above" let personGeneration = "allow_adult" let mimeType = "image/png" @@ 
-211,6 +305,7 @@ final class ImageGenerationParametersTests: XCTestCase { storageURI: storageURI, negativePrompt: negativePrompt, aspectRatio: aspectRatio, + sampleImageSize: imageSize, safetyFilterLevel: safetyFilterLevel, personGeneration: personGeneration, outputOptions: outputOptions, @@ -235,6 +330,7 @@ final class ImageGenerationParametersTests: XCTestCase { "personGeneration" : "\(personGeneration)", "safetySetting" : "\(safetyFilterLevel)", "sampleCount" : \(sampleCount), + "sampleImageSize" : "\(imageSize)", "storageUri" : "\(storageURI)" } """) @@ -243,6 +339,7 @@ final class ImageGenerationParametersTests: XCTestCase { func testEncodeParameters_someSpecified() throws { let sampleCount = 2 let aspectRatio = "3:4" + let imageSize = "2K" let safetyFilterLevel = "block_medium_and_above" let addWatermark = true let parameters = ImageGenerationParameters( @@ -250,6 +347,7 @@ final class ImageGenerationParametersTests: XCTestCase { storageURI: nil, negativePrompt: nil, aspectRatio: aspectRatio, + sampleImageSize: imageSize, safetyFilterLevel: safetyFilterLevel, personGeneration: nil, outputOptions: nil, @@ -266,7 +364,8 @@ final class ImageGenerationParametersTests: XCTestCase { "addWatermark" : \(addWatermark), "aspectRatio" : "\(aspectRatio)", "safetySetting" : "\(safetyFilterLevel)", - "sampleCount" : \(sampleCount) + "sampleCount" : \(sampleCount), + "sampleImageSize" : "\(imageSize)" } """) } @@ -277,6 +376,7 @@ final class ImageGenerationParametersTests: XCTestCase { storageURI: nil, negativePrompt: nil, aspectRatio: nil, + sampleImageSize: nil, safetyFilterLevel: nil, personGeneration: nil, outputOptions: nil, diff --git a/FirebaseAI/Tests/Unit/Types/Imagen/ImagenGenerationRequestTests.swift b/FirebaseAI/Tests/Unit/Types/Imagen/ImagenGenerationRequestTests.swift index 70a98a54321..244c16fcf85 100644 --- a/FirebaseAI/Tests/Unit/Types/Imagen/ImagenGenerationRequestTests.swift +++ b/FirebaseAI/Tests/Unit/Types/Imagen/ImagenGenerationRequestTests.swift @@ 
-23,6 +23,7 @@ final class ImagenGenerationRequestTests: XCTestCase { let modelName = "test-model-name" let sampleCount = 4 let aspectRatio = "16:9" + let imageSize = "1K" let safetyFilterLevel = "block_low_and_above" let includeResponsibleAIFilterReason = true let includeSafetyAttributes = true @@ -31,6 +32,7 @@ final class ImagenGenerationRequestTests: XCTestCase { storageURI: nil, negativePrompt: nil, aspectRatio: aspectRatio, + sampleImageSize: imageSize, safetyFilterLevel: safetyFilterLevel, personGeneration: nil, outputOptions: nil, @@ -112,7 +114,8 @@ final class ImagenGenerationRequestTests: XCTestCase { "includeRaiReason" : \(includeResponsibleAIFilterReason), "includeSafetyAttributes" : \(includeSafetyAttributes), "safetySetting" : "\(safetyFilterLevel)", - "sampleCount" : \(sampleCount) + "sampleCount" : \(sampleCount), + "sampleImageSize" : "\(imageSize)" } } """) @@ -142,7 +145,8 @@ final class ImagenGenerationRequestTests: XCTestCase { "includeRaiReason" : \(includeResponsibleAIFilterReason), "includeSafetyAttributes" : \(includeSafetyAttributes), "safetySetting" : "\(safetyFilterLevel)", - "sampleCount" : \(sampleCount) + "sampleCount" : \(sampleCount), + "sampleImageSize" : "\(imageSize)" } } """)