CodeWithKyrian
diff --git a/‎README.md‎
Lines changed: 68 additions & 36 deletions b/‎README.md‎
Lines changed: 68 additions & 36 deletions
diff --git a/‎docs/.vitepress/config.mts‎
Lines changed: 36 additions & 15 deletions b/‎docs/.vitepress/config.mts‎
Lines changed: 36 additions & 15 deletions
diff --git a/‎docs/auto-models.md‎
Lines changed: 0 additions & 5 deletions b/‎docs/auto-models.md‎
Lines changed: 0 additions & 5 deletions
diff --git a/‎docs/auto-tokenizers.md‎
Lines changed: 0 additions & 5 deletions b/‎docs/auto-tokenizers.md‎
Lines changed: 0 additions & 5 deletions
diff --git a/‎docs/configuration.md‎
Lines changed: 17 additions & 1 deletion b/‎docs/configuration.md‎
Lines changed: 17 additions & 1 deletion
diff --git a/‎docs/getting-started.md‎
Lines changed: 9 additions & 12 deletions b/‎docs/getting-started.md‎
Lines changed: 9 additions & 12 deletions
diff --git a/‎docs/image-classification.md‎
Lines changed: 101 additions & 0 deletions b/‎docs/image-classification.md‎
Lines changed: 101 additions & 0 deletions
diff --git a/‎docs/image-feature-extraction.md‎
Lines changed: 74 additions & 0 deletions b/‎docs/image-feature-extraction.md‎
Lines changed: 74 additions & 0 deletions
@@ -11,7 +11,7 @@ export default defineConfig({
             {text: 'Home', link: '/'},
             {text: 'Docs', link: '/introduction'},
             {
-                text: '0.1.x',
+                text: '0.3.x',
                 items: [
                     {
                         text: 'Changelog',
@@ -41,31 +41,52 @@ export default defineConfig({
                 collapsed: false,
                 link: '/pipelines',
                 items: [
-                    {text: 'Text Classification', link: '/text-classification'},
-                    {text: 'Fill Mask', link: '/fill-mask'},
-                    {text: 'Zero Shot Classification', link: '/zero-shot-classification'},
-                    {text: 'Question Answering', link: '/question-answering'},
-                    {text: 'Token Classification', link: '/token-classification'},
-                    {text: 'Feature Extraction', link: '/feature-extraction'},
-                    {text: 'Text to Text Generation', link: '/text-to-text-generation'},
-                    {text: 'Translation', link: '/translation'},
-                    {text: 'Summarization', link: '/summarization'},
-                    {text: 'Text Generation', link: '/text-generation'},
+                    {
+                        text: 'NLP Tasks',
+                        collapsed: true,
+                        items: [
+                            {text: 'Text Classification', link: '/text-classification'},
+                            {text: 'Fill Mask', link: '/fill-mask'},
+                            {text: 'Zero Shot Classification', link: '/zero-shot-classification'},
+                            {text: 'Question Answering', link: '/question-answering'},
+                            {text: 'Token Classification', link: '/token-classification'},
+                            {text: 'Feature Extraction', link: '/feature-extraction'},
+                            {text: 'Text to Text Generation', link: '/text-to-text-generation'},
+                            {text: 'Translation', link: '/translation'},
+                            {text: 'Summarization', link: '/summarization'},
+                            {text: 'Text Generation', link: '/text-generation'},
+                        ]
+                    },
+                    {
+                        text: 'Computer Vision Tasks',
+                        collapsed: true,
+                        items: [
+                            {text: 'Image Classification', link: '/image-classification'},
+                            {text: 'Zero Shot Image Classification', link: '/zero-shot-image-classification'},
+                            {text: 'Object Detection', link: '/object-detection'},
+                            {text: 'Zero Shot Object Detection', link: '/zero-shot-object-detection'},
+                            {text: 'Image Feature Extraction', link: '/image-feature-extraction'},
+                            {text: 'Image To Text', link: '/image-to-text'},
+                            {text: 'Image To Image', link: '/image-to-image'},
+                        ]
+                    }
                 ]
             },
             {
                 text: 'Advanced Usage',
                 collapsed: false,
                 items: [
-                    {text: 'Auto Models', link: '/auto-models'},
-                    {text: 'Auto Tokenizers', link: '/auto-tokenizers'},
+                    {text: 'Models', link: '/models'},
+                    {text: 'Tokenizers', link: '/tokenizers'},
                 ]
             },
             {
                 text: 'Utilities',
                 collapsed: false,
                 items: [
-                    {text: 'Generation', link: '/generation'},
+                    {text: 'Generation', link: '/utils/generation'},
+                    {text: 'Image', link: '/utils/image'},
+                    {text: 'Tensor', link: '/utils/tensor'},
                 ]
             }
         ],
@@ -77,7 +98,7 @@ export default defineConfig({
 
         footer: {
             message: 'Released under the MIT License.',
-            copyright: 'Copyright © 2024 <a href="https://github.com/CodeWithKyrian">Kyrian Obikwelu</a>'
+            copyright: 'Copyright © 2024 <a href="https://twitter.com/CodeWithKyrian">Kyrian Obikwelu</a>'
         },
 
         editLink: {
 
@@ -14,9 +14,11 @@ models, and the remote path template. These settings allow you to tailor how and
 
 ```php
 use Codewithkyrian\Transformers\Transformers;
+use Codewithkyrian\Transformers\Utils\ImageDriver;
 
  Transformers::setup()
         ->setCacheDir('/path/to/models')
+        ->setImageDriver(ImageDriver::IMAGICK)
         ->setRemoteHost('https://yourmodelshost.com')
         ->setRemotePathTemplate('custom/path/{model}/{file}')
         ->setAuthToken('your-token')
@@ -94,6 +96,20 @@ Transformers::setup()
     ->apply();
 ```
 
+### `setImageDriver(ImageDriver $imageDriver)`
+
+This setting allows you to specify the image backend to use for image processing tasks. By default, TransformersPHP uses
+the `IMAGICK` image driver. You can change this to `GD` or `VIPS` if you prefer, just make sure to have the required
+extensions installed.
+
+```php
+use Codewithkyrian\Transformers\Utils\ImageDriver;
+
+Transformers::setup()
+    ->setImageDriver(ImageDriver::GD)
+    ->apply();
+```
+
 ## Applying Configuration
 
 ::: danger VERY IMPORTANT
@@ -127,7 +143,7 @@ use Codewithkyrian\Transformers\Transformers;
 ### Laravel Projects
 
 In a Laravel project, you can add global configuration in the `AppServiceProvider` class. Laravel service providers are
-excellent locations for bootstrap code, making them the best place to set up global configurations. It's recommended to 
+excellent locations for bootstrap code, making them the best place to set up global configurations. It's recommended to
 set the cache directory to a subdirectory of the `storage` directory, as it's writable and not publicly accessible.
 
 ::: code-group
 
@@ -21,17 +21,14 @@ You can install the library via Composer. This is the recommended way to install
 composer require codewithkyrian/transformers
 ```
 
-After installation, you need to initialize the package to download the necessary shared libraries for running the ONNX
-models:
-
-```bash
-./vendor/bin/transformers install
-```
+ONNX runtime will be installed automatically as well. For Windows users, it may take more time to install the ONNX
+library compared to Linux or macOS users (no shade 😅).
 
 > [!CAUTION]
-> These shared libraries to be downloaded are platform-specific, so it's important to run this command on the target
-> platform where the code will be executed. For example, if you're using a Docker container, run the `install` command
-> inside that container.
+> The ONNX library is platform-specific, so it's important to run the composer require command on the target platform
+> where the code will be executed. In most cases, this will be your development machine or a server where you deploy
+> your application, but if you're using a Docker container, run the `composer require` command inside that container.
 
 This command sets up everything you need to start using pre-trained ONNX models with TransformersPHP.
 
@@ -142,7 +139,7 @@ in PHP 7.4 and later, but it may not be enabled by default. To check if the FFI
 command:
 
 ```bash
-php -m | grep ffi
+php -m | grep FFI
 ```
 
 If the FFI extension is not enabled, you can enable it by uncommenting (removing the `;` from the beginning of the line)
@@ -153,7 +150,7 @@ following line in your `php.ini` file:
 extension = ffi
 ```
 
-Also, you need to set the `ffi.enable` directive to `true` in your `php.ini` file:
+TransformersPHP does not support FFI preloading yet, so you need to enable the `ffi.enable` directive in your `php.ini`
+file:
 
 ```ini
 ffi.enable = true
@@ -166,7 +163,7 @@ After making these changes, restart your web server or PHP-FPM service, and you
 Just-In-Time (JIT) compilation is a feature that allows PHP to compile and execute code at runtime. JIT compilation can
 improve the performance of your application by compiling frequently executed code paths into machine code. While you
 can use TransformersPHP without JIT compilation, enabling it can provide a significant performance boost (> 2x in some
-cases).
+cases) since there are many matrix multiplications and other mathematical operations involved in running ONNX models.
 
 JIT compilation is available in PHP 8.0 and later, but it may not be enabled by default. To enable JIT compilation,
 change the `opcache.jit` directive in your `php.ini` file:
 
@@ -0,0 +1,101 @@
+---
+outline: deep
+---
+
+# Image Classification <Badge type="tip" text="^0.3.0" />
+
+Image classification is a computer vision task that involves assigning a label or class to an image. An image
+is expected to have only one label in this task. The labels to be selected from are predefined by the model.
+This task accepts image inputs and returns the classification label and the confidence score.
+
+## Task ID
+
+- `image-classification`
+
+## Default Model
+
+- `Xenova/vit-base-patch16-224`
+
+## Use Cases
+
+Image classification models find application in various scenarios, including:
+
+- **Stock Photography Keywording:** Assigning keywords to images in stock photography databases.
+- **Image Search:** Organizing and categorizing photo galleries on devices or in the cloud based on multiple keywords or
+  tags.
+- **Content Filtering:** Filtering and categorizing images for content moderation purposes.
+- **Medical Imaging:** Assisting in the diagnosis and classification of medical images such as X-rays and MRI scans.
+
+## Running an Inference Session
+
+Here's how to perform image classification using the pipeline:
+
+```php
+use function Codewithkyrian\Transformers\Pipelines\pipeline;
+
+$classifier = pipeline('image-classification');
+
+$result = $classifier('path/to/image.jpg');
+```
+
+::: details Click to view output
+
+```php
+['label' => 'tiger, Panthera tigris',  'score' => 0.63534494664876]
+```
+
+:::
+
+## Pipeline Input Options
+
+When running the `image-classification` pipeline, you can use the following options:
+
+- ### `texts` *(string)*
+  The image(s) to classify. It can be a local file path, a file resource, a URL to an image (local or remote), or an
+  array of these inputs. It's the first argument so there's no need to pass it as a named argument.
+  ```php
+      $result = $classifier('https://example.com/image.jpg');
+  ```
+
+- ### `topK` *(int)*
+  The number of top labels to return. The default is `1`.
+  ```php
+      $result = $classifier('https://example.com/image.jpg', topK: 3);
+  ```
+  ::: details Click to view output
+
+  ```php
+  [
+    ['label' => 'tiger, Panthera tigris',  'score' => 0.63534494664876],
+    ['label' => 'zebra',  'score' => 0.123456789],
+    ['label' => 'lion, Panthera leo',  'score' => 0.098765432]
+  ]
+  ```
+  :::
+
+## Pipeline Outputs
+
+The output of the pipeline is an array containing the classification label and the confidence score. The confidence
+score is a value between 0 and 1, with 1 being the highest confidence.
+
+Since the actual labels depend on the model, it's crucial to consult the model's documentation for the specific labels
+it uses. Here are examples demonstrating how outputs might differ:
+
+For a single image:
+
+```php
+['label' => 'tiger, Panthera tigris',  'score' => 0.63534494664876]
+```
+
+For multiple images:
+
+```php
+[
+    ['label' => 'tiger, Panthera tigris',  'score' => 0.63534494664876],
+    ['label' => 'cat',  'score' => 0.987654321],
+    ['label' => 'dog',  'score' => 0.87654321]
+]
+```
+
+
+  
@@ -0,0 +1,74 @@
+---
+outline: deep
+---
+
+# Image Feature Extraction <Badge type="tip" text="^0.3.0" />
+
+Image feature extraction is a computer vision task that involves extracting high-level features from images. These
+features can be used for various purposes, such as image similarity search, image retrieval, and content-based image
+retrieval. The task accepts image inputs and returns a feature vector that represents the image.
+
+## Task ID
+
+- `image-feature-extraction`
+
+## Default Model
+
+- `Xenova/vit-base-patch16-224-in21k`
+
+## Use Cases
+
+Image feature extraction models find application in various scenarios, including:
+
+- **Image Retrieval:** Generating feature vectors for images to enable similarity search and retrieval of similar images
+  from a database.
+- **Content-Based Image Retrieval:** Enabling search engines to retrieve images based on their visual content rather
+  than textual metadata.
+- **Image Similarity Search:** Finding visually similar images based on their feature representations.
+- **Visual Search:** Enhancing e-commerce platforms by allowing users to search for products using images rather than
+  text.
+
+## Running an Inference Session
+
+Here's how to perform image feature extraction using the pipeline:
+
+```php
+use function Codewithkyrian\Transformers\Pipelines\pipeline;
+
+$extractor = pipeline('image-feature-extraction');
+
+$result = $extractor('path/to/image.jpg');
+```
+
+## Pipeline Input Options
+
+When running the `image-feature-extraction` pipeline, you can use the following options:
+
+- ### `texts` *(string|array)*
+  The image(s) from which features are extracted. You can pass a single image path or an array of image paths for batch
+  processing. It's required and is the first argument, so there's no need to pass it as a named argument.
+
+- ### `pool` *(bool)*
+  When set to `true`, it averages the feature vectors across all patches in the image. Before using this option, make
+  sure the model has a pooler layer. The default value is `false`.
+
+## Pipeline Output
+
+The output of the `image-feature-extraction` pipeline is a feature vector that represents the input image. The shape
+and size of the feature vector depend on the model architecture and configuration. For no pooling, the shape is
+usually `[X, Y, Z]` where:
+
+- `X` Represents the batch size (1 for single image input).
+- `Y` Denotes the sequence length, i.e. the number of tokens or patches the image is split into (including any special
+  tokens the model adds, such as a classification token). It depends on the input resolution and the model's patch size.
+- `Z` Represents the size of the feature vector extracted from each patch. This dimension is typically fixed across
+  patches and corresponds to the size of the feature vectors extracted from the image patches.
+
+For example, with certain models, such as those based on the Vision Transformer (ViT) architecture, the feature vector's
+shape might be `[1, 197, 768]`.
+
+When pooling is applied, the output shape is typically `[X, Z]`, where `Z` represents the size of the pooled feature
+vector. Pooling aggregates information from all the tokens or patches into a single feature vector, resulting in a
+reduced-dimensional representation of the input image, e.g. `[1, 768]`.
+