|
| 1 | +--- |
| 2 | +layout: model |
| 3 | +title: E5V Embeddings |
| 4 | +author: John Snow Labs |
| 5 | +name: e5v_int4 |
| 6 | +date: 2025-06-10 |
| 7 | +tags: [en, open_source, openvino] |
| 8 | +task: Embeddings |
| 9 | +language: en |
| 10 | +edition: Spark NLP 5.5.1 |
| 11 | +spark_version: 3.0 |
| 12 | +supported: true |
| 13 | +engine: openvino |
| 14 | +annotator: E5VEmbeddings |
| 15 | +article_header: |
| 16 | + type: cover |
| 17 | +use_language_switcher: "Python-Scala-Java" |
| 18 | +--- |
| 19 | + |
| 20 | +## Description |
| 21 | + |
| 22 | +Universal multimodal embeddings using E5-V. |
| 23 | + |
| 24 | +E5-V is a multimodal embedding model that bridges the modality gap between text and images, enabling strong performance in cross-modal retrieval, classification, clustering, and more. It supports both image+text and text-only embedding scenarios, and is fine-tuned from lmms-lab/llama3-llava-next-8b. The default model is `"e5v_int4"`. |
| 25 | + |
| 26 | +Pretrained models can be loaded with `pretrained` of the companion object, for example `E5VEmbeddings.pretrained()`, which loads the default model. |
| 27 | + |
| 28 | +## Predicted Entities |
| 29 | + |
| 30 | + |
| 31 | + |
| 32 | +{:.btn-box} |
| 33 | +<button class="button button-orange" disabled>Live Demo</button> |
| 34 | +<button class="button button-orange" disabled>Open in Colab</button> |
| 35 | +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/e5v_int4_en_5.5.1_3.0_1749533458977.zip){:.button.button-orange.button-orange-trans.arr.button-icon} |
| 36 | +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/e5v_int4_en_5.5.1_3.0_1749533458977.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} |
| 37 | + |
| 38 | +## How to use |
| 39 | + |
| 40 | + |
| 41 | + |
| 42 | +<div class="tabs-box" markdown="1"> |
| 43 | +{% include programmingLanguageSelectScalaPythonNLU.html %} |
| 44 | +```python |
| 45 | +# Image + Text Embedding |
| 46 | +import sparknlp |
| 47 | +from sparknlp.base import * |
| 48 | +from sparknlp.annotator import * |
| 49 | +from pyspark.ml import Pipeline |
| 50 | +from pyspark.sql.functions import lit |
| 51 | + |
| 52 | +image_df = spark.read.format("image").option("dropInvalid", True).load(imageFolder) |
| 53 | +imagePrompt = "<|start_header_id|>user<|end_header_id|>\n\n<image>\\nSummary above image in one word: <|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n \n" |
| 54 | +test_df = image_df.withColumn("text", lit(imagePrompt)) |
| 55 | +imageAssembler = ImageAssembler() \ |
| 56 | + .setInputCol("image") \ |
| 57 | + .setOutputCol("image_assembler") |
| 58 | +e5vEmbeddings = E5VEmbeddings.pretrained() \ |
| 59 | + .setInputCols(["image_assembler"]) \ |
| 60 | + .setOutputCol("e5v") |
| 61 | +pipeline = Pipeline().setStages([ |
| 62 | + imageAssembler, |
| 63 | + e5vEmbeddings |
| 64 | +]) |
| 65 | +result = pipeline.fit(test_df).transform(test_df) |
| 66 | +result.select("e5v.embeddings").show(truncate=False) |
| 67 | + |
| 68 | +# Text-Only Embedding |
| 69 | +from sparknlp.util import EmbeddingsDataFrameUtils |
| 70 | +textPrompt = "<|start_header_id|>user<|end_header_id|>\n\n<sent>\\nSummary above sentence in one word: <|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n \n" |
| 71 | +textDesc = "A cat sitting in a box." |
| 72 | +nullImageDF = spark.createDataFrame( |
| 73 | + spark.sparkContext.parallelize([EmbeddingsDataFrameUtils.emptyImageRow]), |
| 74 | + EmbeddingsDataFrameUtils.imageSchema) |
| 75 | +textDF = nullImageDF.withColumn("text", lit(textPrompt.replace("<sent>", textDesc))) |
| 76 | +e5vEmbeddings = E5VEmbeddings.pretrained() \ |
| 77 | + .setInputCols(["image"]) \ |
| 78 | + .setOutputCol("e5v") |
| 79 | +result = e5vEmbeddings.transform(textDF) |
| 80 | +result.select("e5v.embeddings").show(truncate=False) |
| 81 | +``` |
| 82 | +```scala |
| 83 | +// Image + Text Embedding |
| 84 | +import org.apache.spark.sql.functions.lit |
| 85 | +import com.johnsnowlabs.nlp.base.ImageAssembler |
| 86 | +import com.johnsnowlabs.nlp.embeddings.E5VEmbeddings |
| 87 | +import org.apache.spark.ml.Pipeline |
| 88 | + |
| 89 | +val imageDF = spark.read.format("image").option("dropInvalid", value = true).load(imageFolder) |
| 90 | +val imagePrompt = "<|start_header_id|>user<|end_header_id|>\n\n<image>\\nSummary above image in one word: <|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n \n" |
| 91 | +val testDF = imageDF.withColumn("text", lit(imagePrompt)) |
| 92 | +val imageAssembler = new ImageAssembler().setInputCol("image").setOutputCol("image_assembler") |
| 93 | +val e5vEmbeddings = E5VEmbeddings.pretrained() |
| 94 | + .setInputCols("image_assembler") |
| 95 | + .setOutputCol("e5v") |
| 96 | +val pipeline = new Pipeline().setStages(Array(imageAssembler, e5vEmbeddings)) |
| 97 | +val result = pipeline.fit(testDF).transform(testDF) |
| 98 | +result.select("e5v.embeddings").show(truncate = false) |
| 99 | + |
| 100 | +// Text-Only Embedding |
| 101 | +import com.johnsnowlabs.nlp.util.EmbeddingsDataFrameUtils.{emptyImageRow, imageSchema} |
| 102 | +val textPrompt = "<|start_header_id|>user<|end_header_id|>\n\n<sent>\\nSummary above sentence in one word: <|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n \n" |
| 103 | +val textDesc = "A cat sitting in a box." |
| 104 | +val nullImageDF = spark.createDataFrame(spark.sparkContext.parallelize(Seq(emptyImageRow)), imageSchema) |
| 105 | +val textDF = nullImageDF.withColumn("text", lit(textPrompt.replace("<sent>", textDesc))) |
| 106 | +val e5vEmbeddings = E5VEmbeddings.pretrained() |
| 107 | + .setInputCols("image") |
| 108 | + .setOutputCol("e5v") |
| 109 | +val result2 = e5vEmbeddings.transform(textDF) |
| 110 | +result2.select("e5v.embeddings").show(truncate = false) |
| 111 | +``` |
| 112 | +</div> |
| 113 | + |
| 114 | +{:.model-param} |
| 115 | +## Model Information |
| 116 | + |
| 117 | +{:.table-model} |
| 118 | +|---|---| |
| 119 | +|Model Name:|e5v_int4| |
| 120 | +|Compatibility:|Spark NLP 5.5.1+| |
| 121 | +|License:|Open Source| |
| 122 | +|Edition:|Official| |
| 123 | +|Input Labels:|[image_assembler]| |
| 124 | +|Output Labels:|[e5v]| |
| 125 | +|Language:|en| |
| 126 | +|Size:|4.9 GB| |
0 commit comments