openvinotoolkit
diff --git a/‎.ci/cspell_dict.txt
Lines changed: 2 additions & 0 deletions b/‎.ci/cspell_dict.txt
Lines changed: 2 additions & 0 deletions
diff --git a/‎README.md
Lines changed: 0 additions & 3 deletions b/‎README.md
Lines changed: 0 additions & 3 deletions
diff --git a/‎docs/api/requirements.txt
Lines changed: 2 additions & 1 deletion b/‎docs/api/requirements.txt
Lines changed: 2 additions & 1 deletion
diff --git a/‎docs/usage/training_time_compression/quantization_aware_training/Usage.md
Lines changed: 1 addition & 5 deletions b/‎docs/usage/training_time_compression/quantization_aware_training/Usage.md
Lines changed: 1 addition & 5 deletions
diff --git a/‎examples/llm_compression/openvino/tiny_llama/main.py
Lines changed: 5 additions & 15 deletions b/‎examples/llm_compression/openvino/tiny_llama/main.py
Lines changed: 5 additions & 15 deletions
diff --git a/‎examples/llm_compression/torch/qat_with_lora/custom_eval_tasks/preprocess_wikitext.py
Lines changed: 58 additions & 0 deletions b/‎examples/llm_compression/torch/qat_with_lora/custom_eval_tasks/preprocess_wikitext.py
Lines changed: 58 additions & 0 deletions
diff --git a/‎examples/llm_compression/torch/qat_with_lora/custom_eval_tasks/wikitext_validation.yaml
Lines changed: 20 additions & 0 deletions b/‎examples/llm_compression/torch/qat_with_lora/custom_eval_tasks/wikitext_validation.yaml
Lines changed: 20 additions & 0 deletions
@@ -29,6 +29,7 @@ ascale
 ascontiguousarray
 asym
 asymm
+atleast
 atol
 atrous
 autocast
@@ -406,6 +407,7 @@ rtype
 sacts
 sanh
 savelyev
+savelyevv
 savetxt
 sdpa
 sdxl
 
@@ -196,9 +196,6 @@ def transform_fn(data_item):
 calibration_dataset = nncf.Dataset(val_dataset, transform_fn)
 # Step 3: Run the quantization pipeline
 quantized_model = nncf.quantize(model, calibration_dataset)
-# Step 4: Remove auxiliary layers and operations added during the quantization process,
-# resulting in a clean, fully quantized model ready for deployment.
-stripped_model = nncf.strip(quantized_model)
 ```
 
 </details>
 
@@ -1,4 +1,5 @@
 astroid==2.15.8
 Sphinx==6.1.3
 sphinx-autoapi==2.1.0
-furo==2023.3.27
+furo==2023.3.27
+snowballstemmer==2.2.0
@@ -60,11 +60,7 @@ ov_quantized_model = ov.convert_model(quantized_model.cpu(), example_input=dummy
 # To OpenVINO format
 import openvino as ov
 
-# Removes auxiliary layers and operations added during the quantization process,
-# resulting in a clean, fully quantized model ready for deployment.
-stripped_model = nncf.strip(quantized_model)
-
-ov_quantized_model = ov.convert_model(stripped_model)
+ov_quantized_model = ov.convert_model(quantized_model)
 ```
 
 </details>
 
@@ -12,7 +12,6 @@
 from functools import partial
 
 import numpy as np
-import openvino as ov
 from datasets import load_dataset
 from optimum.intel.openvino import OVModelForCausalLM
 from transformers import AutoTokenizer
@@ -27,9 +26,9 @@ def main():
     dataset = load_dataset("wikitext", "wikitext-2-raw-v1", split="test")
 
     tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
-    model = OVModelForCausalLM.from_pretrained(MODEL_ID, export=True, load_in_8bit=False, compile=False, stateful=False)
+    model = OVModelForCausalLM.from_pretrained(MODEL_ID, export=True, load_in_8bit=False, compile=False)
 
-    def transform_fn(data, model, tokenizer):
+    def transform_fn(data, tokenizer):
         tokenized_text = tokenizer(data["text"], return_tensors="np")
         input_ids = tokenized_text["input_ids"]
         attention_mask = tokenized_text["attention_mask"]
@@ -39,23 +38,14 @@ def transform_fn(data, model, tokenizer):
         inputs["attention_mask"] = tokenized_text["attention_mask"]
         position_ids = np.cumsum(attention_mask, axis=1) - 1
         position_ids[attention_mask == 0] = 1
+        inputs["position_ids"] = position_ids
 
-        # The magic forms KV cache as model inputs
         batch_size = input_ids.shape[0]
-        for input_name in model.key_value_input_names:
-            model_inputs = model.model.input(input_name)
-            shape = model_inputs.get_partial_shape()
-            shape[0] = batch_size
-            if shape[2].is_dynamic:
-                shape[2] = 0
-            else:
-                shape[1] = 0
-            inputs[input_name] = ov.Tensor(model_inputs.get_element_type(), shape.get_shape())
+        inputs["beam_idx"] = np.arange(batch_size, dtype=int)
 
-        inputs["position_ids"] = position_ids
         return inputs
 
-    quantization_dataset = nncf.Dataset(dataset, partial(transform_fn, model=model, tokenizer=tokenizer))
+    quantization_dataset = nncf.Dataset(dataset, partial(transform_fn, tokenizer=tokenizer))
 
     # Comment this text to turn off model optimization and measure performance of baseline model
     model.model = nncf.compress_weights(
 
@@ -0,0 +1,58 @@
+# Copyright (c) 2025 Intel Corporation
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#      http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import re
+
+
+def wikitext_detokenizer(doc):  # Returns doc["page"] with tokenizer-style spacing around punctuation, brackets and headings undone.
+    string = doc["page"]
+    # contractions: rejoin an apostrophe that was separated from a preceding "s"
+    string = string.replace("s '", "s'")
+    string = re.sub(r"/' [0-9]/", r"/'[0-9]/", string)  # NOTE(review): the "/" are literal in Python, so this only matches text like "/' 5/" - looks like a JS-style regex literal; confirm intent before changing
+    # number separators: collapse the " @-@ ", " @,@ " and " @.@ " markers to the bare character
+    string = string.replace(" @-@ ", "-")
+    string = string.replace(" @,@ ", ",")
+    string = string.replace(" @.@ ", ".")
+    # punctuation: drop the space in front of a mark that is surrounded by spaces
+    string = string.replace(" : ", ": ")
+    string = string.replace(" ; ", "; ")
+    string = string.replace(" . ", ". ")
+    string = string.replace(" ! ", "! ")
+    string = string.replace(" ? ", "? ")
+    string = string.replace(" , ", ", ")
+    # double brackets: trim whitespace just inside paired (), [], {}, "" and '' delimiters
+    string = re.sub(r"\(\s*([^\)]*?)\s*\)", r"(\1)", string)
+    string = re.sub(r"\[\s*([^\]]*?)\s*\]", r"[\1]", string)
+    string = re.sub(r"{\s*([^}]*?)\s*}", r"{\1}", string)
+    string = re.sub(r"\"\s*([^\"]*?)\s*\"", r'"\1"', string)
+    string = re.sub(r"'\s*([^']*?)\s*'", r"'\1'", string)
+    # miscellaneous: spaced "=" runs, degree sign, spaces next to newlines, " N " and " 's"
+    string = string.replace("= = = =", "====")  # longest run first; a shorter pattern applied earlier would break it up
+    string = string.replace("= = =", "===")
+    string = string.replace("= =", "==")
+    string = string.replace(" " + chr(176) + " ", chr(176))  # chr(176) is the degree sign
+    string = string.replace(" \n", "\n")
+    string = string.replace("\n ", "\n")
+    string = string.replace(" N ", " 1 ")  # NOTE(review): presumably " N " is a number placeholder in the dataset - confirm
+    string = string.replace(" 's", "'s")
+
+    return string
+
+
+def process_results(doc, results):  # Pairs the single log-likelihood in results with word and byte counts of doc["page"].
+    (loglikelihood,) = results  # unpacking raises ValueError unless results holds exactly one item
+    # IMPORTANT: wikitext counts number of words in *original doc before detokenization*
+    _words = len(re.split(r"\s+", doc["page"]))  # whitespace-separated pieces; includes empty strings if the page starts or ends with whitespace
+    _bytes = len(doc["page"].encode("utf-8"))  # length of the raw page in UTF-8 bytes
+    return {
+        "word_perplexity": (loglikelihood, _words),
+        "byte_perplexity": (loglikelihood, _bytes),
+        "bits_per_byte": (loglikelihood, _bytes),
+    }
@@ -0,0 +1,20 @@
+task: wikitext_validation
+dataset_path: EleutherAI/wikitext_document_level
+dataset_name: wikitext-2-raw-v1
+output_type: loglikelihood_rolling
+training_split: train
+validation_split: validation
+test_split: null
+doc_to_text: ""
+doc_to_target: !function preprocess_wikitext.wikitext_detokenizer
+process_results: !function preprocess_wikitext.process_results
+should_decontaminate: true
+doc_to_decontamination_query: "{{page}}"
+metric_list:
+  - metric: word_perplexity
+  - metric: byte_perplexity
+  - metric: bits_per_byte
+metadata:
+  version: 2.0
+dataset_kwargs:
+  trust_remote_code: true