Skip to content

Commit 28e8f71

Browse files
jpablomch and Yuan0320 committed
Add BF16 results
Signed-off-by: J. Pablo Muñoz <[email protected]> Co-authored-by: Yuan0320 <[email protected]>
1 parent 407fbb3 commit 28e8f71

File tree

99 files changed

+18960
-9630
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

99 files changed

+18960
-9630
lines changed

.ci/cspell_dict.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ ascale
2929
ascontiguousarray
3030
asym
3131
asymm
32+
atleast
3233
atol
3334
atrous
3435
autocast
@@ -406,6 +407,7 @@ rtype
406407
sacts
407408
sanh
408409
savelyev
410+
savelyevv
409411
savetxt
410412
sdpa
411413
sdxl

README.md

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -196,9 +196,6 @@ def transform_fn(data_item):
196196
calibration_dataset = nncf.Dataset(val_dataset, transform_fn)
197197
# Step 3: Run the quantization pipeline
198198
quantized_model = nncf.quantize(model, calibration_dataset)
199-
# Step 4: Remove auxiliary layers and operations added during the quantization process,
200-
# resulting in a clean, fully quantized model ready for deployment.
201-
stripped_model = nncf.strip(quantized_model)
202199
```
203200

204201
</details>

docs/api/requirements.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
astroid==2.15.8
22
Sphinx==6.1.3
33
sphinx-autoapi==2.1.0
4-
furo==2023.3.27
4+
furo==2023.3.27
5+
snowballstemmer==2.2.0

docs/usage/training_time_compression/quantization_aware_training/Usage.md

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -60,11 +60,7 @@ ov_quantized_model = ov.convert_model(quantized_model.cpu(), example_input=dummy
6060
# To OpenVINO format
6161
import openvino as ov
6262

63-
# Removes auxiliary layers and operations added during the quantization process,
64-
# resulting in a clean, fully quantized model ready for deployment.
65-
stripped_model = nncf.strip(quantized_model)
66-
67-
ov_quantized_model = ov.convert_model(stripped_model)
63+
ov_quantized_model = ov.convert_model(quantized_model)
6864
```
6965

7066
</details>

examples/llm_compression/openvino/tiny_llama/main.py

Lines changed: 5 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@
1212
from functools import partial
1313

1414
import numpy as np
15-
import openvino as ov
1615
from datasets import load_dataset
1716
from optimum.intel.openvino import OVModelForCausalLM
1817
from transformers import AutoTokenizer
@@ -27,9 +26,9 @@ def main():
2726
dataset = load_dataset("wikitext", "wikitext-2-raw-v1", split="test")
2827

2928
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
30-
model = OVModelForCausalLM.from_pretrained(MODEL_ID, export=True, load_in_8bit=False, compile=False, stateful=False)
29+
model = OVModelForCausalLM.from_pretrained(MODEL_ID, export=True, load_in_8bit=False, compile=False)
3130

32-
def transform_fn(data, model, tokenizer):
31+
def transform_fn(data, tokenizer):
3332
tokenized_text = tokenizer(data["text"], return_tensors="np")
3433
input_ids = tokenized_text["input_ids"]
3534
attention_mask = tokenized_text["attention_mask"]
@@ -39,23 +38,14 @@ def transform_fn(data, model, tokenizer):
3938
inputs["attention_mask"] = tokenized_text["attention_mask"]
4039
position_ids = np.cumsum(attention_mask, axis=1) - 1
4140
position_ids[attention_mask == 0] = 1
41+
inputs["position_ids"] = position_ids
4242

43-
# The magic forms KV cache as model inputs
4443
batch_size = input_ids.shape[0]
45-
for input_name in model.key_value_input_names:
46-
model_inputs = model.model.input(input_name)
47-
shape = model_inputs.get_partial_shape()
48-
shape[0] = batch_size
49-
if shape[2].is_dynamic:
50-
shape[2] = 0
51-
else:
52-
shape[1] = 0
53-
inputs[input_name] = ov.Tensor(model_inputs.get_element_type(), shape.get_shape())
44+
inputs["beam_idx"] = np.arange(batch_size, dtype=int)
5445

55-
inputs["position_ids"] = position_ids
5646
return inputs
5747

58-
quantization_dataset = nncf.Dataset(dataset, partial(transform_fn, model=model, tokenizer=tokenizer))
48+
quantization_dataset = nncf.Dataset(dataset, partial(transform_fn, tokenizer=tokenizer))
5949

6050
# Comment this text to turn off model optimization and measure performance of baseline model
6151
model.model = nncf.compress_weights(
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
# Copyright (c) 2025 Intel Corporation
2+
# Licensed under the Apache License, Version 2.0 (the "License");
3+
# you may not use this file except in compliance with the License.
4+
# You may obtain a copy of the License at
5+
# http://www.apache.org/licenses/LICENSE-2.0
6+
# Unless required by applicable law or agreed to in writing, software
7+
# distributed under the License is distributed on an "AS IS" BASIS,
8+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9+
# See the License for the specific language governing permissions and
10+
# limitations under the License.
11+
import re
12+
13+
14+
def wikitext_detokenizer(doc):
    """Undo WikiText-style tokenization spacing in ``doc["page"]``.

    The page text is passed through a fixed sequence of literal and regex
    substitutions (contractions, number separators, punctuation, bracket and
    quote padding, heading markers, and a few leftovers). The steps are applied
    strictly in the order listed below, since later steps operate on the
    output of earlier ones.

    Args:
        doc: Mapping with a ``"page"`` entry holding the tokenized text.

    Returns:
        The rewritten text as a string.
    """
    text = doc["page"]

    # contractions
    text = text.replace("s '", "s'")
    # NOTE(review): the slashes in this pattern are literal characters, so it
    # only matches text of the form "/' <digit>/" and rewrites it to the
    # literal "/'[0-9]/". Kept exactly as-is so the output is unchanged --
    # confirm the intended behavior before altering it.
    text = re.sub(r"/' [0-9]/", r"/'[0-9]/", text)

    # number separators, then punctuation (plain substring replacements)
    separator_and_punctuation = (
        (" @-@ ", "-"),
        (" @,@ ", ","),
        (" @.@ ", "."),
        (" : ", ": "),
        (" ; ", "; "),
        (" . ", ". "),
        (" ! ", "! "),
        (" ? ", "? "),
        (" , ", ", "),
    )
    for old, new in separator_and_punctuation:
        text = text.replace(old, new)

    # double brackets: strip whitespace just inside each bracket/quote pair
    paired_delimiters = (
        (r"\(\s*([^\)]*?)\s*\)", r"(\1)"),
        (r"\[\s*([^\]]*?)\s*\]", r"[\1]"),
        (r"{\s*([^}]*?)\s*}", r"{\1}"),
        (r"\"\s*([^\"]*?)\s*\"", r'"\1"'),
        (r"'\s*([^']*?)\s*'", r"'\1'"),
    )
    for pattern, replacement in paired_delimiters:
        text = re.sub(pattern, replacement, text)

    # miscellaneous; longer "=" runs must be collapsed before shorter ones
    degree_sign = chr(176)
    miscellaneous = (
        ("= = = =", "===="),
        ("= = =", "==="),
        ("= =", "=="),
        (" " + degree_sign + " ", degree_sign),
        (" \n", "\n"),
        ("\n ", "\n"),
        (" N ", " 1 "),
        (" 's", "'s"),
    )
    for old, new in miscellaneous:
        text = text.replace(old, new)

    return text
47+
48+
49+
def process_results(doc, results):
    """Pair the rolling log-likelihood with word and byte counts of the page.

    Args:
        doc: Mapping with a ``"page"`` entry holding the raw document text.
        results: Iterable containing exactly one element, the log-likelihood;
            unpacking raises ``ValueError`` for any other length.

    Returns:
        Dict mapping each metric name to a ``(loglikelihood, count)`` tuple:
        the word count for ``word_perplexity`` and the UTF-8 byte count for
        both ``byte_perplexity`` and ``bits_per_byte``.
    """
    (log_likelihood,) = results
    page = doc["page"]

    # IMPORTANT: wikitext counts number of words in *original doc before detokenization*
    word_count = len(re.split(r"\s+", page))
    byte_count = len(page.encode("utf-8"))

    per_word = (log_likelihood, word_count)
    per_byte = (log_likelihood, byte_count)
    return {
        "word_perplexity": per_word,
        "byte_perplexity": per_byte,
        "bits_per_byte": per_byte,
    }
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
task: wikitext_validation
2+
dataset_path: EleutherAI/wikitext_document_level
3+
dataset_name: wikitext-2-raw-v1
4+
output_type: loglikelihood_rolling
5+
training_split: train
6+
validation_split: validation
7+
test_split: null
8+
doc_to_text: ""
9+
doc_to_target: !function preprocess_wikitext.wikitext_detokenizer
10+
process_results: !function preprocess_wikitext.process_results
11+
should_decontaminate: true
12+
doc_to_decontamination_query: "{{page}}"
13+
metric_list:
14+
- metric: word_perplexity
15+
- metric: byte_perplexity
16+
- metric: bits_per_byte
17+
metadata:
18+
version: 2.0
19+
dataset_kwargs:
20+
trust_remote_code: true

0 commit comments

Comments
 (0)