@@ -801,11 +801,10 @@ struct PagedAttentionManager {
801801 return get_memory_from_vec (adaptive_rkv_diversity_block_set_indices_begins);
802802 }
803803
804- <<<<<<< HEAD
805804 memory::ptr get_token_type_ids_memory () {
806805 std::vector<int > token_type_ids = { 0 };
807806 return get_memory_from_vec (token_type_ids);
808- =======
807+
809808 memory::ptr get_qq_bias_memory () {
810809 std::vector<uint8_t > flat_qq_bias;
811810 for (const auto & matrix : qq_bias) {
@@ -818,7 +817,6 @@ struct PagedAttentionManager {
818817
819818 memory::ptr get_qq_bias_begins_memory () {
820819 return get_memory_from_vec (qq_bias_begins);
821- >>>>>>> 4e155ca09b (split tree mask PR)
822820 }
823821
824822 float get_default_scale () {
@@ -1850,13 +1848,10 @@ struct PagedAttentionTest : public ::testing::TestWithParam<T> {
18501848 auto adaptive_rkv_evictable_sizes_mem = pam.get_adaptive_rkv_evictable_sizes_memory ();
18511849 auto adaptive_rkv_diversity_block_set_indices_mem = pam.get_adaptive_rkv_diversity_block_set_indices_memory ();
18521850 auto adaptive_rkv_diversity_block_set_indices_begins_mem = pam.get_adaptive_rkv_diversity_block_set_indices_begins_memory ();
1853- <<<<<<< HEAD
18541851 auto token_type_ids_mem = pam.get_token_type_ids_memory ();
18551852
1856- =======
18571853 auto qq_bias = pam.get_qq_bias_memory ();
18581854 auto qq_bias_begins = pam.get_qq_bias_begins_memory ();
1859- >>>>>>> 4e155ca09b (split tree mask PR)
18601855 auto query_layout = query_mem->get_layout ();
18611856 auto key_layout = key_mem->get_layout ();
18621857 auto value_layout = value_mem->get_layout ();
@@ -1882,12 +1877,9 @@ struct PagedAttentionTest : public ::testing::TestWithParam<T> {
18821877 auto adaptive_rkv_evictable_sizes_layout = adaptive_rkv_evictable_sizes_mem->get_layout ();
18831878 auto adaptive_rkv_diversity_block_set_indices_layout = adaptive_rkv_diversity_block_set_indices_mem->get_layout ();
18841879 auto adaptive_rkv_diversity_block_set_indices_begins_layout = adaptive_rkv_diversity_block_set_indices_begins_mem->get_layout ();
1885- <<<<<<< HEAD
18861880 auto token_type_ids_layout = token_type_ids_mem->get_layout ();
1887- =======
18881881 auto qq_bias_layout = qq_bias->get_layout ();
18891882 auto qq_bias_begins_layout = qq_bias_begins->get_layout ();
1890- >>>>>>> 4e155ca09b (split tree mask PR)
18911883
18921884 // make layouts dynamic
18931885 query_layout.set_partial_shape (ov::PartialShape{ -1 , p.num_heads * p.k_head_size });
@@ -1977,12 +1969,9 @@ struct PagedAttentionTest : public ::testing::TestWithParam<T> {
19771969 input_info (" adaptive_rkv_evictable_sizes" ),
19781970 input_info (" adaptive_rkv_diversity_block_set_indices" ),
19791971 input_info (" adaptive_rkv_diversity_block_set_indices_begins" ),
1980- <<<<<<< HEAD
19811972 input_info (" token_type_ids" ),
1982- =======
19831973 input_info (" qq_bias" ),
19841974 input_info (" qq_bias_begins" )
1985- >>>>>>> 4e155ca09b (split tree mask PR)
19861975 };
19871976
19881977 auto pa_prim = paged_attention (" paged_attention" , pa_inputs);
@@ -2056,12 +2045,9 @@ struct PagedAttentionTest : public ::testing::TestWithParam<T> {
20562045 topology.add (input_layout (" adaptive_rkv_evictable_sizes" , adaptive_rkv_evictable_sizes_layout));
20572046 topology.add (input_layout (" adaptive_rkv_diversity_block_set_indices" , adaptive_rkv_diversity_block_set_indices_layout));
20582047 topology.add (input_layout (" adaptive_rkv_diversity_block_set_indices_begins" , adaptive_rkv_diversity_block_set_indices_begins_layout));
2059- <<<<<<< HEAD
20602048 topology.add (input_layout (" token_type_ids" , token_type_ids_layout));
2061- =======
20622049 topology.add (input_layout (" qq_bias" , qq_bias_layout));
20632050 topology.add (input_layout (" qq_bias_begins" , qq_bias_begins_layout));
2064- >>>>>>> 4e155ca09b (split tree mask PR)
20652051 }
20662052
20672053 ExecutionConfig config = get_test_default_config (get_test_engine ());
@@ -2099,12 +2085,9 @@ struct PagedAttentionTest : public ::testing::TestWithParam<T> {
20992085 network->set_input_data (" adaptive_rkv_evictable_sizes" , adaptive_rkv_evictable_sizes_mem);
21002086 network->set_input_data (" adaptive_rkv_diversity_block_set_indices" , adaptive_rkv_diversity_block_set_indices_mem);
21012087 network->set_input_data (" adaptive_rkv_diversity_block_set_indices_begins" , adaptive_rkv_diversity_block_set_indices_begins_mem);
2102- <<<<<<< HEAD
21032088 network->set_input_data (" token_type_ids" , token_type_ids_mem);
2104- =======
21052089 network->set_input_data (" qq_bias" , qq_bias);
21062090 network->set_input_data (" qq_bias_begins" , qq_bias_begins);
2107- >>>>>>> 4e155ca09b (split tree mask PR)
21082091
21092092 auto outputs = network->execute ();
21102093
@@ -2208,14 +2191,11 @@ struct paged_attention_test_params {
22082191 bool has_adaptive_rkv = false ;
22092192 int start_size = 0 ; // Common start_size for all sequences
22102193 std::vector<int > evictable_sizes; // Per-sequence evictable sizes
2211- <<<<<<< HEAD
22122194 ov::element::Type kv_cache_precision = ov::element::dynamic;
2213- =======
22142195
22152196 // test query-to-query attention bias
22162197 bool has_qq_bias = false ;
22172198 QueryToQueryAttentionDescriptor qq_bias_config;
2218- >>>>>>> 4e155ca09b (split tree mask PR)
22192199};
22202200
// Test fixture: PagedAttentionTest specialised for paged_attention_test_params; adds no members of its own.
22212201class paged_attention_test : public PagedAttentionTest <paged_attention_test_params> {};
@@ -2502,12 +2482,12 @@ INSTANTIATE_TEST_SUITE_P(smoke_adaptive_rkv, adaptive_rkv_diversity_test, ::test
25022482
25032483INSTANTIATE_TEST_SUITE_P (smoke_qq_bias, qq_bias_test, ::testing::ValuesIn(std::vector<paged_attention_test_params>{
25042484 // basic tests with 1 sequence
2505- paged_attention_test_params{ {{4 , 32 }}, 2 , 2 , 64 , 64 , 16 , {100.0 }, 0 , false , DISABLE_CACHE_COMPRESSION, ov::internal::CacheQuantMode::BY_TOKEN, STATIC_INPUT_PAD, ENABLE_SCORES, DISABLE_ROTATION, DISABLE_FA_V2, DISABLE_DIVERSITY, 0 , {}, true , ENABLE_QQ_BIAS },
2506- paged_attention_test_params{ {{4 , 32 }}, 2 , 2 , 64 , 64 , 16 , {100.0 }, 0 , false , ENABLE_CACHE_COMPRESSION, ov::internal::CacheQuantMode::BY_TOKEN, STATIC_INPUT_PAD, ENABLE_SCORES, DISABLE_ROTATION, DISABLE_FA_V2, DISABLE_DIVERSITY, 0 , {}, true , ENABLE_QQ_BIAS },
2507- paged_attention_test_params{ {{4 , 32 }}, 2 , 2 , 64 , 64 , 16 , {100.0 }, 0 , false , ENABLE_CACHE_COMPRESSION, ov::internal::CacheQuantMode::BY_CHANNEL, STATIC_INPUT_PAD, ENABLE_SCORES, DISABLE_ROTATION, DISABLE_FA_V2, DISABLE_DIVERSITY, 0 , {}, true , ENABLE_QQ_BIAS },
2485+ paged_attention_test_params{ {{4 , 32 }}, 2 , 2 , 64 , 64 , 16 , {100.0 }, 0 , false , DISABLE_CACHE_COMPRESSION, ov::internal::CacheQuantMode::BY_TOKEN, STATIC_INPUT_PAD, ENABLE_SCORES, DISABLE_ROTATION, DISABLE_FA_V2, DISABLE_DIVERSITY, 0 , {}, ov::element::dynamic, true , ENABLE_QQ_BIAS },
2486+ paged_attention_test_params{ {{4 , 32 }}, 2 , 2 , 64 , 64 , 16 , {100.0 }, 0 , false , ENABLE_CACHE_COMPRESSION, ov::internal::CacheQuantMode::BY_TOKEN, STATIC_INPUT_PAD, ENABLE_SCORES, DISABLE_ROTATION, DISABLE_FA_V2, DISABLE_DIVERSITY, 0 , {}, ov::element::dynamic, true , ENABLE_QQ_BIAS },
2487+ paged_attention_test_params{ {{4 , 32 }}, 2 , 2 , 64 , 64 , 16 , {100.0 }, 0 , false , ENABLE_CACHE_COMPRESSION, ov::internal::CacheQuantMode::BY_CHANNEL, STATIC_INPUT_PAD, ENABLE_SCORES, DISABLE_ROTATION, DISABLE_FA_V2, DISABLE_DIVERSITY, 0 , {}, ov::element::dynamic, true , ENABLE_QQ_BIAS },
25082488
25092489 // multi sequences tests
2510- paged_attention_test_params{ {{4 , 32 }, {128 , 0 }}, 2 , 2 , 64 , 64 , 16 , {100.0 }, 0 , false , DISABLE_CACHE_COMPRESSION, ov::internal::CacheQuantMode::BY_TOKEN, STATIC_INPUT_PAD, ENABLE_SCORES, DISABLE_ROTATION, DISABLE_FA_V2, DISABLE_DIVERSITY, 0 , {}, true , QueryToQueryAttentionDescriptor{{{{1 , 0 , 0 , 0 , 1 , 1 , 0 , 0 , 1 , 0 , 1 , 0 , 1 , 0 , 1 , 1 }}}, {0 , 4 , 4 }} },
2511- paged_attention_test_params{ {{128 , 0 }, {4 , 32 }}, 2 , 2 , 64 , 64 , 16 , {100.0 }, 0 , false , DISABLE_CACHE_COMPRESSION, ov::internal::CacheQuantMode::BY_TOKEN, STATIC_INPUT_PAD, ENABLE_SCORES, DISABLE_ROTATION, DISABLE_FA_V2, DISABLE_DIVERSITY, 0 , {}, true , QueryToQueryAttentionDescriptor{{{{1 , 0 , 0 , 0 , 1 , 1 , 0 , 0 , 1 , 0 , 1 , 0 , 1 , 0 , 1 , 1 }}}, {0 , 0 , 4 }} },
2512- paged_attention_test_params{ {{4 , 20 }, {4 , 32 }}, 2 , 2 , 64 , 64 , 16 , {100.0 }, 0 , false , DISABLE_CACHE_COMPRESSION, ov::internal::CacheQuantMode::BY_TOKEN, STATIC_INPUT_PAD, ENABLE_SCORES, DISABLE_ROTATION, DISABLE_FA_V2, DISABLE_DIVERSITY, 0 , {}, true , QueryToQueryAttentionDescriptor{{{{1 , 0 , 0 , 0 , 1 , 1 , 0 , 0 , 1 , 0 , 1 , 0 , 1 , 0 , 1 , 1 }, {1 , 0 , 0 , 0 , 1 , 1 , 0 , 0 , 1 , 0 , 1 , 0 , 1 , 0 , 1 , 1 }}}, {0 , 4 , 8 }} },
2490+ paged_attention_test_params{ {{4 , 32 }, {128 , 0 }}, 2 , 2 , 64 , 64 , 16 , {100.0 }, 0 , false , DISABLE_CACHE_COMPRESSION, ov::internal::CacheQuantMode::BY_TOKEN, STATIC_INPUT_PAD, ENABLE_SCORES, DISABLE_ROTATION, DISABLE_FA_V2, DISABLE_DIVERSITY, 0 , {}, ov::element::dynamic, true , QueryToQueryAttentionDescriptor{{{{1 , 0 , 0 , 0 , 1 , 1 , 0 , 0 , 1 , 0 , 1 , 0 , 1 , 0 , 1 , 1 }}}, {0 , 4 , 4 }} },
2491+ paged_attention_test_params{ {{128 , 0 }, {4 , 32 }}, 2 , 2 , 64 , 64 , 16 , {100.0 }, 0 , false , DISABLE_CACHE_COMPRESSION, ov::internal::CacheQuantMode::BY_TOKEN, STATIC_INPUT_PAD, ENABLE_SCORES, DISABLE_ROTATION, DISABLE_FA_V2, DISABLE_DIVERSITY, 0 , {}, ov::element::dynamic, true , QueryToQueryAttentionDescriptor{{{{1 , 0 , 0 , 0 , 1 , 1 , 0 , 0 , 1 , 0 , 1 , 0 , 1 , 0 , 1 , 1 }}}, {0 , 0 , 4 }} },
2492+ paged_attention_test_params{ {{4 , 20 }, {4 , 32 }}, 2 , 2 , 64 , 64 , 16 , {100.0 }, 0 , false , DISABLE_CACHE_COMPRESSION, ov::internal::CacheQuantMode::BY_TOKEN, STATIC_INPUT_PAD, ENABLE_SCORES, DISABLE_ROTATION, DISABLE_FA_V2, DISABLE_DIVERSITY, 0 , {}, ov::element::dynamic, true , QueryToQueryAttentionDescriptor{{{{1 , 0 , 0 , 0 , 1 , 1 , 0 , 0 , 1 , 0 , 1 , 0 , 1 , 0 , 1 , 1 }, {1 , 0 , 0 , 0 , 1 , 1 , 0 , 0 , 1 , 0 , 1 , 0 , 1 , 0 , 1 , 1 }}}, {0 , 4 , 8 }} },
25132493}));
0 commit comments