
Commit dd777a0

Code review update
Also fix the functional test for SDPA.
1 parent 7d38f6f commit dd777a0

File tree

6 files changed: 85 additions, 93 deletions


src/plugins/intel_gpu/src/graph/impls/ocl/scaled_dot_product_attention.cpp

Lines changed: 3 additions & 0 deletions
@@ -268,6 +268,9 @@ struct scaled_dot_product_attention_impl : multi_stage_primitive<scaled_dot_prod
         if (query_shape[query_shape.size() - 1].is_static())
             config.k_head_size = query_shape[query_shape.size() - 1].get_length();
 
+        if (value_shape[value_shape.size() - 1].is_static())
+            config.v_head_size = value_shape[value_shape.size() - 1].get_length();
+
         config.is_causal = desc->is_causal;
 
         if (desc->scale_val.has_value()) {
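For context: with this change the query/key inner dimension (k_head_size) and the value inner dimension (v_head_size) are tracked separately, since scaled dot-product attention allows them to differ and the output keeps Q's sequence length but V's head size. A minimal sketch of that shape rule, using illustrative names rather than the plugin's API:

#include <cstdint>
#include <vector>

// Shape convention assumed here: [..., seq_len, head_size].
// Q: [..., L_q, k_head_size], K: [..., L_kv, k_head_size], V: [..., L_kv, v_head_size]
std::vector<int64_t> sdpa_output_shape(const std::vector<int64_t>& q_shape,
                                       const std::vector<int64_t>& v_shape) {
    std::vector<int64_t> out = q_shape;  // batch / heads / L_q come from Q
    out.back() = v_shape.back();         // the last dim follows V's head size
    return out;
}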

src/plugins/intel_gpu/src/graph/paged_attention.cpp

Lines changed: 10 additions & 4 deletions
@@ -21,13 +21,19 @@ layout paged_attention_inst::calc_output_layout(const paged_attention_node& /*no
 
 template<typename ShapeType>
 std::vector<layout> paged_attention_inst::calc_output_layouts(paged_attention_node const& /*node*/, kernel_impl_params const& impl_param) {
-    auto q_layout = impl_param.get_input_layout(0);
-    auto v_layout = impl_param.get_input_layout(2);
+    const auto& q_layout = impl_param.get_input_layout(0);
+    const auto& v_layout = impl_param.get_input_layout(2);
     auto data_layout = q_layout;
 
     if (v_layout.is_static()) {
-        ShapeType v_shape = v_layout.get_shape();
-        data_layout = layout{v_shape, data_layout.data_type, data_layout.format};
+        const auto& key_cache_ps = impl_param.get_input_layout(3).get_partial_shape();
+        const auto& value_cache_ps = impl_param.get_input_layout(4).get_partial_shape();
+        // The output layout may follow the value layout if key and value have different head sizes
+        if (key_cache_ps[2].get_length() != value_cache_ps[3].get_length() ||
+            key_cache_ps[3].get_length() != value_cache_ps[2].get_length()) {
+            ShapeType v_shape = v_layout.get_shape();
+            data_layout = data_layout.clone_with_other_shape(v_shape);
+        }
    }
 
     data_layout.data_padding = padding();
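The cross-indexed comparison above only makes sense under a particular key/value cache layout in which the head-size and block-size dimensions are swapped between the two caches; the diff itself does not spell those layouts out, so the simplified sketch below states them as an assumption:

#include <cstdint>
#include <vector>

// Assumed layouts (illustrative, not taken from this commit):
//   key_cache:   [num_blocks, kv_heads, k_head_size, block_size]
//   value_cache: [num_blocks, kv_heads, block_size, v_head_size]
bool kv_head_sizes_differ(const std::vector<int64_t>& key_cache_shape,
                          const std::vector<int64_t>& value_cache_shape) {
    const int64_t k_head_size = key_cache_shape[2];
    const int64_t v_head_size = value_cache_shape[3];
    // When they differ, the paged-attention output follows the value shape.
    return k_head_size != v_head_size;
}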

src/plugins/intel_gpu/src/kernel_selector/cl_kernels/pa_kv_cache_update_ref.cl

Lines changed: 41 additions & 63 deletions
@@ -4,20 +4,21 @@
 
 #include "include/batch_headers/common.cl"
 
-inline void FUNC(quantize_and_save_k)(__global const INPUT0_TYPE* in_data,
+inline void FUNC(quantize_and_save)(__global const INPUT0_TYPE* in_data,
                                       const uint in_data_offset,
                                       __global OUTPUT_TYPE* out_data,
                                       const uint out_data_offset,
                                       const uint out_data_pitch,
                                       const uint comp_offset,
                                       const uint token_pos_in_block,
-                                      const uint sglid) {
-    INPUT0_TYPE input_data[K_HEAD_SIZE / SUBGROUP_SIZE];
+                                      const uint sglid,
+                                      const uint num_groups,
+                                      INPUT0_TYPE* input_data) {
     INPUT0_TYPE grp_max = 0.001;
     INPUT0_TYPE max_value = INPUT0_VAL_MIN;
     INPUT0_TYPE min_value = INPUT0_VAL_MAX;
 
-    unroll_for (uint i = 0; i < K_HEAD_SIZE / SUBGROUP_SIZE; i++) {
+    unroll_for (uint i = 0; i < num_groups; i++) {
         input_data[i] = BLOCK_READN(INPUT0_TYPE, 1, in_data, in_data_offset + i * SUBGROUP_SIZE);
         max_value = fmax(max_value, input_data[i]);
         min_value = fmin(min_value, input_data[i]);
@@ -35,54 +36,7 @@ inline void FUNC(quantize_and_save_k)(__global const INPUT0_TYPE* in_data,
     INPUT0_TYPE zp = (INPUT1_TYPE)(zp_tmp);
 #undef ACCUMULATOR_TYPE
 
-    unroll_for (uint i = 0; i < K_HEAD_SIZE / SUBGROUP_SIZE; i++) {
-        OUTPUT_TYPE res = convert_char_rte(input_data[i] * scale + zp);
-
-        uint offset = out_data_offset + (i * SUBGROUP_SIZE + sglid) * out_data_pitch;
-        out_data[offset] = res;
-    }
-
-    INPUT0_TYPE* comp_ptr = out_data + comp_offset;
-
-    if (sglid == 0) {
-        comp_ptr[token_pos_in_block] = 1.0 / scale;
-        comp_ptr[PAGED_ATTENTION_BLOCK_SIZE + token_pos_in_block] = zp;
-    }
-}
-
-inline void FUNC(quantize_and_save_v)(__global const INPUT0_TYPE* in_data,
-                                      const uint in_data_offset,
-                                      __global OUTPUT_TYPE* out_data,
-                                      const uint out_data_offset,
-                                      const uint out_data_pitch,
-                                      const uint comp_offset,
-                                      const uint token_pos_in_block,
-                                      const uint sglid) {
-    INPUT0_TYPE input_data[V_HEAD_SIZE / SUBGROUP_SIZE];
-    INPUT0_TYPE grp_max = 0.001;
-    INPUT0_TYPE max_value = INPUT0_VAL_MIN;
-    INPUT0_TYPE min_value = INPUT0_VAL_MAX;
-
-    unroll_for (uint i = 0; i < V_HEAD_SIZE / SUBGROUP_SIZE; i++) {
-        input_data[i] = BLOCK_READN(INPUT0_TYPE, 1, in_data, in_data_offset + i * SUBGROUP_SIZE);
-        max_value = fmax(max_value, input_data[i]);
-        min_value = fmin(min_value, input_data[i]);
-    }
-
-    min_value = sub_group_reduce_min(min_value);
-    max_value = sub_group_reduce_max(max_value);
-
-    // If the range of input data is zero, it is adjusted to the minimum value(0.001).
-#define ACCUMULATOR_TYPE float
-    ACCUMULATOR_TYPE diff_value = max_value == min_value ? (grp_max) : (max_value - min_value);
-    ACCUMULATOR_TYPE scale_tmp = (ACCUMULATOR_TYPE)((CHAR_MAX - CHAR_MIN) / diff_value);
-    ACCUMULATOR_TYPE zp_tmp = (ACCUMULATOR_TYPE)(-min_value * scale_tmp) + CHAR_MIN;
-    INPUT0_TYPE scale = (INPUT1_TYPE)(scale_tmp);
-    INPUT0_TYPE zp = (INPUT1_TYPE)(zp_tmp);
-#undef ACCUMULATOR_TYPE
-
-
-    unroll_for (uint i = 0; i < V_HEAD_SIZE / SUBGROUP_SIZE; i++) {
+    unroll_for (uint i = 0; i < num_groups; i++) {
         OUTPUT_TYPE res = convert_char_rte(input_data[i] * scale + zp);
 
         uint offset = out_data_offset + (i * SUBGROUP_SIZE + sglid) * out_data_pitch;
@@ -178,11 +132,19 @@ KERNEL(pa_kv_cache_update)(
     }
 
 #else // IS_KV_COMPRESSED
-    // key processing
-    FUNC_CALL(quantize_and_save_k)(key_data, key_in_offset, key_cache_data, key_out_offset, PAGED_ATTENTION_BLOCK_SIZE, comp_k_offset, current_token_pos_in_block, sglid);
+    {
+        // key processing
+        INPUT0_TYPE input_data[K_HEAD_SIZE / SUBGROUP_SIZE];
+        FUNC_CALL(quantize_and_save)(key_data, key_in_offset, key_cache_data, key_out_offset, PAGED_ATTENTION_BLOCK_SIZE, comp_k_offset,
+                                     current_token_pos_in_block, sglid, K_HEAD_SIZE / SUBGROUP_SIZE, &input_data[0]);
+    }
 
-    // value processing
-    FUNC_CALL(quantize_and_save_v)(value_data, value_in_offset, value_cache_data, value_out_offset, 1, comp_v_offset, current_token_pos_in_block, sglid);
+    {
+        // value processing
+        INPUT0_TYPE input_data[V_HEAD_SIZE / SUBGROUP_SIZE];
+        FUNC_CALL(quantize_and_save)(value_data, value_in_offset, value_cache_data, value_out_offset, 1, comp_v_offset,
+                                     current_token_pos_in_block, sglid, V_HEAD_SIZE / SUBGROUP_SIZE, &input_data[0]);
+    }
 #endif // IS_KV_COMPRESSED
     } else {
         // 1st token
@@ -343,11 +305,19 @@ KERNEL(pa_kv_cache_update)(
     }
 
 #else // IS_KV_COMPRESSED
+    {
         // key processing
-    FUNC_CALL(quantize_and_save_k)(key_data, key_in_offset, key_cache_data, key_out_offset, PAGED_ATTENTION_BLOCK_SIZE, comp_k_offset, token_num, sglid);
+        INPUT0_TYPE input_data[K_HEAD_SIZE / SUBGROUP_SIZE];
+        FUNC_CALL(quantize_and_save)(key_data, key_in_offset, key_cache_data, key_out_offset, PAGED_ATTENTION_BLOCK_SIZE,
+                                     comp_k_offset, token_num, sglid, K_HEAD_SIZE / SUBGROUP_SIZE, &input_data[0]);
+    }
 
+    {
         // value processing
-    FUNC_CALL(quantize_and_save_v)(value_data, value_in_offset, value_cache_data, value_out_offset, 1, comp_v_offset, token_num, sglid);
+        INPUT0_TYPE input_data[V_HEAD_SIZE / SUBGROUP_SIZE];
+        FUNC_CALL(quantize_and_save)(value_data, value_in_offset, value_cache_data, value_out_offset, 1,
+                                     comp_v_offset, token_num, sglid, V_HEAD_SIZE / SUBGROUP_SIZE, &input_data[0]);
+    }
 #endif // IS_KV_COMPRESSED
 
     key_in_offset += (KV_HEADS_NUM * K_HEAD_SIZE + INPUT0_PAD_AFTER_FEATURE_NUM + INPUT0_PAD_BEFORE_FEATURE_NUM);
@@ -379,14 +349,22 @@ KERNEL(pa_kv_cache_update)(
             uint value_offset = value_out_offset + head_idx_index + sglid + SUBGROUP_SIZE * i;
             value_cache_data[value_offset] = input_data;
         }
-    }
+        }
 
 #else // IS_KV_COMPRESSED
-    // key processing
-    FUNC_CALL(quantize_and_save_k)(key_data, key_in_offset, key_cache_data, key_out_offset, PAGED_ATTENTION_BLOCK_SIZE, comp_k_offset, token_start_pos + token_num, sglid);
+    {
+        // key processing
+        INPUT0_TYPE input_data[K_HEAD_SIZE / SUBGROUP_SIZE];
+        FUNC_CALL(quantize_and_save)(key_data, key_in_offset, key_cache_data, key_out_offset, PAGED_ATTENTION_BLOCK_SIZE,
+                                     comp_k_offset, token_start_pos + token_num, sglid, K_HEAD_SIZE / SUBGROUP_SIZE, &input_data[0]);
+    }
 
-    // value processing
-    FUNC_CALL(quantize_and_save_v)(value_data, value_in_offset, value_cache_data, value_out_offset, 1, comp_v_offset, token_start_pos + token_num, sglid);
+    {
+        // value processing
+        INPUT0_TYPE input_data[V_HEAD_SIZE / SUBGROUP_SIZE];
+        FUNC_CALL(quantize_and_save)(value_data, value_in_offset, value_cache_data, value_out_offset, 1,
+                                     comp_v_offset, token_start_pos + token_num, sglid, V_HEAD_SIZE / SUBGROUP_SIZE, &input_data[0]);
+    }
 #endif // IS_KV_COMPRESSED
     key_in_offset += (KV_HEADS_NUM * K_HEAD_SIZE + INPUT0_PAD_AFTER_FEATURE_NUM + INPUT0_PAD_BEFORE_FEATURE_NUM);
     value_in_offset += (KV_HEADS_NUM * V_HEAD_SIZE + INPUT1_PAD_AFTER_FEATURE_NUM + INPUT1_PAD_BEFORE_FEATURE_NUM);
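The merged quantize_and_save helper keeps the same per-token asymmetric int8 quantization as the two helpers it replaces; only the loop bound (num_groups) and the scratch array are now supplied by the caller, so one function serves both K_HEAD_SIZE and V_HEAD_SIZE. A stand-alone sketch of that quantization math, written in plain C++ rather than OpenCL for illustration:

#include <algorithm>
#include <climits>
#include <cmath>
#include <cstdint>
#include <vector>

// Per-token min/max int8 quantization: q = round(x * scale + zp).
// A zero range is widened to 0.001, mirroring grp_max in the kernel.
void quantize_token(const std::vector<float>& x, std::vector<int8_t>& q,
                    float& scale, float& zp) {
    const float min_v = *std::min_element(x.begin(), x.end());
    const float max_v = *std::max_element(x.begin(), x.end());
    const float diff = (max_v == min_v) ? 0.001f : (max_v - min_v);
    scale = (CHAR_MAX - CHAR_MIN) / diff;
    zp = -min_v * scale + CHAR_MIN;
    q.resize(x.size());
    for (size_t i = 0; i < x.size(); ++i)
        q[i] = static_cast<int8_t>(std::clamp<long>(std::lrint(x[i] * scale + zp), CHAR_MIN, CHAR_MAX));
    // The kernel additionally stores 1.0 / scale and zp next to the block so the
    // attention kernel can dequantize the cached values later.
}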

src/plugins/intel_gpu/src/kernel_selector/kernels/sdpa/sdpa_kernel_base.cpp

Lines changed: 0 additions & 1 deletion
@@ -137,7 +137,6 @@ JitConstants SDPAKernelBase::GetJitConstants(const sdpa_params& params) const {
 
     TransposedDimensionAccessHelperJit dims_q(params.inputs[0], params.input0_order);
     const auto num_heads = params.conf.is_paged_attention ? std::to_string(params.conf.heads_num) : dims_q.f();
-    // TransposedDimensionAccessHelperJit dims_v(params.inputs[2], params.input2_order);
     jit.AddConstant(MakeJitConstant("TARGET_SEQ_LEN", dims_q.y()));
     jit.AddConstant(MakeJitConstant("NUM_HEADS", num_heads));
     jit.AddConstant(MakeJitConstant("NUM_KV_HEADS", params.conf.kv_heads_num));

src/plugins/intel_gpu/src/kernel_selector/kernels/sdpa/sdpa_kernel_micro.cpp

Lines changed: 29 additions & 23 deletions
@@ -254,11 +254,12 @@ void SDPAKernelMicro::init_microkernels(const sdpa_params& params, micro::Packag
     const auto& V = params.inputs[2];
 
     auto& out = params.outputs[0];
-    const auto head_size = params.conf.k_head_size;
-    const auto d_max = get_d_max(head_size);
+    const auto k_head_size = params.conf.k_head_size;
+    const auto v_head_size = params.conf.v_head_size;
+    const auto d_max = get_d_max(k_head_size);
     const Tensor::Dim n_keys = get_seq_length(params, K, params.input1_order);
     const Tensor::Dim n_queries = get_seq_length(params, Q, params.input0_order);
-    const Tensor::Dim n_values = Tensor::Dim(head_size);
+    const Tensor::Dim n_values = Tensor::Dim(v_head_size);
     const auto batch = out.Batch().v * out.Feature().v;
 
     /* Retrieve pre-tuned kernel configuration */
@@ -269,13 +270,15 @@ void SDPAKernelMicro::init_microkernels(const sdpa_params& params, micro::Packag
                        (V.GetDType() == Datatype::UINT8 || V.GetDType() == Datatype::INT8);
     switch (params.engineInfo.arch) {
         case gpu_arch::xe_hpg: {
-            config = choose_config_xehpg(static_cast<int32_t>(head_size), static_cast<int32_t>(n_keys.v), thin_q, is_quantized, params.conf.is_paged_attention);
+            config = choose_config_xehpg(static_cast<int32_t>(k_head_size), static_cast<int32_t>(n_keys.v), thin_q,
+                                         is_quantized, params.conf.is_paged_attention);
             break;
         }
         case gpu_arch::xe_hpc:
         case gpu_arch::xe2:
         case gpu_arch::xe3: {
-            config = choose_config_xehpc(static_cast<int32_t>(head_size), static_cast<int32_t>(n_keys.v), thin_q, is_quantized, params.conf.is_paged_attention);
+            config = choose_config_xehpc(static_cast<int32_t>(k_head_size), static_cast<int32_t>(n_keys.v), thin_q,
+                                         is_quantized, params.conf.is_paged_attention);
             break;
         }
         default: break;
@@ -334,7 +337,7 @@ void SDPAKernelMicro::init_microkernels(const sdpa_params& params, micro::Packag
 
     problem_kq.B.layout = micro::MatrixLayout::Pr;
     problem_kq.C.layout = micro::MatrixLayout::T;
-    problem_kq.A.setAlignment(micro::alignment_for_ld(head_size * problem.Ta));
+    problem_kq.A.setAlignment(micro::alignment_for_ld(k_head_size * problem.Ta));
     problem_kq.B.setAlignment(64); // Q is packed in VNNI format in SLM
     problem_kq.B.crosspack = 2;
     problem_kq.B.tileR = d_max;
@@ -344,7 +347,7 @@ void SDPAKernelMicro::init_microkernels(const sdpa_params& params, micro::Packag
     micro::SizeParams sizes;
     sizes.m = static_cast<int64_t>(n_keys.v);
     sizes.n = static_cast<int64_t>(n_queries.v);
-    sizes.k = static_cast<int64_t>(head_size);
+    sizes.k = static_cast<int64_t>(k_head_size);
     sizes.batch = static_cast<int64_t>(batch);
 
     /* Set up microkernel requirements */
@@ -390,7 +393,7 @@ void SDPAKernelMicro::init_microkernels(const sdpa_params& params, micro::Packag
     }
 
     if (params.conf.is_kv_compressed) {
-        problem_vs.aqGroupM = (vs_common_scales || vs_common_zp) ? 1 : micro::rnd_up_pow2(params.conf.k_head_size);
+        problem_vs.aqGroupM = (vs_common_scales || vs_common_zp) ? 1 : micro::rnd_up_pow2(v_head_size);
         problem_vs.aqGroupK = 1;
     }
 
@@ -399,7 +402,7 @@ void SDPAKernelMicro::init_microkernels(const sdpa_params& params, micro::Packag
 
     problem_vs.B.layout = micro::MatrixLayout::Pr;
     problem_vs.C.layout = micro::MatrixLayout::N;
-    problem_vs.A.setAlignment(micro::alignment_for_ld(head_size * problem.Ta));
+    problem_vs.A.setAlignment(micro::alignment_for_ld(v_head_size * problem.Ta));
     problem_vs.B.setAlignment(64); // S is packed in SLM
     problem_vs.B.crosspack = 16;
     sizes.m = static_cast<int64_t>(n_values.v);
@@ -474,6 +477,9 @@ bool SDPAKernelMicro::Validate(const Params& p) const {
     if (params.conf.k_head_size > 256)
         return false;
 
+    if (params.conf.v_head_size > 256)
+        return false;
+
     // TODO: To support sdpa_micro kernel with non-const scalar mask / scale inputs
     if (!params.conf.is_paged_attention) {
         const auto mask_idx = 3lu;
@@ -512,18 +518,18 @@ JitConstants SDPAKernelMicro::GetJitConstants(const sdpa_params& params, const m
     const auto& K = prim_params.inputs[1];
     const auto& V = prim_params.inputs[2];
 
-    const auto head_size = prim_params.conf.k_head_size;
-    const auto v_head_size = prim_params.conf.k_head_size;
+    const auto k_head_size = prim_params.conf.k_head_size;
+    const auto v_head_size = prim_params.conf.v_head_size;
 
-    auto ldq = head_size * Q.ElementSize();
-    auto ldk = head_size * K.ElementSize();
+    auto ldq = k_head_size * Q.ElementSize();
+    auto ldk = k_head_size * K.ElementSize();
     auto ldv = v_head_size * V.ElementSize();
-    auto lda = head_size * prim_params.outputs[0].ElementSize();
+    auto lda = k_head_size * prim_params.outputs[0].ElementSize();
 
-    const auto d_max = get_d_max(head_size);
+    const auto d_max = get_d_max(k_head_size);
     const auto n_keys = get_seq_length(params, K, prim_params.input1_order);
     const auto n_queries = get_seq_length(params, Q, prim_params.input0_order);
-    const auto n_values = Tensor::Dim(head_size);
+    const auto n_values = Tensor::Dim(v_head_size);
 
     auto data_inputs = params.inputs.size();
     if (params.conf.is_paged_attention)
@@ -533,7 +539,7 @@ JitConstants SDPAKernelMicro::GetJitConstants(const sdpa_params& params, const m
     jit.AddConstant(MakeJitConstant("SUBGROUP_SIZE", subgroup_size(prim_params.engineInfo.arch)));
     jit.AddConstant(MakeJitConstant("INVERT_SCALE", false));
     jit.AddConstant(MakeJitConstant("SCALE_DATA_T", "half"));
-    jit.AddConstant(MakeJitConstant("HEAD_SIZE", head_size));
+    jit.AddConstant(MakeJitConstant("HEAD_SIZE", k_head_size));
 
     size_t attn_input_idx = 3;
     size_t scale_input_idx = 4;
@@ -616,8 +622,8 @@ JitConstants SDPAKernelMicro::GetJitConstants(const sdpa_params& params, const m
     int tile_q = gemm_kq.getSetting("wg_tile_n");
     int tile_v = gemm_vs.getSetting("wg_tile_m");
 
-    bool d_full = (head_size == d_max);
-    bool v_full = (head_size == tile_v);
+    bool d_full = (k_head_size == d_max);
+    bool v_full = (v_head_size == tile_v);
     bool k_full = !n_keys.is_dynamic && (n_keys.v % tile_k) == 0;
     bool q_full = !n_queries.is_dynamic && (n_queries.v % tile_q) == 0;
 
@@ -814,11 +820,11 @@ clKernelData SDPAKernelMicro::get_kernel_data(const sdpa_params& params, bool is
     const auto n_queries = get_seq_length(params, Q, params.input0_order);
     const auto n_keys = get_seq_length(params, K, params.input1_order);
 
-    auto head_size = params.conf.k_head_size;
+    auto k_head_size = params.conf.k_head_size;
 
     ScalarDescriptor s_d;
     s_d.t = ScalarDescriptor::Types::INT32;
-    s_d.v.s32 = static_cast<uint32_t>(head_size);
+    s_d.v.s32 = static_cast<uint32_t>(k_head_size);
 
     ScalarDescriptor s_k;
     s_k.t = ScalarDescriptor::Types::INT32;
@@ -890,11 +896,11 @@ void SDPAKernelMicro::GetUpdateDispatchDataFunc(KernelData& kd) const {
     const auto n_queries = get_seq_length(prim_params, Q, prim_params.input0_order);
     const auto n_keys = get_seq_length(prim_params, K, prim_params.input1_order);
 
-    auto head_size = prim_params.conf.k_head_size;
+    auto k_head_size = prim_params.conf.k_head_size;
 
     ScalarDescriptor s_d;
     s_d.t = ScalarDescriptor::Types::INT32;
-    s_d.v.s32 = static_cast<uint32_t>(head_size);
+    s_d.v.s32 = static_cast<uint32_t>(k_head_size);
 
     ScalarDescriptor s_k;
     s_k.t = ScalarDescriptor::Types::INT32;
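With the rename, the two head sizes feed different parts of the micro-kernel setup: k_head_size is the reduction (k) dimension of the K×Q GEMM and drives d_max, ldq and ldk, while v_head_size becomes n_values, i.e. the m dimension of the V×S GEMM, and is what the wg_tile_m check (v_full) compares against. A rough sketch of that split, with a placeholder struct standing in for micro::SizeParams:

#include <cstdint>

// Placeholder for the micro-kernel size parameters (illustrative only).
struct GemmSizes { int64_t m, n, k, batch; };

// K x Q GEMM: reduces over the shared query/key head size.
GemmSizes kq_sizes(int64_t n_keys, int64_t n_queries, int64_t k_head_size, int64_t batch) {
    return {n_keys, n_queries, k_head_size, batch};
}

// V x S GEMM: produces v_head_size output rows per query; the reduction runs
// over the key sequence length (an assumption not spelled out in this hunk).
GemmSizes vs_sizes(int64_t v_head_size, int64_t n_queries, int64_t n_keys, int64_t batch) {
    return {v_head_size, n_queries, n_keys, batch};
}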

src/plugins/intel_gpu/src/kernel_selector/kernels/sdpa/sdpa_kernel_ref.cpp

Lines changed: 2 additions & 2 deletions
@@ -49,8 +49,8 @@ JitConstants SDPAKernelRef::GetJitConstants(const sdpa_params& params) const {
         jit.AddConstant(MakeJitConstant("STATIC_SCALE_VALUE", params.conf.scale_val));
         jit.AddConstant(MakeJitConstant("STATIC_SCALE_VALUE_INV", 1.0f / params.conf.scale_val));
     } else {
-        jit.AddConstant(MakeJitConstant("STATIC_SCALE_VALUE_INV", std::sqrt(static_cast<float>(params.conf.head_size))));
-        jit.AddConstant(MakeJitConstant("STATIC_SCALE_VALUE", 1.0f / std::sqrt(static_cast<float>(params.conf.head_size))));
+        jit.AddConstant(MakeJitConstant("STATIC_SCALE_VALUE_INV", std::sqrt(static_cast<float>(params.conf.k_head_size))));
+        jit.AddConstant(MakeJitConstant("STATIC_SCALE_VALUE", 1.0f / std::sqrt(static_cast<float>(params.conf.k_head_size))));
     }
 
     return jit;
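When no explicit scale value is provided, the reference kernel falls back to the standard attention scale 1/sqrt(d_k); after this change d_k is explicitly the key head size. A tiny sketch of the default:

#include <cmath>

// Default SDPA scale when params.conf.scale_val is not set.
inline float default_sdpa_scale(int k_head_size) {
    return 1.0f / std::sqrt(static_cast<float>(k_head_size));
}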
