@@ -2399,7 +2399,7 @@ static void ggml_vk_load_shaders(vk_device& device) {
             ggml_vk_create_pipeline(device, device->pipeline_mul_mat_vec_p021_f16_f32[i], "mul_mat_vec_p021_f16_f32"+std::to_string(i+1), mul_mat_vec_p021_f16_f32_len, mul_mat_vec_p021_f16_f32_data, "main", 3, 6 * sizeof(uint32_t), {1, 1, 1}, {device->subgroup_size, i + 1}, 1, true);
         }
     }
-    ggml_vk_create_pipeline(device, device->pipeline_mul_mat_vec_nc_f16_f32, "mul_mat_vec_nc_f16_f32", mul_mat_vec_nc_f16_f32_len, mul_mat_vec_nc_f16_f32_data, "main", 3, 7 * sizeof(uint32_t), {1, 1, 1}, {}, 1);
+    ggml_vk_create_pipeline(device, device->pipeline_mul_mat_vec_nc_f16_f32, "mul_mat_vec_nc_f16_f32", mul_mat_vec_nc_f16_f32_len, mul_mat_vec_nc_f16_f32_data, "main", 3, 9 * sizeof(uint32_t), {1, 1, 1}, {}, 1);
 
     ggml_vk_create_pipeline(device, device->pipeline_norm_f32, "norm_f32", norm_f32_len, norm_f32_data, "main", 2, sizeof(vk_op_push_constants), {1, 1, 1}, {}, 1);
     ggml_vk_create_pipeline(device, device->pipeline_group_norm_f32, "group_norm_f32", group_norm_f32_len, group_norm_f32_data, "main", 2, sizeof(vk_op_push_constants), {1, 1, 1}, {}, 1);
@@ -4949,6 +4949,8 @@ static void ggml_vk_mul_mat_vec_nc_f16_f32(ggml_backend_vk_context * ctx, vk_context& subctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
     const uint64_t nb01 = src0->nb[1];
     const uint64_t nb02 = src0->nb[2];
 
+    const uint64_t nb12 = src1->nb[2];
+
     // const uint64_t ne10 = src1->ne[0];
     const uint64_t ne11 = src1->ne[1];
     const uint64_t ne12 = src1->ne[2];
@@ -4974,6 +4976,7 @@ static void ggml_vk_mul_mat_vec_nc_f16_f32(ggml_backend_vk_context * ctx, vk_context& subctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
 
     const uint32_t row_stride_x = nb01 / sizeof(ggml_fp16_t);
     const uint32_t channel_stride_x = nb02 / sizeof(ggml_fp16_t);
+    const uint32_t channel_stride_y = nb12 / sizeof(float);
 
     const uint64_t qx_sz = ggml_nbytes(src0);
     const uint64_t qy_sz = ggml_nbytes(src1);
@@ -5004,7 +5007,7 @@ static void ggml_vk_mul_mat_vec_nc_f16_f32(ggml_backend_vk_context * ctx, vk_context& subctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
     const uint64_t d_shader_offset = d_buf_offset - d_buffer_offset;
 
     // compute
-    const std::array<uint32_t, 7> pc = { (uint32_t)ne00, (uint32_t)ne01, row_stride_x, channel_stride_x, (uint32_t)(ne12 / ne02), (uint32_t)(qy_shader_offset / ggml_type_size(src1->type)), (uint32_t)(d_shader_offset / ggml_type_size(dst->type)) };
+    const std::array<uint32_t, 9> pc = { (uint32_t)ne00, (uint32_t)ne01, row_stride_x, channel_stride_x, channel_stride_y, (uint32_t)(ne12 / ne02), (uint32_t)ne12, (uint32_t)(qy_shader_offset / ggml_type_size(src1->type)), (uint32_t)(d_shader_offset / ggml_type_size(dst->type)) };
     ggml_vk_sync_buffers(subctx);
     ggml_vk_dispatch_pipeline(ctx, subctx, ctx->device->pipeline_mul_mat_vec_nc_f16_f32,
-        { vk_subbuffer{ d_Qx, qx_buf_offset, qx_sz }, vk_subbuffer{ d_Qy, qy_buffer_offset, qy_sz + qy_shader_offset }, vk_subbuffer{ d_D, d_buffer_offset, d_sz + d_shader_offset } }, 7 * sizeof(uint32_t), &pc, { 1, (uint32_t)ne01, (uint32_t)ne12 });
+        { vk_subbuffer{ d_Qx, qx_buf_offset, qx_sz }, vk_subbuffer{ d_Qy, qy_buffer_offset, qy_sz + qy_shader_offset }, vk_subbuffer{ d_D, d_buffer_offset, d_sz + d_shader_offset } }, 9 * sizeof(uint32_t), &pc, { 1, (uint32_t)ne01, (uint32_t)ne12 });
0 commit comments