Open
Description
On Android, I run mul_mat with matrix A (200x200) and matrix B (200x200), where every element of A and B is 1. The expected result is a 200x200 matrix in which every element is 200.
When run on the CPU, the result is correct.
When run with Vulkan, the result is wrong, but it follows a distinct pattern: rows 0-63 are correct, rows 64-127 are all 0, rows 128-191 are correct, and rows 192-199 are all 0.
source code of mul_mat:
#include "ggml.h"
#include "ggml-cpu.h"
#include "ggml-alloc.h"
#include "ggml-backend.h"
#if GGML_USE_VULKAN
#include "ggml-vulkan.h"
#endif
#include <cassert>
#include <cmath>
#include <cstdio>
#include <cstring>
#include <fstream>
#include <map>
#include <string>
#include <vector>
// Log sink installed through ggml_log_set(): writes each message unchanged to
// stderr and flushes the stream afterwards. The level and the user data
// pointer are not used.
static void ggml_log_callback_default(ggml_log_level level, const char * text, void * user_data) {
    static_cast<void>(level);
    static_cast<void>(user_data);
    std::fputs(text, stderr);
    std::fflush(stderr);
}
// A minimal "model": the two input tensors a and b plus the ggml objects that
// own their metadata and storage. Every handle starts out as nullptr so that
// a partially initialized model can be told apart from a loaded one.
struct simple_model {
    // input operands of the matrix multiplication
    struct ggml_tensor * a = nullptr;
    struct ggml_tensor * b = nullptr;
    // the backend that performs the computation (Vulkan when compiled in, otherwise CPU)
    ggml_backend_t backend = nullptr;
    // the backend buffer that stores the tensor data of a and b
    ggml_backend_buffer_t buffer = nullptr;
    // the context that defines the tensor information (dimensions, size, memory address)
    struct ggml_context * ctx = nullptr;
};
// Prepare `model` for computation: install the log callback, pick a backend,
// create the two F32 input tensors and upload their contents.
//   a, b           : host data copied into model.a / model.b (ggml_nbytes() bytes each)
//   rows_X, cols_X : tensor shape; cols_X is passed as dimension 0, rows_X as dimension 1
void load_model(simple_model & model, float * a, float * b, int rows_A, int cols_A, int rows_B, int cols_B) {
    ggml_log_set(ggml_log_callback_default, nullptr);

    // backend selection: try Vulkan device 0 when it is compiled in ...
#ifdef GGML_USE_VULKAN
    fprintf(stderr, "%s: using Vulkan backend\n", __func__);
    model.backend = ggml_backend_vk_init(0);
    if (model.backend == nullptr) {
        fprintf(stderr, "%s: ggml_backend_vk_init(0) failed\n", __func__);
    }
#endif
    // ... and fall back to the CPU backend when no GPU backend was created
    if (model.backend == nullptr) {
        model.backend = ggml_backend_cpu_init();
    }

    // the context is created with no_alloc, so it is sized for the tensor
    // headers only; the tensor data is placed in a backend buffer below
    const int tensor_count = 2;
    struct ggml_init_params ctx_params = {
        /*.mem_size   =*/ ggml_tensor_overhead() * tensor_count,
        /*.mem_buffer =*/ nullptr,
        /*.no_alloc   =*/ true,
    };
    model.ctx = ggml_init(ctx_params);

    // describe both operands as 2D F32 tensors
    model.a = ggml_new_tensor_2d(model.ctx, GGML_TYPE_F32, cols_A, rows_A);
    model.b = ggml_new_tensor_2d(model.ctx, GGML_TYPE_F32, cols_B, rows_B);

    // allocate backend memory for every tensor of the context
    model.buffer = ggml_backend_alloc_ctx_tensors(model.ctx, model.backend);

    // copy the host data into the backend buffer
    ggml_backend_tensor_set(model.a, a, 0, ggml_nbytes(model.a));
    ggml_backend_tensor_set(model.b, b, 0, ggml_nbytes(model.b));
}
// build the compute graph to perform a matrix multiplication
struct ggml_cgraph * build_graph(const simple_model& model) {
static size_t buf_size = ggml_tensor_overhead()*GGML_DEFAULT_GRAPH_SIZE + ggml_graph_overhead();
static std::vector<uint8_t> buf(buf_size);
struct ggml_init_params params0 = {
/*.mem_size =*/ buf_size,
/*.mem_buffer =*/ buf.data(),
/*.no_alloc =*/ true, // the tensors will be allocated later by ggml_allocr_alloc_graph()
};
// create a temporally context to build the graph
struct ggml_context * ctx0 = ggml_init(params0);
struct ggml_cgraph * gf = ggml_new_graph(ctx0);
// result = a*b^T
struct ggml_tensor * result = ggml_mul_mat(ctx0, model.a, model.b);
// build operations nodes
ggml_build_forward_expand(gf, result);
// delete the temporally context used to build the graph
ggml_free(ctx0);
return gf;
}
// compute with backend
struct ggml_tensor * compute(const simple_model & model, ggml_gallocr_t allocr) {
// reset the allocator to free all the memory allocated during the previous inference
struct ggml_cgraph * gf = build_graph(model);
// allocate tensors
ggml_gallocr_alloc_graph(allocr, gf);
int n_threads = 1; // number of threads to perform some operations with multi-threading
if (ggml_backend_is_cpu(model.backend)) {
ggml_backend_cpu_set_n_threads(model.backend, n_threads);
}
ggml_backend_graph_compute(model.backend, gf);
// in this case, the output tensor is the last one in the graph
return ggml_graph_node(gf, -1);
}
// Write every element of a tensor to `filename` as text, one value per line.
//   data : start of the tensor data in host memory
//   type : element type; F16, F32, I32, I16 and I8 are handled, anything else aborts
//   ne   : number of elements per dimension (4 entries)
//   nb   : stride in bytes per dimension (4 entries)
// Elements are visited with dimension 0 varying fastest. A NaN value aborts the
// program. If the output file cannot be opened, an error is printed to stderr
// and nothing is written.
static void ggml_dump_tensor(uint8_t * data, ggml_type type, const int64_t * ne, const size_t * nb, const std::string & filename) {
    std::ofstream outfile(filename);
    if (!outfile.is_open()) {
        // without this check a missing or unwritable path would silently produce no dump
        fprintf(stderr, "%s: failed to open '%s' for writing\n", __func__, filename.c_str());
        return;
    }

    for (int64_t i3 = 0; i3 < ne[3]; i3++) {
        for (int64_t i2 = 0; i2 < ne[2]; i2++) {
            for (int64_t i1 = 0; i1 < ne[1]; i1++) {
                for (int64_t i0 = 0; i0 < ne[0]; i0++) {
                    // byte offset of element (i0, i1, i2, i3)
                    const size_t i = i3 * nb[3] + i2 * nb[2] + i1 * nb[1] + i0 * nb[0];

                    // convert the element to float according to the tensor type
                    float v = 0.0f;
                    switch (type) {
                        case GGML_TYPE_F16:
                            v = ggml_fp16_to_fp32(*(ggml_fp16_t *) &data[i]);
                            break;
                        case GGML_TYPE_F32:
                            v = *(float *) &data[i];
                            break;
                        case GGML_TYPE_I32:
                            v = (float) *(int32_t *) &data[i];
                            break;
                        case GGML_TYPE_I16:
                            v = (float) *(int16_t *) &data[i];
                            break;
                        case GGML_TYPE_I8:
                            v = (float) *(int8_t *) &data[i];
                            break;
                        default:
                            GGML_ABORT("fatal error");
                    }

                    if (std::isnan(v)) {
                        GGML_ABORT("nan got");
                    }
                    outfile << v << '\n';
                }
            }
        }
    }
}
// Reproduction driver: multiplies two all-ones matrices on the selected backend
// and dumps the result to "result.txt", one value per line.
// Returns 0 on success and 1 when reserving the compute buffer or running the
// graph fails.
int main(void) {
    ggml_time_init();

    // shapes of the two operands
    const int rows_A = 200, cols_A = 200;
    const int rows_B = 200, cols_B = 200;
    static_assert(cols_A == cols_B, "ggml_mul_mat requires both operands to have the same first dimension");

    // host data, sized from the shapes above so that changing a dimension
    // cannot leave the buffers too small for the tensors created from them
    std::vector<float> A_data(static_cast<size_t>(rows_A) * cols_A, 1.0f);
    std::vector<float> B_data(static_cast<size_t>(rows_B) * cols_B, 1.0f);

    simple_model model;
    load_model(model, A_data.data(), B_data.data(), rows_A, cols_A, rows_B, cols_B);

    ggml_gallocr_t allocr = ggml_gallocr_new(ggml_backend_get_default_buffer_type(model.backend));

    // releases the graph allocator, the tensor context, the backend buffer and the backend
    const auto release_all = [&]() {
        ggml_gallocr_free(allocr);
        ggml_free(model.ctx);
        ggml_backend_buffer_free(model.buffer);
        ggml_backend_free(model.backend);
    };

    // calculate the temporary memory required to compute
    {
        // create the worst case graph for memory usage estimation
        struct ggml_cgraph * gf = build_graph(model);
        if (!ggml_gallocr_reserve(allocr, gf)) {
            fprintf(stderr, "%s: failed to reserve the compute buffer\n", __func__);
            release_all();
            return 1;
        }
        size_t mem_size = ggml_gallocr_get_buffer_size(allocr, 0);
        fprintf(stderr, "%s: compute buffer size: %.4f KB\n", __func__, mem_size/1024.0);
    }

    // perform computation
    struct ggml_tensor * result = compute(model, allocr);
    if (result == nullptr) {
        fprintf(stderr, "%s: computation failed\n", __func__);
        release_all();
        return 1;
    }

    // bring the data from the backend memory into a host array and dump it
    std::vector<float> out_data(ggml_nelements(result));
    ggml_backend_tensor_get(result, out_data.data(), 0, ggml_nbytes(result));
    ggml_dump_tensor((uint8_t *)out_data.data(), result->type, result->ne, result->nb, "result.txt");

    release_all();
    return 0;
}
log with GGML_VULKAN_DEBUG=ON
load_model: using Vulkan backend
ggml_backend_vk_init(0)
ggml_vk_init(, 0)
ggml_vk_instance_init()
ggml_vulkan: Found 1 Vulkan devices:
ggml_vk_print_gpu_info(0)
ggml_vulkan: 0 = Turnip Adreno (TM) 750 (turnip Mesa driver) | uma: 1 | fp16: 1 | warp size: 128 | shared memory: 32768 | int dot: 0 | matrix cores: none
ggml_vk_get_device(0)
Initializing new vk_device
ggml_vk_find_queue_family_index()
ggml_vk_find_queue_family_index()
ggml_vk_create_queue()
ggml_vk_load_shaders(Vulkan0)
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=0, src0_type=f32, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=0, src0_type=f32, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=0, src0_type=f32, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=1, src0_type=f32, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=1, src0_type=f32, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=1, src0_type=f32, supported=0
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=0, src0_type=f16, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=0, src0_type=f16, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=0, src0_type=f16, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=1, src0_type=f16, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=1, src0_type=f16, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=1, src0_type=f16, supported=0
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=0, src0_type=q4_0, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=0, src0_type=q4_0, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=0, src0_type=q4_0, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=1, src0_type=q4_0, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=1, src0_type=q4_0, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=1, src0_type=q4_0, supported=0
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=0, src0_type=q4_1, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=0, src0_type=q4_1, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=0, src0_type=q4_1, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=1, src0_type=q4_1, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=1, src0_type=q4_1, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=1, src0_type=q4_1, supported=0
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=0, src0_type=DEPRECATED, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=0, src0_type=DEPRECATED, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=0, src0_type=DEPRECATED, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=1, src0_type=DEPRECATED, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=1, src0_type=DEPRECATED, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=1, src0_type=DEPRECATED, supported=0
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=0, src0_type=DEPRECATED, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=0, src0_type=DEPRECATED, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=0, src0_type=DEPRECATED, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=1, src0_type=DEPRECATED, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=1, src0_type=DEPRECATED, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=1, src0_type=DEPRECATED, supported=0
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=0, src0_type=q5_0, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=0, src0_type=q5_0, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=0, src0_type=q5_0, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=1, src0_type=q5_0, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=1, src0_type=q5_0, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=1, src0_type=q5_0, supported=0
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=0, src0_type=q5_1, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=0, src0_type=q5_1, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=0, src0_type=q5_1, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=1, src0_type=q5_1, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=1, src0_type=q5_1, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=1, src0_type=q5_1, supported=0
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=0, src0_type=q8_0, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=0, src0_type=q8_0, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=0, src0_type=q8_0, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=1, src0_type=q8_0, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=1, src0_type=q8_0, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=1, src0_type=q8_0, supported=0
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=0, src0_type=q8_1, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=0, src0_type=q8_1, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=0, src0_type=q8_1, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=1, src0_type=q8_1, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=1, src0_type=q8_1, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=1, src0_type=q8_1, supported=0
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=0, src0_type=q2_K, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=0, src0_type=q2_K, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=0, src0_type=q2_K, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=1, src0_type=q2_K, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=1, src0_type=q2_K, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=1, src0_type=q2_K, supported=0
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=0, src0_type=q3_K, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=0, src0_type=q3_K, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=0, src0_type=q3_K, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=1, src0_type=q3_K, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=1, src0_type=q3_K, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=1, src0_type=q3_K, supported=0
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=0, src0_type=q4_K, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=0, src0_type=q4_K, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=0, src0_type=q4_K, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=1, src0_type=q4_K, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=1, src0_type=q4_K, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=1, src0_type=q4_K, supported=0
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=0, src0_type=q5_K, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=0, src0_type=q5_K, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=0, src0_type=q5_K, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=1, src0_type=q5_K, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=1, src0_type=q5_K, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=1, src0_type=q5_K, supported=0
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=0, src0_type=q6_K, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=0, src0_type=q6_K, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=0, src0_type=q6_K, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=1, src0_type=q6_K, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=1, src0_type=q6_K, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=1, src0_type=q6_K, supported=0
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=0, src0_type=q8_K, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=0, src0_type=q8_K, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=0, src0_type=q8_K, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=1, src0_type=q8_K, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=1, src0_type=q8_K, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=1, src0_type=q8_K, supported=0
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=0, src0_type=iq2_xxs, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=0, src0_type=iq2_xxs, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=0, src0_type=iq2_xxs, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=1, src0_type=iq2_xxs, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=1, src0_type=iq2_xxs, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=1, src0_type=iq2_xxs, supported=0
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=0, src0_type=iq2_xs, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=0, src0_type=iq2_xs, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=0, src0_type=iq2_xs, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=1, src0_type=iq2_xs, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=1, src0_type=iq2_xs, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=1, src0_type=iq2_xs, supported=0
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=0, src0_type=iq3_xxs, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=0, src0_type=iq3_xxs, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=0, src0_type=iq3_xxs, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=1, src0_type=iq3_xxs, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=1, src0_type=iq3_xxs, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=1, src0_type=iq3_xxs, supported=0
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=0, src0_type=iq1_s, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=0, src0_type=iq1_s, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=0, src0_type=iq1_s, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=1, src0_type=iq1_s, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=1, src0_type=iq1_s, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=1, src0_type=iq1_s, supported=0
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=0, src0_type=iq4_nl, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=0, src0_type=iq4_nl, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=0, src0_type=iq4_nl, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=1, src0_type=iq4_nl, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=1, src0_type=iq4_nl, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=1, src0_type=iq4_nl, supported=0
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=0, src0_type=iq3_s, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=0, src0_type=iq3_s, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=0, src0_type=iq3_s, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=1, src0_type=iq3_s, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=1, src0_type=iq3_s, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=1, src0_type=iq3_s, supported=0
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=0, src0_type=iq2_s, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=0, src0_type=iq2_s, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=0, src0_type=iq2_s, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=1, src0_type=iq2_s, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=1, src0_type=iq2_s, supported=0
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=0, src0_type=iq4_xs, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=0, src0_type=iq4_xs, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=0, src0_type=iq4_xs, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=1, src0_type=iq4_xs, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=1, src0_type=iq4_xs, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=1, src0_type=iq4_xs, supported=0
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=0, src0_type=i8, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=0, src0_type=i8, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=0, src0_type=i8, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=1, src0_type=i8, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=1, src0_type=i8, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=1, src0_type=i8, supported=0
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=0, src0_type=i16, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=0, src0_type=i16, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=0, src0_type=i16, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=1, src0_type=i16, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=1, src0_type=i16, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=1, src0_type=i16, supported=0
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=0, src0_type=i32, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=0, src0_type=i32, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=0, src0_type=i32, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=1, src0_type=i32, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=1, src0_type=i32, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=1, src0_type=i32, supported=0
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=0, src0_type=i64, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=0, src0_type=i64, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=0, src0_type=i64, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=1, src0_type=i64, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=1, src0_type=i64, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=1, src0_type=i64, supported=0
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=0, src0_type=f64, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=0, src0_type=f64, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=0, src0_type=f64, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=1, src0_type=f64, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=1, src0_type=f64, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=1, src0_type=f64, supported=0
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=0, src0_type=iq1_m, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=0, src0_type=iq1_m, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=0, src0_type=iq1_m, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=1, src0_type=iq1_m, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=1, src0_type=iq1_m, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=1, src0_type=iq1_m, supported=0
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=0, src0_type=bf16, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=0, src0_type=bf16, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=0, src0_type=bf16, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=1, src0_type=bf16, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=1, src0_type=bf16, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=1, src0_type=bf16, supported=0
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=0, src0_type=TYPE_Q4_0_4_4 REMOVED, use Q4_0 with runtime repacking, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=0, src0_type=TYPE_Q4_0_4_4 REMOVED, use Q4_0 with runtime repacking, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=0, src0_type=TYPE_Q4_0_4_4 REMOVED, use Q4_0 with runtime repacking, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=1, src0_type=TYPE_Q4_0_4_4 REMOVED, use Q4_0 with runtime repacking, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=1, src0_type=TYPE_Q4_0_4_4 REMOVED, use Q4_0 with runtime repacking, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=1, src0_type=TYPE_Q4_0_4_4 REMOVED, use Q4_0 with runtime repacking, supported=0
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=0, src0_type=TYPE_Q4_0_4_8 REMOVED, use Q4_0 with runtime repacking, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=0, src0_type=TYPE_Q4_0_4_8 REMOVED, use Q4_0 with runtime repacking, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=0, src0_type=TYPE_Q4_0_4_8 REMOVED, use Q4_0 with runtime repacking, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=1, src0_type=TYPE_Q4_0_4_8 REMOVED, use Q4_0 with runtime repacking, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=1, src0_type=TYPE_Q4_0_4_8 REMOVED, use Q4_0 with runtime repacking, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=1, src0_type=TYPE_Q4_0_4_8 REMOVED, use Q4_0 with runtime repacking, supported=0
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=0, src0_type=TYPE_Q4_0_8_8 REMOVED, use Q4_0 with runtime repacking, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=0, src0_type=TYPE_Q4_0_8_8 REMOVED, use Q4_0 with runtime repacking, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=0, src0_type=TYPE_Q4_0_8_8 REMOVED, use Q4_0 with runtime repacking, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=1, src0_type=TYPE_Q4_0_8_8 REMOVED, use Q4_0 with runtime repacking, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=1, src0_type=TYPE_Q4_0_8_8 REMOVED, use Q4_0 with runtime repacking, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=1, src0_type=TYPE_Q4_0_8_8 REMOVED, use Q4_0 with runtime repacking, supported=0
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=0, src0_type=tq1_0, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=0, src0_type=tq1_0, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=0, src0_type=tq1_0, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=1, src0_type=tq1_0, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=1, src0_type=tq1_0, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=1, src0_type=tq1_0, supported=0
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=0, src0_type=tq2_0, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=0, src0_type=tq2_0, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=0, src0_type=tq2_0, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=1, src0_type=tq2_0, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=1, src0_type=tq2_0, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=1, src0_type=tq2_0, supported=0
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=0, src0_type=TYPE_IQ4_NL_4_4 REMOVED, use IQ4_NL with runtime repacking, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=0, src0_type=TYPE_IQ4_NL_4_4 REMOVED, use IQ4_NL with runtime repacking, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=0, src0_type=TYPE_IQ4_NL_4_4 REMOVED, use IQ4_NL with runtime repacking, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=1, src0_type=TYPE_IQ4_NL_4_4 REMOVED, use IQ4_NL with runtime repacking, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=1, src0_type=TYPE_IQ4_NL_4_4 REMOVED, use IQ4_NL with runtime repacking, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=1, src0_type=TYPE_IQ4_NL_4_4 REMOVED, use IQ4_NL with runtime repacking, supported=0
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=0, src0_type=TYPE_IQ4_NL_4_8 REMOVED, use IQ4_NL with runtime repacking, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=0, src0_type=TYPE_IQ4_NL_4_8 REMOVED, use IQ4_NL with runtime repacking, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=0, src0_type=TYPE_IQ4_NL_4_8 REMOVED, use IQ4_NL with runtime repacking, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=1, src0_type=TYPE_IQ4_NL_4_8 REMOVED, use IQ4_NL with runtime repacking, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=1, src0_type=TYPE_IQ4_NL_4_8 REMOVED, use IQ4_NL with runtime repacking, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=1, src0_type=TYPE_IQ4_NL_4_8 REMOVED, use IQ4_NL with runtime repacking, supported=0
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=0, src0_type=TYPE_IQ4_NL_8_8 REMOVED, use IQ4_NL with runtime repacking, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=0, src0_type=TYPE_IQ4_NL_8_8 REMOVED, use IQ4_NL with runtime repacking, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=0, src0_type=TYPE_IQ4_NL_8_8 REMOVED, use IQ4_NL with runtime repacking, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=1, src0_type=TYPE_IQ4_NL_8_8 REMOVED, use IQ4_NL with runtime repacking, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=1, src0_type=TYPE_IQ4_NL_8_8 REMOVED, use IQ4_NL with runtime repacking, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=1, src0_type=TYPE_IQ4_NL_8_8 REMOVED, use IQ4_NL with runtime repacking, supported=0
ggml_backend_vk_buffer_type(0)
ggml_vk_get_device(0)
ggml_vulkan memory: ggml_backend_vk_buffer_type_alloc_buffer(320000)
ggml_vk_create_buffer(Vulkan0, 320000, { DeviceLocal }, { HostVisible | HostCoherent })
ggml_vulkan memory: Vulkan0: +312.50 KiB device at 0xb400007cd64716f0. Total device: 312.50 KiB, total host: 0 B
ggml_backend_vk_buffer_init_tensor(0xb400007cd6470c40 (0xb400007e264516a0), 0xb400007e66441460)
ggml_backend_vk_buffer_init_tensor(0xb400007cd6470c40 (0xb400007e264516a0), 0xb400007e664415d0)
ggml_backend_vk_buffer_set_tensor(0xb400007cd6470c40, 0xb400007e66441460, 0xb400007ec8a3b000, 0, 160000)
ggml_vk_buffer_write(160000)
ggml_vk_buffer_write_2d(160000, 1)
ggml_vk_create_temporary_context(0xb400007cd6470478)
ggml_vk_ctx_begin(Vulkan0)
ggml_vk_create_cmd_buffer()
ggml_vk_buffer_write_2d_async(160000, 1)
STAGING
ggml_vulkan memory: ggml_vk_ensure_sync_staging_buffer(160000)
ggml_vk_create_buffer(Vulkan0, 160000, { HostVisible | HostCoherent | HostCached }, { HostVisible | HostCoherent })
ggml_vulkan memory: Vulkan0: +156.25 KiB host at 0xb400007cd6470e80. Total device: 312.50 KiB, total host: 156.25 KiB
ggml_vk_sync_buffers()
ggml_vk_ctx_end(0xb400007cd6470478, 1)
ggml_vk_submit(0xb400007cd6470478, 0xb400007dd643fdd0)
ggml_vk_queue_command_pools_cleanup()
ggml_backend_vk_buffer_set_tensor(0xb400007cd6470c40, 0xb400007e664415d0, 0xb400007ec544d000, 0, 160000)
ggml_vk_buffer_write(160000)
ggml_vk_buffer_write_2d(160000, 1)
ggml_vk_create_temporary_context(0xb400007cd6470478)
ggml_vk_ctx_begin(Vulkan0)
ggml_vk_create_cmd_buffer()
ggml_vk_buffer_write_2d_async(160000, 1)
STAGING
ggml_vk_sync_buffers()
ggml_vk_ctx_end(0xb400007cd6470478, 1)
ggml_vk_submit(0xb400007cd6470478, 0xb400007dd643fdd0)
ggml_vk_queue_command_pools_cleanup()
ggml_backend_vk_buffer_type(0)
ggml_vk_get_device(0)
ggml_vulkan memory: ggml_backend_vk_buffer_type_alloc_buffer(160000)
ggml_vk_create_buffer(Vulkan0, 160000, { DeviceLocal }, { HostVisible | HostCoherent })
ggml_vulkan memory: Vulkan0: +156.25 KiB device at 0xb400007cd6470460. Total device: 468.75 KiB, total host: 156.25 KiB
main: compute buffer size: 156.2500 KB
ggml_backend_vk_buffer_init_tensor(0xb400007cd6471780 (0xb400007e26451e20), 0xb400007c0f3f6020)
ggml_backend_vk_graph_compute(1 nodes)
ggml_vk_build_graph(0xb400007c0f3f6020, MUL_MAT)
ggml_vk_mul_mat(0xb400007e66441460, 0xb400007e664415d0, 0xb400007c0f3f6020)
ggml_vk_mul_mat_q_f16((0xb400007e66441460, name=leaf_0, type=0, ne0=200, ne1=200, ne2=1, ne3=1, nb0=4, nb1=800, nb2=160000, nb3=160000), (0xb400007e664415d0, name=leaf_1, type=0, ne0=200, ne1=200, ne2=1, ne3=1, nb0=4, nb1=800, nb2=160000, nb3=160000), (0xb400007c0f3f6020, name=node_0, type=0, ne0=200, ne1=200, ne2=1, ne3=1, nb0=4, nb1=800, nb2=160000, nb3=160000), dryrun)
ggml_vk_get_mul_mat_mat_pipeline(f32, f32, 0)
ggml_vk_guess_matmul_pipeline_align(200, 200, f32, f32)
ggml_vk_guess_matmul_pipeline(200, 200, 1, f32, f32)
ggml_vk_align_size(200, 128)
ggml_vk_guess_matmul_pipeline(200, 200, 0, f32, f32)
ggml_vk_guess_split_k(200, 200, 200)
ggml_vk_get_to_fp16()
ggml_vk_get_to_fp16()
ggml_pipeline_request_descriptor_sets(matmul_f32_f32_l, 1)
ggml_vk_load_shaders(Vulkan0)
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=0, src0_type=f32, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=0, src0_type=f32, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=0, src0_type=f32, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=1, src0_type=f32, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=1, src0_type=f32, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=1, src0_type=f32, supported=0
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=0, src0_type=f16, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=0, src0_type=f16, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=0, src0_type=f16, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=1, src0_type=f16, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=1, src0_type=f16, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=1, src0_type=f16, supported=0
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=0, src0_type=q4_0, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=0, src0_type=q4_0, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=0, src0_type=q4_0, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=1, src0_type=q4_0, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=1, src0_type=q4_0, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=1, src0_type=q4_0, supported=0
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=0, src0_type=q4_1, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=0, src0_type=q4_1, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=0, src0_type=q4_1, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=1, src0_type=q4_1, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=1, src0_type=q4_1, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=1, src0_type=q4_1, supported=0
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=0, src0_type=DEPRECATED, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=0, src0_type=DEPRECATED, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=0, src0_type=DEPRECATED, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=1, src0_type=DEPRECATED, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=1, src0_type=DEPRECATED, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=1, src0_type=DEPRECATED, supported=0
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=0, src0_type=DEPRECATED, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=0, src0_type=DEPRECATED, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=0, src0_type=DEPRECATED, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=1, src0_type=DEPRECATED, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=1, src0_type=DEPRECATED, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=1, src0_type=DEPRECATED, supported=0
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=0, src0_type=q5_0, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=0, src0_type=q5_0, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=0, src0_type=q5_0, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=1, src0_type=q5_0, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=1, src0_type=q5_0, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=1, src0_type=q5_0, supported=0
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=0, src0_type=q5_1, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=0, src0_type=q5_1, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=0, src0_type=q5_1, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=1, src0_type=q5_1, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=1, src0_type=q5_1, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=1, src0_type=q5_1, supported=0
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=0, src0_type=q8_0, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=0, src0_type=q8_0, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=0, src0_type=q8_0, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=1, src0_type=q8_0, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=1, src0_type=q8_0, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=1, src0_type=q8_0, supported=0
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=0, src0_type=q8_1, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=0, src0_type=q8_1, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=0, src0_type=q8_1, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=1, src0_type=q8_1, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=1, src0_type=q8_1, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=1, src0_type=q8_1, supported=0
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=0, src0_type=q2_K, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=0, src0_type=q2_K, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=0, src0_type=q2_K, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=1, src0_type=q2_K, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=1, src0_type=q2_K, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=1, src0_type=q2_K, supported=0
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=0, src0_type=q3_K, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=0, src0_type=q3_K, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=0, src0_type=q3_K, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=1, src0_type=q3_K, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=1, src0_type=q3_K, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=1, src0_type=q3_K, supported=0
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=0, src0_type=q4_K, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=0, src0_type=q4_K, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=0, src0_type=q4_K, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=1, src0_type=q4_K, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=1, src0_type=q4_K, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=1, src0_type=q4_K, supported=0
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=0, src0_type=q5_K, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=0, src0_type=q5_K, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=0, src0_type=q5_K, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=1, src0_type=q5_K, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=1, src0_type=q5_K, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=1, src0_type=q5_K, supported=0
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=0, src0_type=q6_K, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=0, src0_type=q6_K, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=0, src0_type=q6_K, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=1, src0_type=q6_K, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=1, src0_type=q6_K, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=1, src0_type=q6_K, supported=0
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=0, src0_type=q8_K, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=0, src0_type=q8_K, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=0, src0_type=q8_K, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=1, src0_type=q8_K, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=1, src0_type=q8_K, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=1, src0_type=q8_K, supported=0
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=0, src0_type=iq2_xxs, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=0, src0_type=iq2_xxs, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=0, src0_type=iq2_xxs, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=1, src0_type=iq2_xxs, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=1, src0_type=iq2_xxs, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=1, src0_type=iq2_xxs, supported=0
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=0, src0_type=iq2_xs, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=0, src0_type=iq2_xs, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=0, src0_type=iq2_xs, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=1, src0_type=iq2_xs, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=1, src0_type=iq2_xs, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=1, src0_type=iq2_xs, supported=0
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=0, src0_type=iq3_xxs, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=0, src0_type=iq3_xxs, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=0, src0_type=iq3_xxs, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=1, src0_type=iq3_xxs, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=1, src0_type=iq3_xxs, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=1, src0_type=iq3_xxs, supported=0
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=0, src0_type=iq1_s, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=0, src0_type=iq1_s, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=0, src0_type=iq1_s, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=1, src0_type=iq1_s, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=1, src0_type=iq1_s, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=1, src0_type=iq1_s, supported=0
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=0, src0_type=iq4_nl, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=0, src0_type=iq4_nl, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=0, src0_type=iq4_nl, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=1, src0_type=iq4_nl, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=1, src0_type=iq4_nl, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=1, src0_type=iq4_nl, supported=0
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=0, src0_type=iq3_s, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=0, src0_type=iq3_s, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=0, src0_type=iq3_s, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=1, src0_type=iq3_s, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=1, src0_type=iq3_s, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=1, src0_type=iq3_s, supported=0
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=0, src0_type=iq2_s, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=0, src0_type=iq2_s, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=0, src0_type=iq2_s, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=1, src0_type=iq2_s, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=1, src0_type=iq2_s, supported=0
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=0, src0_type=iq4_xs, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=0, src0_type=iq4_xs, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=0, src0_type=iq4_xs, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=1, src0_type=iq4_xs, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=1, src0_type=iq4_xs, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=1, src0_type=iq4_xs, supported=0
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=0, src0_type=i8, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=0, src0_type=i8, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=0, src0_type=i8, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=1, src0_type=i8, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=1, src0_type=i8, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=1, src0_type=i8, supported=0
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=0, src0_type=i16, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=0, src0_type=i16, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=0, src0_type=i16, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=1, src0_type=i16, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=1, src0_type=i16, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=1, src0_type=i16, supported=0
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=0, src0_type=i32, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=0, src0_type=i32, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=0, src0_type=i32, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=1, src0_type=i32, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=1, src0_type=i32, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=1, src0_type=i32, supported=0
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=0, src0_type=i64, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=0, src0_type=i64, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=0, src0_type=i64, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=1, src0_type=i64, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=1, src0_type=i64, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=1, src0_type=i64, supported=0
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=0, src0_type=f64, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=0, src0_type=f64, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=0, src0_type=f64, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=1, src0_type=f64, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=1, src0_type=f64, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=1, src0_type=f64, supported=0
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=0, src0_type=iq1_m, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=0, src0_type=iq1_m, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=0, src0_type=iq1_m, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=1, src0_type=iq1_m, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=1, src0_type=iq1_m, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=1, src0_type=iq1_m, supported=0
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=0, src0_type=bf16, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=0, src0_type=bf16, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=0, src0_type=bf16, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=1, src0_type=bf16, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=1, src0_type=bf16, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=1, src0_type=bf16, supported=0
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=0, src0_type=TYPE_Q4_0_4_4 REMOVED, use Q4_0 with runtime repacking, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=0, src0_type=TYPE_Q4_0_4_4 REMOVED, use Q4_0 with runtime repacking, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=0, src0_type=TYPE_Q4_0_4_4 REMOVED, use Q4_0 with runtime repacking, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=1, src0_type=TYPE_Q4_0_4_4 REMOVED, use Q4_0 with runtime repacking, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=1, src0_type=TYPE_Q4_0_4_4 REMOVED, use Q4_0 with runtime repacking, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=1, src0_type=TYPE_Q4_0_4_4 REMOVED, use Q4_0 with runtime repacking, supported=0
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=0, src0_type=TYPE_Q4_0_4_8 REMOVED, use Q4_0 with runtime repacking, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=0, src0_type=TYPE_Q4_0_4_8 REMOVED, use Q4_0 with runtime repacking, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=0, src0_type=TYPE_Q4_0_4_8 REMOVED, use Q4_0 with runtime repacking, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=1, src0_type=TYPE_Q4_0_4_8 REMOVED, use Q4_0 with runtime repacking, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=1, src0_type=TYPE_Q4_0_4_8 REMOVED, use Q4_0 with runtime repacking, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=1, src0_type=TYPE_Q4_0_4_8 REMOVED, use Q4_0 with runtime repacking, supported=0
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=0, src0_type=TYPE_Q4_0_8_8 REMOVED, use Q4_0 with runtime repacking, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=0, src0_type=TYPE_Q4_0_8_8 REMOVED, use Q4_0 with runtime repacking, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=0, src0_type=TYPE_Q4_0_8_8 REMOVED, use Q4_0 with runtime repacking, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=1, src0_type=TYPE_Q4_0_8_8 REMOVED, use Q4_0 with runtime repacking, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=1, src0_type=TYPE_Q4_0_8_8 REMOVED, use Q4_0 with runtime repacking, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=1, src0_type=TYPE_Q4_0_8_8 REMOVED, use Q4_0 with runtime repacking, supported=0
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=0, src0_type=tq1_0, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=0, src0_type=tq1_0, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=0, src0_type=tq1_0, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=1, src0_type=tq1_0, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=1, src0_type=tq1_0, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=1, src0_type=tq1_0, supported=0
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=0, src0_type=tq2_0, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=0, src0_type=tq2_0, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=0, src0_type=tq2_0, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=1, src0_type=tq2_0, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=1, src0_type=tq2_0, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=1, src0_type=tq2_0, supported=0
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=0, src0_type=TYPE_IQ4_NL_4_4 REMOVED, use IQ4_NL with runtime repacking, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=0, src0_type=TYPE_IQ4_NL_4_4 REMOVED, use IQ4_NL with runtime repacking, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=0, src0_type=TYPE_IQ4_NL_4_4 REMOVED, use IQ4_NL with runtime repacking, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=1, src0_type=TYPE_IQ4_NL_4_4 REMOVED, use IQ4_NL with runtime repacking, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=1, src0_type=TYPE_IQ4_NL_4_4 REMOVED, use IQ4_NL with runtime repacking, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=1, src0_type=TYPE_IQ4_NL_4_4 REMOVED, use IQ4_NL with runtime repacking, supported=0
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=0, src0_type=TYPE_IQ4_NL_4_8 REMOVED, use IQ4_NL with runtime repacking, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=0, src0_type=TYPE_IQ4_NL_4_8 REMOVED, use IQ4_NL with runtime repacking, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=0, src0_type=TYPE_IQ4_NL_4_8 REMOVED, use IQ4_NL with runtime repacking, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=1, src0_type=TYPE_IQ4_NL_4_8 REMOVED, use IQ4_NL with runtime repacking, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=1, src0_type=TYPE_IQ4_NL_4_8 REMOVED, use IQ4_NL with runtime repacking, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=1, src0_type=TYPE_IQ4_NL_4_8 REMOVED, use IQ4_NL with runtime repacking, supported=0
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=0, src0_type=TYPE_IQ4_NL_8_8 REMOVED, use IQ4_NL with runtime repacking, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=0, src0_type=TYPE_IQ4_NL_8_8 REMOVED, use IQ4_NL with runtime repacking, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=0, src0_type=TYPE_IQ4_NL_8_8 REMOVED, use IQ4_NL with runtime repacking, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,32,32), mul_mat_id=1, src0_type=TYPE_IQ4_NL_8_8 REMOVED, use IQ4_NL with runtime repacking, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,64,64), mul_mat_id=1, src0_type=TYPE_IQ4_NL_8_8 REMOVED, use IQ4_NL with runtime repacking, supported=1
ggml_vk_matmul_shmem_support(warptile=(128,128,128), mul_mat_id=1, src0_type=TYPE_IQ4_NL_8_8 REMOVED, use IQ4_NL with runtime repacking, supported=0
ggml_vk_create_pipeline(Vulkan0, matmul_f32_f32_l, main, 3, (128,128,1), specialization_constants, 0, 0, 0)
ggml_vulkan memory: ggml_vk_preallocate_buffers(x_size: 0)
ggml_vk_create_buffer(Vulkan0, 0, { DeviceLocal }, { HostVisible | HostCoherent })
ggml_vulkan memory: ggml_vk_preallocate_buffers(y_size: 0)
ggml_vk_create_buffer(Vulkan0, 0, { DeviceLocal }, { HostVisible | HostCoherent })
ggml_vulkan memory: ggml_vk_preallocate_buffers(split_k_size: 0)
ggml_vk_create_buffer(Vulkan0, 0, { DeviceLocal }, { HostVisible | HostCoherent })
ggml_vk_build_graph(0xb400007c0f3f6020, MUL_MAT)
ggml_vk_create_context(0xb400007cd646fae8)
ggml_vk_ctx_begin(Vulkan0)
ggml_vk_create_cmd_buffer()
ggml_vk_mul_mat(0xb400007e66441460, 0xb400007e664415d0, 0xb400007c0f3f6020)
ggml_vk_mul_mat_q_f16((0xb400007e66441460, name=leaf_0, type=0, ne0=200, ne1=200, ne2=1, ne3=1, nb0=4, nb1=800, nb2=160000, nb3=160000), (0xb400007e664415d0, name=leaf_1, type=0, ne0=200, ne1=200, ne2=1, ne3=1, nb0=4, nb1=800, nb2=160000, nb3=160000), (0xb400007c0f3f6020, name=node_0, type=0, ne0=200, ne1=200, ne2=1, ne3=1, nb0=4, nb1=800, nb2=160000, nb3=160000), )
ggml_vk_get_mul_mat_mat_pipeline(f32, f32, 0)
ggml_vk_guess_matmul_pipeline_align(200, 200, f32, f32)
ggml_vk_guess_matmul_pipeline(200, 200, 1, f32, f32)
ggml_vk_align_size(200, 128)
ggml_vk_guess_matmul_pipeline(200, 200, 0, f32, f32)
ggml_vk_guess_split_k(200, 200, 200)
ggml_vk_get_to_fp16()
ggml_vk_get_to_fp16()
ggml_vk_matmul(a: (0xb400007cd64716f0, 0, 160000), b: (0xb400007cd64716f0, 160000, 160000), d: (0xb400007cd6470460, 0, 160000), split_k: (0x0, 0, 160000), m: 200, n: 200, k: 200, stride_a: 200, stride_b: 200, stride_d: 200, batch_stride_a: 40000, batch_stride_b: 40000, batch_stride_d: 40000, split_k: 1, batch: 1, ne02: 1, ne12: 1, broadcast2: 1, broadcast3: 1, padded_n: 200)
ggml_vk_sync_buffers()
ggml_vk_dispatch_pipeline(matmul_f32_f32_l, {(0xb400007cd64716f0, 0, 160000), (0xb400007cd64716f0, 160000, 160000), (0xb400007cd6470460, 0, 160000), }, (2,2,1))
ggml_vk_ctx_end(0xb400007cd646fae8, 1)
ggml_vk_compute_forward(0xb400007c0f3f6020, name=node_0, op=MUL_MAT, type=0, ne0=200, ne1=200, ne2=1, ne3=1, nb0=4, nb1=800, nb2=160000, nb3=160000, view_src=0x0, view_offs=0)
ggml_vk_check_results_0(node_0)
ggml_vk_buffer_read(0xb400007cd64716f0, 0, 160000)
ggml_vk_create_temporary_context(0xb400007cd6471678)
ggml_vk_ctx_begin(Vulkan0)
ggml_vk_create_cmd_buffer()
ggml_vk_buffer_read_2d_async(offset=0, width=160000, height=1)
STAGING
ggml_vk_sync_buffers()
ggml_vk_ctx_end(0xb400007cd6471678, 1)
ggml_vk_submit(0xb400007cd6471678, 0xb400007dd643fdd0)
ggml_vk_queue_command_pools_cleanup()
ggml_vk_buffer_read(0xb400007cd64716f0, 160000, 160000)
ggml_vk_create_temporary_context(0xb400007cd6471678)
ggml_vk_ctx_begin(Vulkan0)
ggml_vk_create_cmd_buffer()
ggml_vk_buffer_read_2d_async(offset=160000, width=160000, height=1)
STAGING
ggml_vk_sync_buffers()
ggml_vk_ctx_end(0xb400007cd6471678, 1)
ggml_vk_submit(0xb400007cd6471678, 0xb400007dd643fdd0)
ggml_vk_queue_command_pools_cleanup()
END ggml_vk_check_results_0(node_0)
ggml_vk_submit(0xb400007cd646fae8, 0xb400007dd6441390)
ggml_vk_check_results_1(node_0)
ggml_vk_buffer_read(0xb400007cd6470460, 0, 160000)
ggml_vk_create_temporary_context(0xb400007cd6471678)
ggml_vk_ctx_begin(Vulkan0)
ggml_vk_create_cmd_buffer()
ggml_vk_buffer_read_2d_async(offset=0, width=160000, height=1)
STAGING
ggml_vk_sync_buffers()
ggml_vk_ctx_end(0xb400007cd6471678, 1)
ggml_vk_submit(0xb400007cd6471678, 0xb400007dd643fdd0)
ggml_vk_queue_command_pools_cleanup()
1 node_0 op=MUL_MAT avg_err=0.36
END ggml_vk_check_results_1(node_0)
ggml_vk_graph_cleanup()
ggml_vk_command_pool_cleanup()
ggml_vk_command_pool_cleanup()
ggml_backend_vk_buffer_get_tensor(0xb400007cd6471780, 0xb400007c0f3f6020, 0xb400007ec3213f00, 0, 160000)
ggml_vk_buffer_read(0xb400007cd6470460, 0, 160000)
ggml_vk_create_temporary_context(0xb400007cd646fae8)
ggml_vk_ctx_begin(Vulkan0)
ggml_vk_create_cmd_buffer()
ggml_vk_buffer_read_2d_async(offset=0, width=160000, height=1)
STAGING
ggml_vk_sync_buffers()
ggml_vk_ctx_end(0xb400007cd646fae8, 1)
ggml_vk_submit(0xb400007cd646fae8, 0xb400007dd643fdd0)
ggml_vk_queue_command_pools_cleanup()
ggml_vulkan memory: ggml_backend_vk_buffer_free_buffer()
ggml_vulkan memory: Vulkan0: -156.25 KiB device at 0xb400007cd6470460. Total device: 312.50 KiB, total host: 156.25 KiB
~vk_buffer_struct(0xb400007cd6470460, 160000)
ggml_vulkan memory: ggml_backend_vk_buffer_free_buffer()
ggml_vulkan memory: Vulkan0: -312.50 KiB device at 0xb400007cd64716f0. Total device: 0 B, total host: 156.25 KiB
~vk_buffer_struct(0xb400007cd64716f0, 320000)
ggml_backend_vk_free(Vulkan0)
ggml_vk_cleanup(Vulkan0)
ggml_vk_graph_cleanup()
ggml_vk_command_pool_cleanup()
ggml_vk_command_pool_cleanup()
result matrix data:
Metadata
Metadata
Assignees
Labels
No labels