Skip to content

Commit 6b0bee4

Browse files
committed
whisper : Add PPC64 big endian support
1 parent 120eccc commit 6b0bee4

File tree

3 files changed

+76
-4
lines changed

3 files changed

+76
-4
lines changed

Makefile

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,11 +115,15 @@ endif
115115
ifeq ($(UNAME_M),amd64)
116116
CFLAGS += -mavx -mavx2 -mfma -mf16c
117117
endif
118-
ifeq ($(UNAME_M),ppc64le)
118+
ifneq ($(filter ppc64%,$(UNAME_M)),)
119119
POWER9_M := $(shell grep "POWER9" /proc/cpuinfo)
120120
ifneq (,$(findstring POWER9,$(POWER9_M)))
121121
CFLAGS += -mpower9-vector
122122
endif
123+
# Require C++23's std::byteswap for big-endian support.
124+
ifeq ($(UNAME_M),ppc64)
125+
CXXFLAGS += -std=c++23 -DGGML_BIG_ENDIAN
126+
endif
123127
endif
124128
ifndef WHISPER_NO_ACCELERATE
125129
# Mac M1 - include Accelerate framework

ggml.c

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -613,9 +613,12 @@ static const size_t CACHE_LINE_SIZE_F32 = CACHE_LINE_SIZE/sizeof(float);
613613
#define GGML_F16_VEC_LOAD(p, i) (i & 0x1) ? \
614614
vec_extract_fp32_from_shorth(vec_xl(0, p - GGML_F16_EPR)) : \
615615
vec_extract_fp32_from_shortl(vec_xl(0, p))
616-
#define GGML_F16_VEC_STORE(p, r, i) \
617-
if (i & 0x1) \
618-
vec_xst(vec_pack_to_short_fp32(r[i], r[i - 1]), 0, p - GGML_F16_EPR)
616+
#define GGML_ENDIAN_BYTE(i) ((unsigned char *)&(uint16_t){1})[i]
617+
#define GGML_F16_VEC_STORE(p, r, i) \
618+
if (i & 0x1) \
619+
vec_xst(vec_pack_to_short_fp32(r[i - GGML_ENDIAN_BYTE(1)], \
620+
r[i - GGML_ENDIAN_BYTE(0)]), \
621+
0, p - GGML_F16_EPR)
619622

620623
#elif defined(__wasm_simd128__)
621624

whisper.cpp

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,68 @@
1717
#include <regex>
1818
#include <random>
1919

20+
#if defined(GGML_BIG_ENDIAN)
#include <bit>
#include <cstdint>  // std::uint32_t used by the float specialization below

// Byte-swap a scalar of any integral type. Model files are written
// little-endian, so on big-endian hosts every multi-byte value read from
// disk must be swapped before use. Requires C++23 std::byteswap.
template<typename T>
static T byteswap(T value) {
    return std::byteswap(value);
}

// Floats have no std::byteswap overload: round-trip through the 32-bit
// integer bit pattern with std::bit_cast instead of a pointer cast
// (avoids strict-aliasing UB).
template<>
float byteswap(float value) {
    return std::bit_cast<float>(byteswap(std::bit_cast<std::uint32_t>(value)));
}

// Byte-swap every element of a tensor in place, interpreting the raw
// data buffer as a flat array of T.
template<typename T>
static void byteswap_tensor_data(ggml_tensor * tensor) {
    T * datum = reinterpret_cast<T *>(tensor->data);
    for (int i = 0; i < ggml_nelements(tensor); i++) {
        datum[i] = byteswap(datum[i]);
    }
}

// Dispatch on the tensor's element type and swap in place.
// Single-byte element types need no swapping and fall through.
static void byteswap_tensor(ggml_tensor * tensor) {
    switch (tensor->type) {
        case GGML_TYPE_I16: {
            byteswap_tensor_data<int16_t>(tensor);
            break;
        }
        case GGML_TYPE_F16: {
            // ggml_fp16_t is a 16-bit storage type; swap it as raw bits.
            byteswap_tensor_data<ggml_fp16_t>(tensor);
            break;
        }
        case GGML_TYPE_I32: {
            byteswap_tensor_data<int32_t>(tensor);
            break;
        }
        case GGML_TYPE_F32: {
            byteswap_tensor_data<float>(tensor);
            break;
        }
        default: { // GGML_TYPE_I8: single bytes, nothing to swap
            break;
        }
    }
}

#define BYTESWAP_VALUE(d) d = byteswap(d)
#define BYTESWAP_VECTOR(v) \
    do { \
        for (auto & datum : v.data) { \
            datum = byteswap(datum); \
        } \
    } while (0)
// Fixed: expand the macro parameter (t) instead of the hard-coded name
// `tensor`, which only compiled because the sole call site happened to
// pass a variable of exactly that name.
#define BYTESWAP_TENSOR(t) \
    do { \
        byteswap_tensor(t); \
    } while (0)
#else
// Little-endian hosts: all byte-swapping compiles away to no-ops.
#define BYTESWAP_VALUE(d) do {} while (0)
#define BYTESWAP_VECTOR(v) do {} while (0)
#define BYTESWAP_TENSOR(t) do {} while (0)
#endif
81+
2082
#define WHISPER_ASSERT(x) \
2183
do { \
2284
if (!(x)) { \
@@ -521,6 +583,7 @@ struct whisper_context {
521583
template<typename T>
522584
static void read_safe(whisper_model_loader * loader, T & dest) {
523585
loader->read(loader->context, &dest, sizeof(T));
586+
BYTESWAP_VALUE(dest);
524587
}
525588

526589
static bool kv_cache_init(
@@ -733,6 +796,7 @@ static bool whisper_model_load(struct whisper_model_loader * loader, whisper_con
733796

734797
filters.data.resize(filters.n_mel * filters.n_fft);
735798
loader->read(loader->context, filters.data.data(), filters.data.size() * sizeof(float));
799+
BYTESWAP_VECTOR(filters);
736800
}
737801

738802
// load vocab
@@ -1196,6 +1260,7 @@ static bool whisper_model_load(struct whisper_model_loader * loader, whisper_con
11961260
}
11971261

11981262
loader->read(loader->context, tensor->data, ggml_nbytes(tensor));
1263+
BYTESWAP_TENSOR(tensor);
11991264

12001265
//printf("%48s - [%5d, %5d, %5d], type = %6s, %6.2f MB\n", name.data(), ne[0], ne[1], ne[2], ftype == 0 ? "float" : "f16", ggml_nbytes(tensor)/1024.0/1024.0);
12011266
total_size += ggml_nbytes(tensor);

0 commit comments

Comments
 (0)