@@ -295,7 +295,7 @@ HWY_NOINLINE void Attention(size_t batch_start, size_t batch_idx, size_t layer,
295295 static constexpr size_t kModelDim =
296296 gcpp::Activations<TConfig, kBatchSize >::kModelDim ;
297297 static constexpr size_t kHeads = TConfig::kHeads ;
298- const float kQueryScale = 1.0 / sqrtf (static_cast <float >(kQKVDim ));
298+ static const float kQueryScale = static_cast < float >( 1.0 / sqrt (static_cast <double >(kQKVDim ) ));
299299
300300 pool.Run (0 , kHeads , [&](const uint64_t head, size_t /* thread*/ ) HWY_ATTR {
301301 // linear projections to QKV
@@ -418,7 +418,7 @@ HWY_NOINLINE void Prefill(const int* tokens, size_t num_tokens, size_t pos,
418418 hwy::ThreadPool& inner_pool) {
419419 PROFILER_ZONE (" Gen.Prefill\\ Att\\ FFW" );
420420 static constexpr size_t kModelDim = TConfig::kModelDim ;
421- static const float kEmbScaling = sqrtf ( static_cast <float >(kModelDim ));
421+ static const float kEmbScaling = static_cast <float >(sqrt ( static_cast < double >( kModelDim ) ));
422422
423423 pool.Run (
424424 0 , num_tokens, [&](const uint64_t token_idx, size_t /* thread*/ ) HWY_ATTR {
@@ -473,7 +473,7 @@ void Transformer(int token, size_t pos,
473473 static constexpr size_t kLayers = TConfig::kLayers ;
474474 static constexpr size_t kModelDim = TConfig::kModelDim ;
475475
476- static const float kEmbScaling = sqrtf ( static_cast <float >(kModelDim ));
476+ static const float kEmbScaling = static_cast <float >(sqrt ( static_cast < double >( kModelDim ) ));
477477
478478 Decompress (c_weights.c_embedder_input_embedding , token * kModelDim ,
479479 activations.x .data (), kModelDim );
0 commit comments