Skip to content

Feature/par job xcore 300 400 #186

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
May 15, 2025
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Separate out parallel invokes to C file
  • Loading branch information
panickal committed May 15, 2025
commit 5ea1e8835081bc80cd7ddedd1f61c985917157e9
89 changes: 89 additions & 0 deletions lib_tflite_micro/src/par_invoke_funcs.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@

#include "/Users/deepakpanickal/code/ai_tools/.venv/lib/python3.10/site-packages/xmos_ai_tools/runtime/include/lib_tflite_micro/src/thread_call.h"
// #include "xcore/parallel.h"


#if defined(__xcore__) || defined(__riscv_xxcore)
#include <xcore/parallel.h>
DECLARE_JOB(main_task, (thread_info_t *, synchronizer_t));
DECLARE_JOB(client_task, (thread_info_t *, int));
#if defined(__xcore__)
asm(".linkset __xcore_ugs_shim_main_task.nstackwords, 1024");
#endif
#endif

// Entry point that runs the model's subgraph; it is defined outside this file.
// Declared with an explicit (void) parameter list so that this is a real
// prototype: in C an empty list "()" leaves the arguments unspecified and
// disables argument checking at the call site.
extern void invoke_subgraph_c_trampoline(void);

// Main job of a parallel invoke.
//
// Records the synchronizer in the thread info via thread_store_sync() and then
// calls invoke_subgraph_c_trampoline().
//
// t    - thread bookkeeping structure shared with the client jobs.
// sync - synchronizer handed to this job (PAR_SYNC on xcore builds, 0 in the
//        sequential fallback used by the par_invoke_N() functions).
void main_task(thread_info_t *t, synchronizer_t sync) {
  thread_store_sync(t, sync);
  invoke_subgraph_c_trampoline();
}

// Client job of a parallel invoke: forwards to thread_client() with the shared
// thread info and this client's index.
//
// t - thread bookkeeping structure shared with the main job.
// n - index of this client (the par_invoke_N() functions pass 0, 1, 2, ...).
// NOTE(review): what thread_client() does with the index is not visible from
// this file - see its definition.
void client_task(thread_info_t *t, int n) {
thread_client(t, n);
}

// Invoke using a single job: only main_task, no client jobs.
//
// Without __xcore__ the main task is simply called directly with a zero
// synchronizer; on xcore it is dispatched through PAR_JOBS with PAR_SYNC.
//
// ti - thread bookkeeping structure passed to the job.
void par_invoke_1(thread_info_t *ti) {
#if !defined(__xcore__)
  main_task(ti, 0);
#else
  PAR_JOBS(PJOB(main_task, (ti, PAR_SYNC)));
#endif
}

// Invoke using two jobs: main_task plus one client job (index 0).
//
// Without __xcore__ the jobs are called one after another, client first and
// main task last; on xcore they are dispatched together through PAR_JOBS.
//
// ti - thread bookkeeping structure shared by all jobs.
void par_invoke_2(thread_info_t *ti) {
#if !defined(__xcore__)
  // Sequential fallback: the client runs before the main task.
  client_task(ti, 0);
  main_task(ti, 0);
#else
  PAR_JOBS(PJOB(main_task, (ti, PAR_SYNC)),
           PJOB(client_task, (ti, 0)));
#endif
}

// Invoke using three jobs: main_task plus two client jobs (indices 0 and 1).
//
// Without __xcore__ the jobs are called one after another, clients in index
// order and the main task last; on xcore they are dispatched together through
// PAR_JOBS.
//
// ti - thread bookkeeping structure shared by all jobs.
void par_invoke_3(thread_info_t *ti) {
#if !defined(__xcore__)
  // Sequential fallback: every client runs before the main task.
  for (int client = 0; client < 2; ++client) {
    client_task(ti, client);
  }
  main_task(ti, 0);
#else
  PAR_JOBS(PJOB(main_task, (ti, PAR_SYNC)),
           PJOB(client_task, (ti, 0)),
           PJOB(client_task, (ti, 1)));
#endif
}

// Invoke using four jobs: main_task plus three client jobs (indices 0 to 2).
//
// Without __xcore__ the jobs are called one after another, clients in index
// order and the main task last; on xcore they are dispatched together through
// PAR_JOBS.
//
// ti - thread bookkeeping structure shared by all jobs.
void par_invoke_4(thread_info_t *ti) {
#if !defined(__xcore__)
  // Sequential fallback: every client runs before the main task.
  for (int client = 0; client < 3; ++client) {
    client_task(ti, client);
  }
  main_task(ti, 0);
#else
  PAR_JOBS(PJOB(main_task, (ti, PAR_SYNC)),
           PJOB(client_task, (ti, 0)),
           PJOB(client_task, (ti, 1)),
           PJOB(client_task, (ti, 2)));
#endif
}

// Invoke using five jobs: main_task plus four client jobs (indices 0 to 3).
//
// Without __xcore__ the jobs are called one after another, clients in index
// order and the main task last; on xcore they are dispatched together through
// PAR_JOBS.
//
// ti - thread bookkeeping structure shared by all jobs.
void par_invoke_5(thread_info_t *ti) {
#if !defined(__xcore__)
  // Sequential fallback: every client runs before the main task.
  for (int client = 0; client < 4; ++client) {
    client_task(ti, client);
  }
  main_task(ti, 0);
#else
  PAR_JOBS(PJOB(main_task, (ti, PAR_SYNC)),
           PJOB(client_task, (ti, 0)),
           PJOB(client_task, (ti, 1)),
           PJOB(client_task, (ti, 2)),
           PJOB(client_task, (ti, 3)));
#endif
}
34 changes: 27 additions & 7 deletions tflite_micro_compiler/src/Compiler.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1411,12 +1411,33 @@ TfLiteStatus )"
return )" << prefix_ << R"(init_with_paging(weights_data_ptr, nullptr);
}

#ifdef __VX4A__
#define STACKFUNCTION(FN, BYTES) \
asm(".globl " # FN ); \
asm(".resource_list_empty " # FN ", \"callees\""); \
asm(".resource_list_empty " # FN ", \"tail_callees\""); \
asm(".resource_list_empty " # FN ", \"parallel_callees\""); \
asm(".resource_const " # FN ", \"stack_frame_bytes\", " # BYTES);

STACKFUNCTION(_Z22model_init_with_pagingPvS_, 1000);
// STACKFUNCTION(_Z12model_invokev, 1000);
STACKFUNCTION(__call_exitprocs_impl, 1000);
STACKFUNCTION(invoke_subgraph_c_trampoline, 1000);
// STACKFUNCTION(_Z10model_initPv);
#endif

TfLiteStatus mg_status;
extern "C" void invoke_subgraph_c_trampoline(){
mg_status = mg_InvokeSubgraph(0);
}

extern "C" void par_invoke_)"
<< numXCThreads_ << R"((void *thread_info);

)";
wr<<R"(#pragma stackfunction 1000
TfLiteStatus )"
<< prefix_ << R"(invoke() {
thread_init_)"
<< numXCThreads_ << R"((&xc_config.thread_info);

#ifdef TFLMC_XCORE_PROFILE
printf("\n\n\nProfiling invoke()...\n");
Expand All @@ -1425,12 +1446,11 @@ TfLiteStatus )"
op_times_summed = 0;
#endif

TfLiteStatus status = mg_InvokeSubgraph(0);
if (status != kTfLiteOk) {
return status;
par_invoke_)"
<< numXCThreads_ << R"((&xc_config.thread_info);
if (mg_status != kTfLiteOk) {
return mg_status;
}

thread_destroy(&xc_config.thread_info);
)";
if (has_xc_conv_ops) {
wr << R"(
Expand Down