Skip to content

Commit 6a28a30

Browse files
committed
MINOR: tasks: do not keep cpu and latency times in struct task
It was a mistake to put these two fields in the struct task. This was added in 1.9 via commit 9efd745 ("MEDIUM: tasks: collect per-task CPU time and latency"). These fields are used solely by streams in order to report the measurements via the lat_ns* and cpu_ns* sample fetch functions when task profiling is enabled. For the rest of the tasks, this is pure CPU waste when profiling is enabled, and memory waste 100% of the time, as the point where these latencies and usages are measured is in the profiling array. Let's move the fields to the stream instead, and have process_stream() retrieve the relevant info from the thread's context. The struct task is now back to 120 bytes, i.e. almost two cache lines, with 32 bit still available.
1 parent beee600 commit 6a28a30

File tree

6 files changed

+38
-19
lines changed

6 files changed

+38
-19
lines changed

include/haproxy/stream-t.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,8 @@ struct stream {
168168
struct list back_refs; /* list of users tracking this stream */
169169
struct buffer_wait buffer_wait; /* position in the list of objects waiting for a buffer */
170170

171+
uint64_t lat_time; /* total latency time experienced */
172+
uint64_t cpu_time; /* total CPU time consumed */
171173
struct freq_ctr call_rate; /* stream task call rate without making progress */
172174

173175
short store_count;

include/haproxy/task-t.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -113,8 +113,6 @@ struct task {
113113
short nice; /* task prio from -1024 to +1024 */
114114
short tid; /* TID where it's allowed to run, <0 if anywhere */
115115
uint32_t wake_date; /* date of the last task wakeup */
116-
uint64_t lat_time; /* total latency time experienced */
117-
uint64_t cpu_time; /* total CPU time consumed */
118116
};
119117

120118
/* lightweight tasks, without priority, mainly used for I/Os */

include/haproxy/task.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -498,8 +498,6 @@ static inline struct task *task_init(struct task *t, int tid)
498498
t->nice = 0;
499499
t->calls = 0;
500500
t->wake_date = 0;
501-
t->cpu_time = 0;
502-
t->lat_time = 0;
503501
t->expire = TICK_ETERNITY;
504502
#ifdef DEBUG_TASK
505503
t->debug.caller_idx = 0;

src/sample.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4136,7 +4136,7 @@ smp_fetch_cpu_ns_avg(const struct arg *args, struct sample *smp, const char *kw,
41364136
return 0;
41374137

41384138
smp->data.type = SMP_T_SINT;
4139-
smp->data.u.sint = smp->strm->task->calls ? smp->strm->task->cpu_time / smp->strm->task->calls : 0;
4139+
smp->data.u.sint = smp->strm->task->calls ? smp->strm->cpu_time / smp->strm->task->calls : 0;
41404140
return 1;
41414141
}
41424142

@@ -4148,7 +4148,7 @@ smp_fetch_cpu_ns_tot(const struct arg *args, struct sample *smp, const char *kw,
41484148
return 0;
41494149

41504150
smp->data.type = SMP_T_SINT;
4151-
smp->data.u.sint = smp->strm->task->cpu_time;
4151+
smp->data.u.sint = smp->strm->cpu_time;
41524152
return 1;
41534153
}
41544154

@@ -4160,7 +4160,7 @@ smp_fetch_lat_ns_avg(const struct arg *args, struct sample *smp, const char *kw,
41604160
return 0;
41614161

41624162
smp->data.type = SMP_T_SINT;
4163-
smp->data.u.sint = smp->strm->task->calls ? smp->strm->task->lat_time / smp->strm->task->calls : 0;
4163+
smp->data.u.sint = smp->strm->task->calls ? smp->strm->lat_time / smp->strm->task->calls : 0;
41644164
return 1;
41654165
}
41664166

@@ -4172,7 +4172,7 @@ smp_fetch_lat_ns_tot(const struct arg *args, struct sample *smp, const char *kw,
41724172
return 0;
41734173

41744174
smp->data.type = SMP_T_SINT;
4175-
smp->data.u.sint = smp->strm->task->lat_time;
4175+
smp->data.u.sint = smp->strm->lat_time;
41764176
return 1;
41774177
}
41784178

src/stream.c

Lines changed: 31 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -407,6 +407,7 @@ struct stream *stream_new(struct session *sess, struct stconn *sc, struct buffer
407407
s->buffer_wait.target = s;
408408
s->buffer_wait.wakeup_cb = stream_buf_available;
409409

410+
s->lat_time = s->cpu_time = 0;
410411
s->call_rate.curr_tick = s->call_rate.curr_ctr = s->call_rate.prev_ctr = 0;
411412
s->pcli_next_pid = 0;
412413
s->pcli_flags = 0;
@@ -1556,6 +1557,17 @@ static void stream_update_both_sc(struct stream *s)
15561557
}
15571558
}
15581559

1560+
/* if the current task's wake_date was set, it's being profiled, thus we may
1561+
* report latencies and CPU usages in logs, so it's desirable to update the
1562+
* latency when entering process_stream().
1563+
*/
1564+
static void stream_cond_update_cpu_latency(struct stream *s)
1565+
{
1566+
uint32_t lat = th_ctx->sched_call_date - th_ctx->sched_wake_date;
1567+
1568+
s->lat_time += lat;
1569+
}
1570+
15591571
/* if the current task's wake_date was set, it's being profiled, thus we may
15601572
* report latencies and CPU usages in logs, so it's desirable to do that before
15611573
* logging in order to report accurate CPU usage. In this case we count that
@@ -1573,11 +1585,23 @@ static void stream_cond_update_cpu_usage(struct stream *s)
15731585
return;
15741586

15751587
cpu = (uint32_t)now_mono_time() - th_ctx->sched_call_date;
1576-
s->task->cpu_time += cpu;
1588+
s->cpu_time += cpu;
15771589
HA_ATOMIC_ADD(&th_ctx->sched_profile_entry->cpu_time, cpu);
15781590
th_ctx->sched_wake_date = 0;
15791591
}
15801592

1593+
/* this functions is called directly by the scheduler for tasks whose
1594+
* ->process points to process_stream(), and is used to keep latencies
1595+
* and CPU usage measurements accurate.
1596+
*/
1597+
void stream_update_timings(struct task *t, uint64_t lat, uint64_t cpu)
1598+
{
1599+
struct stream *s = t->context;
1600+
s->lat_time += lat;
1601+
s->cpu_time += cpu;
1602+
}
1603+
1604+
15811605
/* This macro is very specific to the function below. See the comments in
15821606
* process_stream() below to understand the logic and the tests.
15831607
*/
@@ -1649,6 +1673,7 @@ struct task *process_stream(struct task *t, void *context, unsigned int state)
16491673
DBG_TRACE_ENTER(STRM_EV_STRM_PROC, s);
16501674

16511675
activity[tid].stream_calls++;
1676+
stream_cond_update_cpu_latency(s);
16521677

16531678
req = &s->req;
16541679
res = &s->res;
@@ -3376,14 +3401,13 @@ static int stats_dump_full_strm_to_buffer(struct stconn *sc, struct stream *strm
33763401
}
33773402
else if ((tmpctx = sc_appctx(scf)) != NULL) {
33783403
chunk_appendf(&trash,
3379-
" app0=%p st0=%d st1=%d applet=%s tid=%d nice=%d calls=%u rate=%u cpu=%llu lat=%llu\n",
3404+
" app0=%p st0=%d st1=%d applet=%s tid=%d nice=%d calls=%u rate=%u\n",
33803405
tmpctx,
33813406
tmpctx->st0,
33823407
tmpctx->st1,
33833408
tmpctx->applet->name,
33843409
tmpctx->t->tid,
3385-
tmpctx->t->nice, tmpctx->t->calls, read_freq_ctr(&tmpctx->call_rate),
3386-
(unsigned long long)tmpctx->t->cpu_time, (unsigned long long)tmpctx->t->lat_time);
3410+
tmpctx->t->nice, tmpctx->t->calls, read_freq_ctr(&tmpctx->call_rate));
33873411
}
33883412

33893413
scb = strm->scb;
@@ -3419,14 +3443,13 @@ static int stats_dump_full_strm_to_buffer(struct stconn *sc, struct stream *strm
34193443
}
34203444
else if ((tmpctx = sc_appctx(scb)) != NULL) {
34213445
chunk_appendf(&trash,
3422-
" app1=%p st0=%d st1=%d applet=%s tid=%d nice=%d calls=%u rate=%u cpu=%llu lat=%llu\n",
3446+
" app1=%p st0=%d st1=%d applet=%s tid=%d nice=%d calls=%u rate=%u\n",
34233447
tmpctx,
34243448
tmpctx->st0,
34253449
tmpctx->st1,
34263450
tmpctx->applet->name,
34273451
tmpctx->t->tid,
3428-
tmpctx->t->nice, tmpctx->t->calls, read_freq_ctr(&tmpctx->call_rate),
3429-
(unsigned long long)tmpctx->t->cpu_time, (unsigned long long)tmpctx->t->lat_time);
3452+
tmpctx->t->nice, tmpctx->t->calls, read_freq_ctr(&tmpctx->call_rate));
34303453
}
34313454

34323455
chunk_appendf(&trash,
@@ -3682,7 +3705,7 @@ static int cli_io_handler_dump_sess(struct appctx *appctx)
36823705
curr_strm->task->state, curr_strm->stream_epoch,
36833706
human_time(now.tv_sec - curr_strm->logs.tv_accept.tv_sec, 1),
36843707
curr_strm->task->calls, read_freq_ctr(&curr_strm->call_rate),
3685-
(unsigned long long)curr_strm->task->cpu_time, (unsigned long long)curr_strm->task->lat_time);
3708+
(unsigned long long)curr_strm->cpu_time, (unsigned long long)curr_strm->lat_time);
36863709

36873710
chunk_appendf(&trash,
36883711
" rq[f=%06xh,i=%u,an=%02xh,rx=%s",

src/task.c

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#include <haproxy/tools.h>
2626

2727
extern struct task *process_stream(struct task *t, void *context, unsigned int state);
28+
extern void stream_update_timings(struct task *t, uint64_t lat, uint64_t cpu);
2829

2930
DECLARE_POOL(pool_head_task, "task", sizeof(struct task));
3031
DECLARE_POOL(pool_head_tasklet, "tasklet", sizeof(struct tasklet));
@@ -611,7 +612,6 @@ unsigned int run_tasks_from_lists(unsigned int budgets[])
611612
uint32_t now_ns = now_mono_time();
612613
uint32_t lat = now_ns - t->wake_date;
613614

614-
t->lat_time += lat;
615615
t->wake_date = 0;
616616
th_ctx->sched_call_date = now_ns;
617617
profile_entry = sched_activity_entry(sched_activity, t->process);
@@ -669,8 +669,6 @@ unsigned int run_tasks_from_lists(unsigned int budgets[])
669669
if (unlikely(th_ctx->sched_wake_date)) {
670670
uint32_t cpu = (uint32_t)now_mono_time() - th_ctx->sched_call_date;
671671

672-
if (t)
673-
t->cpu_time += cpu;
674672
HA_ATOMIC_ADD(&profile_entry->cpu_time, cpu);
675673
}
676674

0 commit comments

Comments
 (0)