Skip to content

Commit 73373ab

Browse files
committed
MEDIUM: h1: deduplicate the content-length header
Just like we used to do in proto_http, we now check that each and every occurrence of the content-length header field and each of its values are exactly identical, and we normalize the header to return the last value of the first header with spaces trimmed.
1 parent 2557f6a commit 73373ab

File tree

1 file changed

+86
-7
lines changed

1 file changed

+86
-7
lines changed

src/h1.c

Lines changed: 86 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -660,6 +660,78 @@ void http_msg_analyzer(struct http_msg *msg, struct hdr_idx *idx)
660660
}
661661

662662

663+
/* Parse the Content-Length header field of an HTTP/1 request. The function
664+
* checks all possible occurrences of a comma-delimited value, and verifies
665+
* if any of them doesn't match a previous value. It returns <0 if a value
666+
* differs, 0 if the whole header can be dropped (i.e. already known), or >0
667+
* if the value can be indexed (first one). In the last case, the value might
668+
* be adjusted and the caller must only add the updated value.
669+
*/
670+
int h1_parse_cont_len_header(struct h1m *h1m, struct ist *value)
671+
{
672+
char *e, *n;
673+
long long cl;
674+
int not_first = !!(h1m->flags & H1_MF_CLEN);
675+
struct ist word;
676+
677+
word.ptr = value->ptr - 1; // -1 for next loop's pre-increment
678+
e = value->ptr + value->len;
679+
680+
while (++word.ptr < e) {
681+
/* skip leading delimitor and blanks */
682+
if (unlikely(HTTP_IS_LWS(*word.ptr)))
683+
continue;
684+
685+
/* digits only now */
686+
for (cl = 0, n = word.ptr; n < e; n++) {
687+
unsigned int c = *n - '0';
688+
if (unlikely(c > 9)) {
689+
/* non-digit */
690+
if (unlikely(n == word.ptr)) // spaces only
691+
goto fail;
692+
break;
693+
}
694+
if (unlikely(cl > ULLONG_MAX / 10ULL))
695+
goto fail; /* multiply overflow */
696+
cl = cl * 10ULL;
697+
if (unlikely(cl + c < cl))
698+
goto fail; /* addition overflow */
699+
cl = cl + c;
700+
}
701+
702+
/* keep a copy of the exact cleaned value */
703+
word.len = n - word.ptr;
704+
705+
/* skip trailing LWS till next comma or EOL */
706+
for (; n < e; n++) {
707+
if (!HTTP_IS_LWS(*n)) {
708+
if (unlikely(*n != ','))
709+
goto fail;
710+
break;
711+
}
712+
}
713+
714+
/* if duplicate, must be equal */
715+
if (h1m->flags & H1_MF_CLEN && cl != h1m->body_len)
716+
goto fail;
717+
718+
/* OK, store this result as the one to be indexed */
719+
h1m->flags |= H1_MF_CLEN;
720+
h1m->curr_len = h1m->body_len = cl;
721+
*value = word;
722+
word.ptr = n;
723+
}
724+
/* here we've reached the end with a single value or a series of
725+
* identical values, all matching previous series if any. The last
726+
* parsed value was sent back into <value>. We just have to decide
727+
* if this occurrence has to be indexed (it's the first one) or
728+
* silently skipped (it's not the first one)
729+
*/
730+
return !not_first;
731+
fail:
732+
return -1;
733+
}
734+
663735
/* Parse the Transfer-Encoding: header field of an HTTP/1 request, looking for
664736
* "chunked" being the last value, and setting H1_MF_CHNK in h1m->flags only in
665737
* this case. Any other token found or any empty header field found will reset
@@ -1301,8 +1373,8 @@ int h1_headers_to_hdr_list(char *start, const char *stop,
13011373
n = ist2(start + sol, col - sol);
13021374
v = ist2(start + sov, eol - sov);
13031375

1304-
if (likely(!skip_update)) {
1305-
long long cl;
1376+
if (likely(!skip_update)) do {
1377+
int ret;
13061378

13071379
if (unlikely(hdr_count >= hdr_num)) {
13081380
state = H1_MSG_HDR_L2_LWS;
@@ -1312,17 +1384,24 @@ int h1_headers_to_hdr_list(char *start, const char *stop,
13121384
if (isteqi(n, ist("transfer-encoding"))) {
13131385
h1_parse_xfer_enc_header(h1m, v);
13141386
}
1315-
else if (isteqi(n, ist("content-length")) && !(h1m->flags & H1_MF_CHNK)) {
1316-
h1m->flags |= H1_MF_CLEN;
1317-
strl2llrc(v.ptr, v.len, &cl);
1318-
h1m->curr_len = h1m->body_len = cl;
1387+
else if (isteqi(n, ist("content-length"))) {
1388+
ret = h1_parse_cont_len_header(h1m, &v);
1389+
1390+
if (ret < 0) {
1391+
state = H1_MSG_HDR_L2_LWS;
1392+
goto http_msg_invalid;
1393+
}
1394+
else if (ret == 0) {
1395+
/* skip it */
1396+
break;
1397+
}
13191398
}
13201399
else if (isteqi(n, ist("connection"))) {
13211400
h1_parse_connection_header(h1m, v);
13221401
}
13231402

13241403
http_set_hdr(&hdr[hdr_count++], n, v);
1325-
}
1404+
} while (0);
13261405

13271406
sol = ptr - start;
13281407
if (likely(!HTTP_IS_CRLF(*ptr)))

0 commit comments

Comments
 (0)