From 96036e68683f61aee8fe4fbac6035ea970a5dd78 Mon Sep 17 00:00:00 2001 From: Constantine Date: Mon, 9 Mar 2026 11:38:40 +0200 Subject: [PATCH 01/13] fix: Sending of the dynamic table size update Advertise the selected dynamic table size if the client's table size is not equal to Tempesta FW dynamic table size. Details: when Tempesta receives SETTINGS frame and SETTINGS_HEADER_TABLE_SIZE in this frame is greater than default Tempesta's 4096, Tempesta advertise the size that it will be used. Before this patch Tempesta ignored it when Tempesta's table size is equal to default 4096 and size advertised by the client is greater than 4096. In other cases Tempesta did send updates. For reference see: RFC7541 Section 4.2 --- fw/hpack.c | 24 +++++++++++++----------- fw/hpack.h | 2 +- fw/http2.c | 5 ++--- 3 files changed, 16 insertions(+), 15 deletions(-) diff --git a/fw/hpack.c b/fw/hpack.c index 999e7529f..b3c03507c 100644 --- a/fw/hpack.c +++ b/fw/hpack.c @@ -3724,17 +3724,12 @@ tfw_hpack_transform(TfwHttpResp *__restrict resp, TfwStr *__restrict hdr) } void -tfw_hpack_set_rbuf_size(TfwHPackETbl *__restrict tbl, unsigned short new_size) +tfw_hpack_set_rbuf_size(TfwHPackETbl *__restrict tbl, + unsigned int requested_size) { - if (new_size > HPACK_ENC_TABLE_MAX_SIZE) { - T_WARN("Client requests hpack table size (%hu), which is " - "greater than HPACK_ENC_TABLE_MAX_SIZE.", new_size); - new_size = HPACK_ENC_TABLE_MAX_SIZE; - } - T_DBG3("%s: tbl->rb_len=%hu, tbl->size=%hu, tbl->window=%hu," - " new_size=%hu\n", __func__, tbl->rb_len, tbl->size, - tbl->window, new_size); + " requested_size=%u\n", __func__, tbl->rb_len, tbl->size, + tbl->window, requested_size); /* * RFC7541#section-4.2: @@ -3744,9 +3739,13 @@ tfw_hpack_set_rbuf_size(TfwHPackETbl *__restrict tbl, unsigned short new_size) * size that occurs in that interval MUST be signaled in a dynamic * table size update. 
*/ - if (tbl->window != new_size && (likely(!tbl->wnd_changed) - || unlikely(!tbl->window) || new_size < tbl->window)) + if (tbl->window != requested_size && (likely(!tbl->wnd_changed) + || unlikely(!tbl->window) || requested_size < tbl->window)) { + unsigned short new_size = min_t(unsigned int, requested_size, + HPACK_ENC_TABLE_MAX_SIZE); + BUILD_BUG_ON(HPACK_ENC_TABLE_MAX_SIZE > USHRT_MAX || + sizeof(new_size) != sizeof(tbl->window)); if (tbl->size > new_size) tfw_hpack_rbuf_calc(tbl, new_size, NULL, (TfwHPackETblIter *)tbl); @@ -3754,6 +3753,9 @@ tfw_hpack_set_rbuf_size(TfwHPackETbl *__restrict tbl, unsigned short new_size) tbl->window = new_size; tbl->wnd_changed = true; + + T_DBG3("%s: New hpack encoder table size has been set to %u\n", + __func__, tbl->window); } } diff --git a/fw/hpack.h b/fw/hpack.h index 1a139b5bb..e72bd80a9 100644 --- a/fw/hpack.h +++ b/fw/hpack.h @@ -306,7 +306,7 @@ int tfw_hpack_transform(TfwHttpResp *__restrict resp, TfwStr *__restrict hdr); int tfw_hpack_encode(TfwHttpResp *__restrict resp, TfwStr *__restrict hdr, bool use_pool, bool dyn_indexing); void tfw_hpack_set_rbuf_size(TfwHPackETbl *__restrict tbl, - unsigned short new_size); + unsigned int new_size); int tfw_hpack_decode(TfwHPack *__restrict hp, unsigned char *__restrict src, unsigned long n, TfwHttpReq *__restrict req, unsigned int *__restrict parsed); diff --git a/fw/http2.c b/fw/http2.c index 21c1ab1f6..92f27b257 100644 --- a/fw/http2.c +++ b/fw/http2.c @@ -99,9 +99,8 @@ tfw_h2_apply_settings_entry(TfwH2Ctx *ctx, unsigned short id, switch (id) { case HTTP2_SETTINGS_TABLE_SIZE: - dest->hdr_tbl_sz = min_t(unsigned int, - val, HPACK_ENC_TABLE_MAX_SIZE); - tfw_hpack_set_rbuf_size(&ctx->hpack.enc_tbl, dest->hdr_tbl_sz); + tfw_hpack_set_rbuf_size(&ctx->hpack.enc_tbl, val); + dest->hdr_tbl_sz = ctx->hpack.enc_tbl.window; break; case HTTP2_SETTINGS_ENABLE_PUSH: From 6d2e600e2895aa21f8f029db3c005ea2ab3d8393 Mon Sep 17 00:00:00 2001 From: Constantine Date: Wed, 11 Mar 2026 12:49:24 
+0200 Subject: [PATCH 02/13] fix: HPACK dynamic table desync Tempesta causes dynamic table desynchronization between client and Tempesta, the reason is wrong encoding and sending order. When response is ready for sending to the client Tempesta does: 1. Select HTTP2 stream 2. Encode headers 3. Check TCP window 4. If window is enough - sends the headers however in case when TCP window is too small to send the encoded headers Tempesta postpones sending. It begins from point (1) when TCP window update is received, and the issue is here: stream scheduler *may select* another stream to send thereby postponing the previous stream with already encoded headers [1]. To fix the issue in this patch we call `tfw_h2_stream_fsm_ignore_err()` with 0 flags, that leads to setting `ctx->cur_send_headers` and forces the stream scheduler to select this stream on the next scheduling cycle. It is a little bit tricky, but seems a good solution in terms of latency. An alternative way is to not encode headers if we lack TCP window, however in this case we introduce some latency postponing encoding, but there is also a pro: if the priority of the stream is changed and there is a stream with higher priority the stream with higher priority will be selected by the scheduler, which looks more accurate. [1] By design in HTTP/2, the stream for which the headers are encoded must be sent first. Otherwise, when encoding the next stream, new headers may be added to the HPACK dynamic table, which could invalidate the header indexes for the previous stream. --- fw/http_frame.c | 12 ++++++++++++ fw/http_stream.c | 10 ++++++---- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/fw/http_frame.c b/fw/http_frame.c index e7d1bf5a8..e55e113c2 100644 --- a/fw/http_frame.c +++ b/fw/http_frame.c @@ -2229,6 +2229,18 @@ do { \ } T_FSM_STATE(HTTP2_MAKE_HEADERS_FRAMES) { + /* + * This call doesn't change the stream state, but sets ctx->cur_send_headers. 
+ * We do this to force the stream scheduler to select this + * stream during next sending if current sending of this stream + * has been postponed due to lack of tcp window. + */ + r = tfw_h2_stream_fsm_ignore_err(ctx, stream, HTTP2_HEADERS, 0); + if (unlikely(r)) { + T_WARN("Wrong state during sending headers.\n"); + return -EPIPE; + } + CALC_FRAME_LENGTH_AND_SET_FRAME_TYPE(HTTP2_HEADERS, stream->xmit.h_len); if (unlikely(ctx->hpack.enc_tbl.wnd_changed)) { diff --git a/fw/http_stream.c b/fw/http_stream.c index 923ea20e0..c6f005c5a 100644 --- a/fw/http_stream.c +++ b/fw/http_stream.c @@ -346,10 +346,12 @@ do { \ } \ } while(0) -#define TFW_H2_FSM_TYPE_CHECK(ctx, stream, op, type) \ +#define TFW_H2_FSM_TYPE_CHECK(ctx, op, type, is_send) \ do { \ if ((ctx->cur_##op##_headers \ - && (type != HTTP2_CONTINUATION && type != HTTP2_RST_STREAM)) \ + && ((type == HTTP2_HEADERS && !is_send) || \ + (type != HTTP2_HEADERS && type != HTTP2_CONTINUATION && \ + type != HTTP2_RST_STREAM))) \ || (!ctx->cur_##op##_headers && type == HTTP2_CONTINUATION)) { \ *err = HTTP2_ECODE_PROTO; \ res = STREAM_FSM_RES_TERM_CONN; \ @@ -372,7 +374,7 @@ do { \ if (send) { TFW_H2_FSM_STREAM_CHECK(ctx, stream, send); - TFW_H2_FSM_TYPE_CHECK(ctx, stream, send, type); + TFW_H2_FSM_TYPE_CHECK(ctx, send, type, true); /* * Usually we would send HEADERS/CONTINUATION or DATA frames * to the client when HTTP2_STREAM_REM_HALF_CLOSED state @@ -399,7 +401,7 @@ do { \ */ } else { TFW_H2_FSM_STREAM_CHECK(ctx, stream, recv); - TFW_H2_FSM_TYPE_CHECK(ctx, stream, recv, type); + TFW_H2_FSM_TYPE_CHECK(ctx, recv, type, false); } switch (tfw_h2_get_stream_state(stream)) { From 1efdd6f2e74fe2844e1ec39f398d04da02888a76 Mon Sep 17 00:00:00 2001 From: Constantine Date: Fri, 13 Mar 2026 17:41:51 +0200 Subject: [PATCH 03/13] fix: couple of HPACK desyncs in trailers This patch fixes two issues with HPACK desync. 
The first one appears when we encode trailer headers at the moment of encoding regular headers For example: Tempesta encoded headers and trailers and proceeds to sending the body. During sending of the body the scheduler a few times preempts the current stream to send headers for another stream, therefore the HPACK dynamic table contains the new headers and the indexes used for trailers are invalid. To fix this we decided to not use the HPACK dynamic table for trailers, it is a good trade-off between complexity and efficiency. The reason for the second is not sending the new size of the dynamic table in the trailer HEADERS frame. When the client advertised the new size of the dynamic table Tempesta must respond with the selected size in the first HEADERS, it doesn't matter whether they are trailers or regular headers. Small refactoring to improve readability --- fw/hpack.c | 18 ++++---- fw/hpack.h | 7 ++- fw/http.c | 2 +- fw/http2.c | 2 +- fw/http_frame.c | 119 +++++++++++++++++++++++++++++------------------- 5 files changed, 89 insertions(+), 59 deletions(-) diff --git a/fw/hpack.c b/fw/hpack.c index b3c03507c..e56ca9d88 100644 --- a/fw/hpack.c +++ b/fw/hpack.c @@ -3718,9 +3718,10 @@ tfw_hpack_encode(TfwHttpResp *__restrict resp, TfwStr *__restrict hdr, * into the HTTP/2 HPACK format. 
*/ int -tfw_hpack_transform(TfwHttpResp *__restrict resp, TfwStr *__restrict hdr) +tfw_hpack_transform(TfwHttpResp *__restrict resp, TfwStr *__restrict hdr, + bool dyn_indexing) { - return __tfw_hpack_encode(resp, hdr, true, true, true); + return __tfw_hpack_encode(resp, hdr, true, dyn_indexing, true); } void @@ -3760,7 +3761,8 @@ tfw_hpack_set_rbuf_size(TfwHPackETbl *__restrict tbl, } int -tfw_hpack_enc_tbl_write_sz(TfwHPackETbl *__restrict tbl, TfwStream *stream) +tfw_hpack_enc_tbl_write_sz(TfwHPackETbl *tbl, struct sk_buff *skb_head, + unsigned int offset, unsigned int *acc_len) { TfwHPackInt tmp = {}; TfwStr dst = {}; @@ -3771,18 +3773,16 @@ tfw_hpack_enc_tbl_write_sz(TfwHPackETbl *__restrict tbl, TfwStream *stream) WARN_ON_ONCE(!tbl->wnd_changed); write_int(tbl->window, 0x1F, 0x20, &tmp); - data = ss_skb_data_ptr_by_offset(stream->xmit.skb_head, - FRAME_HEADER_SIZE); + data = ss_skb_data_ptr_by_offset(skb_head, + offset + FRAME_HEADER_SIZE); BUG_ON(!data); - r = ss_skb_get_room_w_frag(stream->xmit.skb_head, - stream->xmit.skb_head, - data, tmp.sz, &dst, &_); + r = ss_skb_get_room_w_frag(skb_head, skb_head, data, tmp.sz, &dst, &_); if (unlikely(r)) return r; memcpy_fast(dst.data, tmp.buf, tmp.sz); - stream->xmit.h_len += tmp.sz; + *acc_len += tmp.sz; tbl->wnd_changed = false; return 0; diff --git a/fw/hpack.h b/fw/hpack.h index e72bd80a9..bf5f5324d 100644 --- a/fw/hpack.h +++ b/fw/hpack.h @@ -302,7 +302,8 @@ void write_int(unsigned long index, unsigned short max, unsigned short mask, int tfw_hpack_init(TfwHPack *__restrict hp, TfwClientMem *owner, unsigned int htbl_sz); void tfw_hpack_clean(TfwHPack *__restrict hp); -int tfw_hpack_transform(TfwHttpResp *__restrict resp, TfwStr *__restrict hdr); +int tfw_hpack_transform(TfwHttpResp *__restrict resp, TfwStr *__restrict hdr, + bool dyn_indexing); int tfw_hpack_encode(TfwHttpResp *__restrict resp, TfwStr *__restrict hdr, bool use_pool, bool dyn_indexing); void tfw_hpack_set_rbuf_size(TfwHPackETbl *__restrict tbl, 
@@ -314,7 +315,9 @@ int tfw_hpack_cache_decode_expand(TfwHPack *__restrict hp, TfwHttpResp *__restrict resp, unsigned char *__restrict src, unsigned long n, TfwDecodeCacheIter *__restrict cd_iter); -int tfw_hpack_enc_tbl_write_sz(TfwHPackETbl *__restrict tbl, TfwStream *stream); +int tfw_hpack_enc_tbl_write_sz(TfwHPackETbl *tbl, struct sk_buff *skb_head, + unsigned int offset, + unsigned int *acc_len); static inline unsigned int tfw_hpack_int_size(unsigned long index, unsigned short max) diff --git a/fw/http.c b/fw/http.c index d7960ddac..aa4bbfeee 100644 --- a/fw/http.c +++ b/fw/http.c @@ -5385,7 +5385,7 @@ tfw_h2_hpack_encode_headers(TfwHttpResp *resp, const TfwHdrMods *h_mods) || tgt->flags & TFW_STR_TRAILER_HDR) continue; - r = tfw_hpack_transform(resp, tgt); + r = tfw_hpack_transform(resp, tgt, true); if (unlikely(r)) return r; } diff --git a/fw/http2.c b/fw/http2.c index 92f27b257..871f21f10 100644 --- a/fw/http2.c +++ b/fw/http2.c @@ -626,7 +626,7 @@ tfw_h2_hpack_encode_trailer_headers(TfwHttpResp *resp) T_DBG3("%s: hid=%hu, d_num=%hu, nchunks=%u\n", __func__, hid, d_num, ht->tbl[hid].nchunks); - r = tfw_hpack_transform(resp, tgt); + r = tfw_hpack_transform(resp, tgt, false); if (unlikely(r)) goto finish; } diff --git a/fw/http_frame.c b/fw/http_frame.c index e55e113c2..6bb820115 100644 --- a/fw/http_frame.c +++ b/fw/http_frame.c @@ -2136,6 +2136,14 @@ tfw_h2_insert_frame_header(struct sock *sk, TfwH2Ctx *ctx, TfwStream *stream, return r; } +static inline int +__tfw_h2_is_ready_to_send_postponed(const TfwH2Ctx *ctx, + const TfwStream *stream) +{ + return stream->xmit.postponed && !stream->xmit.frame_length && + !ctx->cur_send_headers; +} + static int tfw_h2_stream_send_postponed(struct sock *sk, struct sk_buff **skb_head, unsigned int mss_now, unsigned long *snd_wnd) @@ -2180,7 +2188,7 @@ do { \ T_FSM_EXIT(); \ } while(0) -#define CALC_FRAME_LENGTH_AND_SET_FRAME_TYPE(type, len) \ +#define CALC_FRAME_LENGTH_AND_SET_FRAME_TYPE_OR_EXIT(type, len) \ do { \ 
unsigned int max_len; \ unsigned int min_len; \ @@ -2241,13 +2249,15 @@ do { \ return -EPIPE; } - CALC_FRAME_LENGTH_AND_SET_FRAME_TYPE(HTTP2_HEADERS, - stream->xmit.h_len); + CALC_FRAME_LENGTH_AND_SET_FRAME_TYPE_OR_EXIT(HTTP2_HEADERS, + stream->xmit.h_len); if (unlikely(ctx->hpack.enc_tbl.wnd_changed)) { r = tfw_hpack_enc_tbl_write_sz(&ctx->hpack.enc_tbl, - stream); + stream->xmit.skb_head, + 0, + &stream->xmit.h_len); if (unlikely(r < 0)) { - T_WARN("Failed to encode hpack dynamic" + T_WARN("Failed to encode hpack dynamic " "table size %d", r); return r; } @@ -2264,8 +2274,8 @@ do { \ } T_FSM_STATE(HTTP2_MAKE_CONTINUATION_FRAMES) { - CALC_FRAME_LENGTH_AND_SET_FRAME_TYPE(HTTP2_CONTINUATION, - stream->xmit.h_len); + CALC_FRAME_LENGTH_AND_SET_FRAME_TYPE_OR_EXIT(HTTP2_CONTINUATION, + stream->xmit.h_len); r = tfw_h2_insert_frame_header(sk, ctx, stream, frame_type, frame_length); if (unlikely(r)) { @@ -2280,8 +2290,8 @@ do { \ if (tfw_h2_conn_or_stream_wnd_is_exceeded(ctx, stream)) ADJUST_BLOCKED_STREAMS_AND_EXIT(0, HTTP2_DATA); - CALC_FRAME_LENGTH_AND_SET_FRAME_TYPE(HTTP2_DATA, - stream->xmit.b_len); + CALC_FRAME_LENGTH_AND_SET_FRAME_TYPE_OR_EXIT(HTTP2_DATA, + stream->xmit.b_len); r = tfw_h2_insert_frame_header(sk, ctx, stream, frame_type, frame_length); if (unlikely (r)) { @@ -2295,8 +2305,28 @@ do { \ T_FSM_STATE(HTTP2_MAKE_TRAILER_FRAMES) { is_trailer_cont = true; - CALC_FRAME_LENGTH_AND_SET_FRAME_TYPE(HTTP2_HEADERS, - stream->xmit.t_len); + /* + * This call doesn't change the stream state, but sets ctx->cur_send_headers. + * We do this to force the stream scheduler to select this + * stream during next sending if current sending of this stream + * has been postponed due to lack of tcp window. 
+ */ + r = tfw_h2_stream_fsm_ignore_err(ctx, stream, HTTP2_HEADERS, 0); + + CALC_FRAME_LENGTH_AND_SET_FRAME_TYPE_OR_EXIT(HTTP2_HEADERS, + stream->xmit.t_len); + + if (unlikely(ctx->hpack.enc_tbl.wnd_changed)) { + r = tfw_hpack_enc_tbl_write_sz(&ctx->hpack.enc_tbl, + stream->xmit.skb_head, + stream->xmit.frame_length, + &stream->xmit.t_len); + if (unlikely(r < 0)) { + T_WARN("Failed to encode hpack dynamic " + "table size %d", r); + return r; + } + } r = tfw_h2_insert_frame_header(sk, ctx, stream, frame_type, frame_length); if (unlikely(r)) { @@ -2309,8 +2339,8 @@ do { \ T_FSM_STATE(HTTP2_MAKE_TRAILER_CONTINUATION_FRAMES) { is_trailer_cont = true; - CALC_FRAME_LENGTH_AND_SET_FRAME_TYPE(HTTP2_CONTINUATION, - stream->xmit.t_len); + CALC_FRAME_LENGTH_AND_SET_FRAME_TYPE_OR_EXIT(HTTP2_CONTINUATION, + stream->xmit.t_len); r = tfw_h2_insert_frame_header(sk, ctx, stream, frame_type, frame_length); if (unlikely(r)) { @@ -2323,45 +2353,43 @@ do { \ T_FSM_STATE(HTTP2_SEND_FRAMES) { if (likely(stream->xmit.frame_length)) { - r = tfw_h2_entail_stream_skb(sk, ctx, stream, - &stream->xmit.frame_length, - false); + r = tfw_h2_entail_stream_skb(sk, ctx, stream, + &stream->xmit.frame_length, + false); if (unlikely(r)) { T_WARN("Failed to send frame %d", r); return r; } } - if (stream->xmit.h_len) { + if (stream->xmit.h_len) T_FSM_JMP(HTTP2_MAKE_CONTINUATION_FRAMES); - } else { - if (unlikely(stream->xmit.postponed) - && !stream->xmit.frame_length - && !ctx->cur_send_headers) - { - struct sk_buff **head = &stream->xmit.postponed; - - r = tfw_h2_stream_send_postponed(sk, head, - mss_now, - snd_wnd); - if (unlikely(r)) { - T_WARN("Failed to send postponed" - " frames %d", r); - return r; - } + + if (unlikely(__tfw_h2_is_ready_to_send_postponed(ctx, + stream))) { + struct sk_buff **head = &stream->xmit.postponed; + + r = tfw_h2_stream_send_postponed(sk, head, mss_now, + snd_wnd); + if (unlikely(r)) { + T_WARN("Failed to send postponed frames %d", r); + return r; } - if 
(stream->xmit.b_len) { - T_FSM_JMP(HTTP2_MAKE_DATA_FRAMES); - } else if (stream->xmit.t_len) { - if (likely(!is_trailer_cont)) { - T_FSM_JMP(HTTP2_MAKE_TRAILER_FRAMES); - } else { - T_FSM_JMP(HTTP2_MAKE_TRAILER_CONTINUATION_FRAMES); - } + } + + if (stream->xmit.b_len) { + T_FSM_JMP(HTTP2_MAKE_DATA_FRAMES); + } + else if (stream->xmit.t_len) { + if (likely(!is_trailer_cont)) { + T_FSM_JMP(HTTP2_MAKE_TRAILER_FRAMES); } else { - fallthrough; + T_FSM_JMP(HTTP2_MAKE_TRAILER_CONTINUATION_FRAMES); } } + else { + fallthrough; + } } T_FSM_STATE(HTTP2_MAKE_FRAMES_FINISH) { @@ -2373,8 +2401,8 @@ do { \ */ if (unlikely(stream->xmit.skb_head)) { struct sk_buff **head = &stream->xmit.skb_head; - r = tfw_h2_stream_send_postponed(sk, head, - mss_now, + + r = tfw_h2_stream_send_postponed(sk, head, mss_now, snd_wnd); if (unlikely(r)) { T_WARN("Failed to send postponed" @@ -2410,8 +2438,7 @@ do { \ { struct sk_buff **head = &stream->xmit.postponed; - r = tfw_h2_stream_send_postponed(sk, head, - mss_now, + r = tfw_h2_stream_send_postponed(sk, head, mss_now, snd_wnd); if (unlikely(r)) { T_WARN("Failed to send postponed" @@ -2424,7 +2451,7 @@ do { \ return r; #undef FRAME_XMIT_FSM_NEXT -#undef CALC_FRAME_LENGTH_AND_SET_FRAME_TYPE +#undef CALC_FRAME_LENGTH_AND_SET_FRAME_TYPE_OR_EXIT #undef ADJUST_BLOCKED_STREAMS_AND_EXIT } From d31a0866a65d137869d57d21dae4bdd9af70a14a Mon Sep 17 00:00:00 2001 From: Constantine Date: Mon, 16 Mar 2026 13:31:57 +0200 Subject: [PATCH 04/13] refactor: Move skb related functions to ss_skb.h --- fw/cache.c | 1 - fw/http.c | 40 +++++------------- fw/http.h | 16 +------ fw/http_msg.c | 54 ++---------------------- fw/http_msg.h | 2 +- fw/ss_skb.c | 1 - fw/ss_skb.h | 88 +++++++++++++++++++++++++++++++++++++++ fw/t/unit/test_http_msg.c | 8 ++-- 8 files changed, 108 insertions(+), 102 deletions(-) diff --git a/fw/cache.c b/fw/cache.c index 79c69acb2..e7679f801 100644 --- a/fw/cache.c +++ b/fw/cache.c @@ -27,7 +27,6 @@ #include #include #include -#include #undef DEBUG 
#if DBG_CACHE > 0 diff --git a/fw/http.c b/fw/http.c index aa4bbfeee..4d51fc105 100644 --- a/fw/http.c +++ b/fw/http.c @@ -85,7 +85,6 @@ #include #include #include -#include #undef DEBUG #if DBG_HTTP > 0 @@ -1818,31 +1817,12 @@ do { \ } } -static void -__tfw_http_free_cleanup(TfwHttpMsgCleanup *cleanup) -{ - int i; - struct sk_buff *skb; - - while ((skb = ss_skb_dequeue(&cleanup->skb_head))) - __ss_kfree_skb(skb); - - for (i = 0; i < cleanup->pages_sz; i++) - /* - * Pass "true" even for non recyclable pages, relying on check - * pp_magic == PP_SIGNATURE in napi_pp_put_page(), which avoid - * recycling of non page_pool pages. Overhead seems the same - * as to have/maintain flag for each fragment. - */ - skb_page_unref(cleanup->pages[i], true); -} - static void __tfw_http_req_cleanup(TfwHttpReq *req) { if (!req->cleanup) return; - __tfw_http_free_cleanup(req->cleanup); + ss_skb_free_cleanup(req->cleanup); req->cleanup = NULL; } @@ -3918,7 +3898,7 @@ tfw_h1_adjust_req(TfwHttpReq *req) req->vhost, TFW_VHOST_HDRMOD_REQ); - req->cleanup = tfw_pool_alloc(hm->pool, sizeof(TfwHttpMsgCleanup)); + req->cleanup = tfw_pool_alloc(hm->pool, sizeof(TfwSkbCleanup)); if (unlikely(!req->cleanup)) return -ENOMEM; req->cleanup->pages_sz = 0; @@ -4272,10 +4252,10 @@ tfw_h2_adjust_req(TfwHttpReq *req) bool need_cl = req->body.len && TFW_STR_EMPTY(&ht->tbl[TFW_HTTP_HDR_CONTENT_LENGTH]); - req->cleanup = tfw_pool_alloc(req->pool, sizeof(TfwHttpMsgCleanup)); + req->cleanup = tfw_pool_alloc(req->pool, sizeof(TfwSkbCleanup)); if (unlikely(!req->cleanup)) return -ENOMEM; - memset(req->cleanup, 0, sizeof(TfwHttpMsgCleanup)); + memset(req->cleanup, 0, sizeof(TfwSkbCleanup)); if (need_cl) { cl_data_len = tfw_ultoa(req->body.len, cl_data, TFW_ULTOA_BUF_SIZ); @@ -4615,7 +4595,7 @@ tfw_http_resp_get_conn_flags(TfwHttpResp *resp) * headers will be avoided. 
*/ static int -tfw_http_resp_set_empty_skb_head(TfwHttpResp *resp, TfwHttpMsgCleanup *cleanup) +tfw_http_resp_set_empty_skb_head(TfwHttpResp *resp, TfwSkbCleanup *cleanup) { void *opaque_data = TFW_SKB_CB(resp->msg.skb_head)->opaque_data; TfwMsgIter *iter = &resp->iter; @@ -4638,7 +4618,7 @@ tfw_http_resp_set_empty_skb_head(TfwHttpResp *resp, TfwHttpMsgCleanup *cleanup) } static int -tfw_h1_resp_cutoff_headers(TfwHttpResp *resp, TfwHttpMsgCleanup *cleanup) +tfw_h1_resp_cutoff_headers(TfwHttpResp *resp, TfwSkbCleanup *cleanup) { TfwHttpMsg *hm = (TfwHttpMsg *)resp; TfwHttpReq *req = resp->req; @@ -4736,7 +4716,7 @@ tfw_http_adjust_resp(TfwHttpResp *resp) TfwHttpReq *req = resp->req; TfwHttpMsg *hm = (TfwHttpMsg *)resp; TfwMsgIter *iter = &resp->iter; - TfwHttpMsgCleanup cleanup = {}; + TfwSkbCleanup cleanup = {}; const TfwHdrMods *h_mods = tfw_vhost_get_hdr_mods(req->location, req->vhost, TFW_VHOST_HDRMOD_RESP); @@ -4789,7 +4769,7 @@ tfw_http_adjust_resp(TfwHttpResp *resp) r = tfw_http_msg_expand_from_pool(hm, &STR_CRLF); clean: - __tfw_http_free_cleanup(&cleanup); + ss_skb_free_cleanup(&cleanup); return r; } @@ -5883,7 +5863,7 @@ tfw_h2_resp_encode_headers(TfwHttpResp *resp) TfwHttpReq *req = resp->req; TfwHttpMsg *hm = (TfwHttpMsg *)resp; TfwHttpTransIter *mit = &resp->mit; - TfwHttpMsgCleanup cleanup = {}; + TfwSkbCleanup cleanup = {}; TfwStr codings = {}; const TfwHdrMods *h_mods = tfw_vhost_get_hdr_mods(req->location, req->vhost, @@ -5985,7 +5965,7 @@ tfw_h2_resp_encode_headers(TfwHttpResp *resp) req, resp); SS_SKB_QUEUE_DUMP(&resp->msg.skb_head); - __tfw_http_free_cleanup(&cleanup); + ss_skb_free_cleanup(&cleanup); return r; } diff --git a/fw/http.h b/fw/http.h index c77100f83..b6c9bd334 100644 --- a/fw/http.h +++ b/fw/http.h @@ -343,19 +343,6 @@ typedef struct { long m_date; } TfwHttpCond; -/** - * Represents the data that should be cleaned up after message transformation. 
- * - * @skb_head - head of skb list that must be freed; - * @pages - pages that must be freed; - * @pages_sz - current number of @pages; - */ -typedef struct { - struct sk_buff *skb_head; - netmem_ref pages[MAX_SKB_FRAGS]; - unsigned char pages_sz; -} TfwHttpMsgCleanup; - /** * HTTP Request. * @@ -404,7 +391,7 @@ struct tfw_http_req_t { TfwHttpSess *sess; TfwClient *peer; void *stale_ce; - TfwHttpMsgCleanup *cleanup; + TfwSkbCleanup *cleanup; TfwHttpCond cond; TfwMsgParseIter pit; HttpTfh tfh; @@ -815,5 +802,4 @@ void tfw_http_extract_request_authority(TfwHttpReq *req); bool tfw_http_mark_is_in_whitlist(unsigned int mark); char *tfw_http_resp_status_line(int status, size_t *len); int tfw_h2_on_send_resp(void *conn, struct sk_buff **skb_head); - #endif /* __TFW_HTTP_H__ */ diff --git a/fw/http_msg.c b/fw/http_msg.c index 09440a53d..6af2fd1ed 100644 --- a/fw/http_msg.c +++ b/fw/http_msg.c @@ -1303,52 +1303,6 @@ tfw_h2_msg_expand_from_pool_lc(TfwHttpMsg *hm, const TfwStr *str, return r; } -static inline void -__tfw_http_msg_move_frags(struct sk_buff *skb, int frag_idx, - TfwHttpMsgCleanup *cleanup) -{ - int i, len; - struct skb_shared_info *si = skb_shinfo(skb); - - for (i = 0, len = 0; i < frag_idx; i++) { - cleanup->pages[i] = skb_frag_netmem(&si->frags[i]); - cleanup->pages_sz++; - len += skb_frag_size(&si->frags[i]); - } - - si->nr_frags -= frag_idx; - ss_skb_adjust_data_len(skb, -len); - memmove(&si->frags, &si->frags[frag_idx], - (si->nr_frags) * sizeof(skb_frag_t)); -} - -static inline void -__tfw_http_msg_rm_all_frags(struct sk_buff *skb, TfwHttpMsgCleanup *cleanup) -{ - int i, len; - struct skb_shared_info *si = skb_shinfo(skb); - - for (i = 0; i < si->nr_frags; i++) - cleanup->pages[i] = skb_frag_netmem(&si->frags[i]); - - len = skb->data_len; - cleanup->pages_sz = si->nr_frags; - si->nr_frags = 0; - ss_skb_adjust_data_len(skb, -len); -} - -static inline void -__tfw_http_msg_shrink_frag(struct sk_buff *skb, int frag_idx, const char *nbegin) -{ - skb_frag_t 
*frag = &skb_shinfo(skb)->frags[frag_idx]; - const int len = nbegin - (char*)skb_frag_address(frag); - - /* Add offset and decrease fragment's size */ - skb_frag_off_add(frag, len); - skb_frag_size_sub(frag, len); - ss_skb_adjust_data_len(skb, -len); -} - /* * Delete SKBs and paged fragments related to @hm that contains message * headers. SKBs and fragments will be "unlinked" and placed to @cleanup. @@ -1356,7 +1310,7 @@ __tfw_http_msg_shrink_frag(struct sk_buff *skb, int frag_idx, const char *nbegin * as source for message trasformation. */ int -tfw_http_msg_cutoff_headers(TfwHttpMsg *hm, TfwHttpMsgCleanup* cleanup) +tfw_http_msg_cutoff_headers(TfwHttpMsg *hm, TfwSkbCleanup *cleanup) { int i, r = 0; char *begin, *end; @@ -1402,14 +1356,14 @@ tfw_http_msg_cutoff_headers(TfwHttpMsg *hm, TfwHttpMsgCleanup* cleanup) * fragments from skb where LF is located. */ if (!body) { - __tfw_http_msg_rm_all_frags(it->skb, cleanup); + ss_skb_rm_all_frags(it->skb, cleanup); goto end; } else if (off != begin) { /* * Fragment contains headers and body. * Set beginning of frag as beginning of body. */ - __tfw_http_msg_shrink_frag(it->skb, i, off); + ss_skb_shrink_frag(it->skb, i, off); } /* @@ -1418,7 +1372,7 @@ tfw_http_msg_cutoff_headers(TfwHttpMsg *hm, TfwHttpMsgCleanup* cleanup) * from skb. 
*/ if (i >= 1) - __tfw_http_msg_move_frags(it->skb, i, cleanup); + ss_skb_shift_frags(it->skb, i, cleanup); goto end; } diff --git a/fw/http_msg.h b/fw/http_msg.h index b255f25c5..3608b6758 100644 --- a/fw/http_msg.h +++ b/fw/http_msg.h @@ -163,7 +163,7 @@ int tfw_h2_msg_expand_from_pool_lc(TfwHttpMsg *hm, const TfwStr *str, TfwHttpTransIter *mit); int __hdr_name_cmp(const TfwStr *hdr, const TfwStr *cmp_hdr); int __http_hdr_lookup(TfwHttpMsg *hm, const TfwStr *hdr); -int tfw_http_msg_cutoff_headers(TfwHttpMsg *hm, TfwHttpMsgCleanup* cleanup); +int tfw_http_msg_cutoff_headers(TfwHttpMsg *hm, TfwSkbCleanup *cleanup); #define TFW_H2_MSG_HDR_ADD(hm, name, val, idx) \ tfw_h2_msg_hdr_add(hm, name, sizeof(name) - 1, val, \ diff --git a/fw/ss_skb.c b/fw/ss_skb.c index e2596fdd9..a94efddac 100644 --- a/fw/ss_skb.c +++ b/fw/ss_skb.c @@ -25,7 +25,6 @@ */ #include #include -#include #include #include #include diff --git a/fw/ss_skb.h b/fw/ss_skb.h index b0febb0c0..5f7d2d812 100644 --- a/fw/ss_skb.h +++ b/fw/ss_skb.h @@ -23,6 +23,7 @@ #define __TFW_SS_SKB_H__ #include +#include #include #include "str.h" @@ -59,6 +60,19 @@ struct tfw_skb_cb { #define TFW_SKB_CB(skb) ((struct tfw_skb_cb *)&((skb)->cb[0])) +/** + * Represents a data that should be cleaned up. + * + * @skb_head - head of skb list that must be freed; + * @pages - pages that must be freed; + * @pages_sz - current number of @pages; + */ +typedef struct { + struct sk_buff *skb_head; + netmem_ref pages[MAX_SKB_FRAGS]; + unsigned char pages_sz; +} TfwSkbCleanup; + void ss_skb_set_owner(struct sk_buff *skb, void (*destructor)(struct sk_buff *), TfwClientMem *owner, unsigned int delta); void ss_skb_adjust_client_mem(struct sk_buff *skb, int delta); @@ -487,6 +501,80 @@ int ss_skb_linear_transform(struct sk_buff *skb_head, struct sk_buff *skb, unsigned char *split_point); int ss_skb_realloc_headroom(struct sk_buff *skb); +/* Remove all paged fragments from @skb and move them into @cleanup. 
*/ +static inline void +ss_skb_rm_all_frags(struct sk_buff *skb, TfwSkbCleanup *cleanup) +{ + int i, len; + struct skb_shared_info *si = skb_shinfo(skb); + + for (i = 0; i < si->nr_frags; i++) + cleanup->pages[i] = skb_frag_netmem(&si->frags[i]); + + len = skb->data_len; + cleanup->pages_sz = si->nr_frags; + si->nr_frags = 0; + ss_skb_adjust_data_len(skb, -len); +} + +/* + * Remove paged fragments until @frag_idx and move them into @cleanup. Shift + * remaining fragments to the beginning of fragments array. + */ +static inline void +ss_skb_shift_frags(struct sk_buff *skb, int frag_idx, + TfwSkbCleanup *cleanup) +{ + int i, len; + struct skb_shared_info *si = skb_shinfo(skb); + + for (i = 0, len = 0; i < frag_idx; i++) { + cleanup->pages[i] = skb_frag_netmem(&si->frags[i]); + cleanup->pages_sz++; + len += skb_frag_size(&si->frags[i]); + } + + si->nr_frags -= frag_idx; + ss_skb_adjust_data_len(skb, -len); + memmove(&si->frags, &si->frags[frag_idx], + (si->nr_frags) * sizeof(skb_frag_t)); +} + +/* + * Shrink fragment with @frag_idx index, set @nbegin as the starting position + * of that fragment. + */ +static inline void +ss_skb_shrink_frag(struct sk_buff *skb, int frag_idx, const char *nbegin) +{ + skb_frag_t *frag = &skb_shinfo(skb)->frags[frag_idx]; + const int len = nbegin - (char *)skb_frag_address(frag); + + /* Add offset and decrease fragment's size */ + skb_frag_off_add(frag, len); + skb_frag_size_sub(frag, len); + ss_skb_adjust_data_len(skb, -len); +} + +static inline void +ss_skb_free_cleanup(TfwSkbCleanup *cleanup) +{ + int i; + struct sk_buff *skb; + + while ((skb = ss_skb_dequeue(&cleanup->skb_head))) + __ss_kfree_skb(skb); + + for (i = 0; i < cleanup->pages_sz; i++) + /* + * Pass "true" even for non recyclable pages, relying on check + * pp_magic == PP_SIGNATURE in napi_pp_put_page(), which avoid + * recycling of non page_pool pages. Overhead seems the same + * as to have/maintain flag for each fragment. 
+ */ + skb_page_unref(cleanup->pages[i], true); +} + #if defined(DEBUG) && (DEBUG >= 4) #define ss_skb_queue_for_each_do(queue, lambda) \ do { \ diff --git a/fw/t/unit/test_http_msg.c b/fw/t/unit/test_http_msg.c index 6a9e5a3cc..c2f6f5619 100644 --- a/fw/t/unit/test_http_msg.c +++ b/fw/t/unit/test_http_msg.c @@ -165,7 +165,7 @@ TEST(http_msg, cutoff_linear_headers_paged_body) TFW_STR_STRING("paged_body") }; TfwStr *head = &frags[0], *pgd = &frags[1]; - TfwHttpMsgCleanup cleanup = {}; + TfwSkbCleanup cleanup = {}; TfwMsgIter *it; int i; @@ -200,7 +200,7 @@ TEST(http_msg, cutoff_linear_headers_and_linear_body) TFW_STR_STRING("paged_body2") }; TfwStr *head = &frags[0], *pgd = &frags[1]; - TfwHttpMsgCleanup cleanup = {}; + TfwSkbCleanup cleanup = {}; TfwMsgIter *it; int i; @@ -232,7 +232,7 @@ TEST(http_msg, expand_from_pool_for_headers) }; TfwStr *hdr = &frags[0], *head = &frags[0], *pgd = &frags[1]; TfwHttpMsg *msg = (TfwHttpMsg *)resp; - TfwHttpMsgCleanup cleanup = {}; + TfwSkbCleanup cleanup = {}; TfwMsgIter *it; int i; @@ -287,7 +287,7 @@ TEST(http_msg, expand_from_pool_for_trailers) }; TfwStr *trailer = &frags[0], *head = &frags[1], *pgd = &frags[2]; TfwHttpMsg *msg = (TfwHttpMsg *)resp; - TfwHttpMsgCleanup cleanup = {}; + TfwSkbCleanup cleanup = {}; TfwMsgIter *it; int i; From 9695e2881b711239300aedbf93a2ecc7170019c0 Mon Sep 17 00:00:00 2001 From: Constantine Date: Mon, 16 Mar 2026 14:50:15 +0200 Subject: [PATCH 05/13] refactor: get rid of code duplication --- fw/http_msg.c | 3 +-- fw/ss_skb.c | 3 +-- fw/ss_skb.h | 7 +++++++ 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/fw/http_msg.c b/fw/http_msg.c index 6af2fd1ed..4971ce20d 100644 --- a/fw/http_msg.c +++ b/fw/http_msg.c @@ -1337,8 +1337,7 @@ tfw_http_msg_cutoff_headers(TfwHttpMsg *hm, TfwSkbCleanup *cleanup) it->skb, body); break; } else { - ss_skb_put(it->skb, -skb_headlen(it->skb)); - it->skb->tail_lock = 1; + ss_skb_remove_linear_data(it->skb); } } diff --git a/fw/ss_skb.c b/fw/ss_skb.c index 
a94efddac..39648ffb3 100644 --- a/fw/ss_skb.c +++ b/fw/ss_skb.c @@ -1701,8 +1701,7 @@ ss_skb_linear_transform(struct sk_buff *skb_head, struct sk_buff *skb, if (!split_point) { /* Usage of linear portion of SKB is not expected */ - ss_skb_put(skb, -skb_headlen(skb)); - skb->tail_lock = 1; + ss_skb_remove_linear_data(skb); } else { unsigned int off = split_point - skb->data; diff --git a/fw/ss_skb.h b/fw/ss_skb.h index 5f7d2d812..809c907e6 100644 --- a/fw/ss_skb.h +++ b/fw/ss_skb.h @@ -501,6 +501,13 @@ int ss_skb_linear_transform(struct sk_buff *skb_head, struct sk_buff *skb, unsigned char *split_point); int ss_skb_realloc_headroom(struct sk_buff *skb); +static inline void +ss_skb_remove_linear_data(struct sk_buff *skb) +{ + ss_skb_put(skb, -skb_headlen(skb)); + skb->tail_lock = 1; +} + /* Remove all paged fragments from @skb and move them into @cleanup. */ static inline void ss_skb_rm_all_frags(struct sk_buff *skb, TfwSkbCleanup *cleanup) From 6d946578b038d359ea613b38815d54275d25ec3d Mon Sep 17 00:00:00 2001 From: Constantine Date: Thu, 9 Apr 2026 15:09:22 +0300 Subject: [PATCH 06/13] fix: forbid `RST_STREAM` between `HEADERS` and `CONTINUATION` frames By the standard, during sending of a headers block any other frames are forbidden except CONTINUATION frames --- fw/http_stream.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fw/http_stream.c b/fw/http_stream.c index c6f005c5a..92a299034 100644 --- a/fw/http_stream.c +++ b/fw/http_stream.c @@ -350,8 +350,7 @@ do { \ do { \ if ((ctx->cur_##op##_headers \ && ((type == HTTP2_HEADERS && !is_send) || \ - (type != HTTP2_HEADERS && type != HTTP2_CONTINUATION && \ - type != HTTP2_RST_STREAM))) \ + (type != HTTP2_HEADERS && type != HTTP2_CONTINUATION))) \ || (!ctx->cur_##op##_headers && type == HTTP2_CONTINUATION)) { \ *err = HTTP2_ECODE_PROTO; \ res = STREAM_FSM_RES_TERM_CONN; \ From 5fa0db45b1287f3fee5be703001353e2c34a907f Mon Sep 17 00:00:00 2001 From: Constantine Date: Tue, 21 Apr 2026 13:54:50 +0300 
Subject: [PATCH 07/13] fix: blocked data sending even when tcp window is enough When frame is prepared and window is enough to make one more frame we call `FRAME_XMIT_FSM_NEXT()` which decreases `snd_wnd` by `frame_length` that is correct, however in the macro `CALC_FRAME_LENGTH_AND_SET_FRAME_TYPE_OR_EXIT()` we check `snd_wnd` for `min_to_send` regardless we have prepared frame or not, and if the last prepared frame is smaller than `min_to_send` we don't send this frame but send only the first prepared frame. This is looks incorrect because if we have 2048 window and the first prepared frame 1536 we don't send remaining 512 bytes for 1500 mtu. Performance difference. This patch: finished in 50.05s, 1920200.18 req/s, 1.45GB/s requests: 96010009 total, 96020009 started, 96010009 done, 96010009 succeeded, 0 failed, 0 errored, 0 timeout status codes: 96010009 2xx, 0 3xx, 0 4xx, 0 5xx traffic: 72.69GB (78047130589) total, 16.09GB (17272778551) headers (space savings 23.09%), 54.99GB (59046155535) data min max mean sd +/- sd time for request: 354us 38.71ms 6.27ms 3.62ms 73.81% time for connect: 12.65ms 56.41ms 29.05ms 11.35ms 61.00% time to 1st byte: 16.81ms 75.64ms 36.31ms 13.33ms 63.00% req/s : 5685.92 36745.74 19201.81 7963.58 69.00% Master: finished in 50.04s, 1755808.42 req/s, 1.33GB/s requests: 87790421 total, 87800421 started, 87790421 done, 87790421 succeeded, 0 failed, 0 errored, 0 timeout status codes: 87790421 2xx, 0 3xx, 0 4xx, 0 5xx traffic: 66.39GB (71285828311) total, 14.64GB (15714485359) headers (space savings 23.18%), 50.28GB (53991108915) data min max mean sd +/- sd time for request: 431us 38.01ms 7.03ms 4.17ms 73.04% time for connect: 13.60ms 45.94ms 26.00ms 6.08ms 75.00% time to 1st byte: 18.68ms 61.10ms 33.66ms 8.77ms 66.00% req/s : 6335.27 36376.15 17557.90 7980.11 68.00% --- fw/http_frame.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/fw/http_frame.c b/fw/http_frame.c index 6bb820115..f3d99c104 100644 
--- a/fw/http_frame.c +++ b/fw/http_frame.c @@ -2199,15 +2199,25 @@ do { \ } \ max_len = min(TLS_MAX_PAYLOAD_SIZE, *snd_wnd - TLS_MAX_OVERHEAD); \ max_len -= FRAME_HEADER_SIZE; \ - min_len = min(min_to_send, (unsigned int)len); \ frame_length = tfw_h2_calc_frame_length(ctx, stream, type, len, \ max_len); \ - /* \ - * If the lenght of data to send is less then `min_to_send` \ - * use it as a minimum bytes to send. \ + /* + * Apply min sending data optimization only if there is no already + * prepared frames, this prevents from cutting large skb and prevents + * data postponing when we can simply send remaining data in multiple + * tcp segments offloading SKB segmenation to NIC. This avoids an + * additional call to ss_skb_split(). + * */ \ - if (frame_length < min_len) \ - ADJUST_BLOCKED_STREAMS_AND_EXIT(min_len, type); \ + if (stream->xmit.frame_length == 0) { \ + /* + * If the length of data to send is less then `min_to_send` + * use it as a minimum bytes to send. + */ \ + min_len = min_t(int, min_to_send, len); \ + if (frame_length < min_len) \ + ADJUST_BLOCKED_STREAMS_AND_EXIT(min_len, type); \ + } \ frame_type = type; \ } while(0) From ca7304123883a21387ec8e4adc7a7117cc068630 Mon Sep 17 00:00:00 2001 From: Constantine Date: Tue, 21 Apr 2026 15:57:34 +0300 Subject: [PATCH 08/13] fix: sending and receiving RST_STREAM breaks HPACK context Before this patch Tempesta stopped to send headers block when RST_STREAM frame was received or sent, that broke HPACK state. In this patch we sending all encoded headers to maintain synchronized HPACK dynamic table. 
--- fw/http_frame.c | 93 +++++++++++++++++++++++++++--------------------- fw/http_stream.c | 21 ++++++++--- 2 files changed, 69 insertions(+), 45 deletions(-) diff --git a/fw/http_frame.c b/fw/http_frame.c index f3d99c104..29985dfc3 100644 --- a/fw/http_frame.c +++ b/fw/http_frame.c @@ -199,7 +199,19 @@ do { \ return T_BAD; \ } else if (res == STREAM_FSM_RES_TERM_STREAM) { \ WARN_ON_ONCE(hdr->stream_id != ctx->cur_stream->id); \ - return tfw_h2_current_stream_send_rst((ctx), err); \ + if (ctx->cur_stream != ctx->cur_send_headers) { \ + return tfw_h2_current_stream_send_rst((ctx), \ + err); \ + } else { \ + unsigned int id = ctx->cur_stream->id; \ + /** + * If Tempesta is already sending headers, + * only update the stream state and schedule + * sending an RST_STREAM frame, but do not + * remove the stream from the scheduling queue. + */ \ + return tfw_h2_send_rst_stream(ctx, id, err); \ + } \ } \ return T_OK; \ } \ @@ -1638,7 +1650,8 @@ tfw_h2_frame_recv(void *data, unsigned char *buf, unsigned int len, T_FSM_STATE(HTTP2_RECV_FRAME_RST_STREAM) { FRAME_FSM_READ_SRVC(ctx->to_read); - tfw_h2_rst_stream_process(ctx); + if (ctx->cur_stream != ctx->cur_send_headers) + tfw_h2_rst_stream_process(ctx); FRAME_FSM_EXIT(T_OK); } @@ -2054,6 +2067,43 @@ tfw_h2_insert_frame_header(struct sock *sk, TfwH2Ctx *ctx, TfwStream *stream, bool trailers = false; char *data; int r = 0; + unsigned char flags; + + if (type == HTTP2_DATA) { + ctx->rem_wnd -= frame_length; + ctx->data_bytes_sent += frame_length; + stream->rem_wnd -= frame_length; + stream->xmit.b_len -= frame_length; + } else if (stream->xmit.h_len) { + stream->xmit.h_len -= frame_length; + } else if (stream->xmit.t_len) { + stream->xmit.t_len -= frame_length; + trailers = true; + } + flags = tfw_h2_calc_frame_flags(stream, type, trailers); + + switch (tfw_h2_stream_fsm_ignore_err(ctx, stream, type, flags)) { + case STREAM_FSM_RES_OK: + break; + case STREAM_FSM_RES_IGNORE: + fallthrough; + case STREAM_FSM_RES_TERM_STREAM: + /* 
Send previosly successfully prepared frames if exist. */ + if (stream->xmit.frame_length) { + r = tfw_h2_entail_stream_skb(sk, ctx, stream, + &stream->xmit.frame_length, + true); + } + stream->xmit.frame_length += frame_length; + /* + * Purge stream send queue, but leave postponed + * skbs and rst stream/goaway/tls alert if exist. + */ + tfw_h2_stream_purge_send_queue(stream); + return r; + case STREAM_FSM_RES_TERM_CONN: + return -EPIPE; + } /* * Very unlikely case, when skb_head and one or more next skbs @@ -2088,50 +2138,13 @@ tfw_h2_insert_frame_header(struct sock *sk, TfwH2Ctx *ctx, TfwStream *stream, data = dst.data; } - if (type == HTTP2_DATA) { - ctx->rem_wnd -= frame_length; - ctx->data_bytes_sent += frame_length; - stream->rem_wnd -= frame_length; - stream->xmit.b_len -= frame_length; - } else if (stream->xmit.h_len) { - stream->xmit.h_len -= frame_length; - } else if (stream->xmit.t_len) { - stream->xmit.t_len -= frame_length; - trailers = true; - } - frame_hdr.length = frame_length; frame_hdr.stream_id = stream->id; frame_hdr.type = type; - frame_hdr.flags = tfw_h2_calc_frame_flags(stream, type, trailers); + frame_hdr.flags = flags; tfw_h2_pack_frame_header(data, &frame_hdr); stream->xmit.frame_length += frame_length + FRAME_HEADER_SIZE; - switch (tfw_h2_stream_fsm_ignore_err(ctx, stream, type, - frame_hdr.flags)) - { - case STREAM_FSM_RES_OK: - break; - case STREAM_FSM_RES_IGNORE: - fallthrough; - case STREAM_FSM_RES_TERM_STREAM: - /* Send previosly successfully prepared frames if exist. */ - stream->xmit.frame_length -= frame_length + FRAME_HEADER_SIZE; - if (stream->xmit.frame_length) { - r = tfw_h2_entail_stream_skb(sk, ctx, stream, - &stream->xmit.frame_length, - true); - } - stream->xmit.frame_length += frame_length + FRAME_HEADER_SIZE; - /* - * Purge stream send queue, but leave postponed - * skbs and rst stream/goaway/tls alert if exist. 
- */ - tfw_h2_stream_purge_send_queue(stream); - return r; - case STREAM_FSM_RES_TERM_CONN: - return -EPIPE; - } return r; } diff --git a/fw/http_stream.c b/fw/http_stream.c index 92a299034..c903977dc 100644 --- a/fw/http_stream.c +++ b/fw/http_stream.c @@ -754,9 +754,24 @@ do { \ break; case HTTP2_STREAM_CLOSED: - T_WARN("%s, stream fully closed: stream->id=%u, type=%hhu," + T_DBG2("%s, stream fully closed: stream->id=%u, type=%hhu," " flags=0x%hhx\n", __func__, stream->id, type, flags); if (send) { + const bool is_headers = type == HTTP2_HEADERS || + type == HTTP2_CONTINUATION; + + if (ctx->cur_send_headers && is_headers) { + /* + * Headers has been sent in closed state. + * It happens when Tempesta encoded all headers + * and after received RST_STREAM. To not break + * compression state Tempesta sends all remaining + * headers. + */ + if (flags & HTTP2_F_END_HEADERS) + ctx->cur_send_headers = NULL; + break; + } res = STREAM_FSM_RES_IGNORE; } else { if (type != HTTP2_PRIORITY) { @@ -764,16 +779,12 @@ do { \ res = STREAM_FSM_RES_TERM_CONN; } } - break; default: BUG(); } finish: - if (type == HTTP2_RST_STREAM || res == STREAM_FSM_RES_TERM_STREAM) - tfw_h2_conn_reset_stream_on_close(ctx, stream); - T_DBG4("exit %s: strm [%p] state %d(%s), res %d\n", __func__, stream, tfw_h2_get_stream_state(stream), __h2_strm_st_n(stream), res); From f6e2987e92f6df3b635b17aff0a1e0e638bcc453 Mon Sep 17 00:00:00 2001 From: Constantine Date: Wed, 22 Apr 2026 17:20:53 +0300 Subject: [PATCH 09/13] fix: reset connection instead of stream The case when Tempesta receives obviously broken HEADERS or PRIORITY frame Tempesta treats it as suspicios and do disconnect instead of handling this case with RST_STREAM. It looks that it doesn't makes sense to continue service this connection. Also this removes the attack vector for RST_STREAM flood. RFC 9113 5.4.1. Connection Error Handling: An endpoint can end a connection at any time. 
In particular, an endpoint MAY choose to treat a stream error as a connection error. --- fw/http_frame.c | 40 ++++++++++++++-------------------------- 1 file changed, 14 insertions(+), 26 deletions(-) diff --git a/fw/http_frame.c b/fw/http_frame.c index 29985dfc3..73ccc2f85 100644 --- a/fw/http_frame.c +++ b/fw/http_frame.c @@ -679,15 +679,13 @@ tfw_h2_headers_process(TfwH2Ctx *ctx) T_DBG("Invalid dependency: new stream with %u depends on" " itself\n", hdr->stream_id); - ctx->state = HTTP2_IGNORE_FRAME_DATA; - - if (likely(!ctx->cur_stream)) { - return tfw_h2_send_rst_stream(ctx, hdr->stream_id, - HTTP2_ECODE_PROTO); - } - - WARN_ON_ONCE(hdr->stream_id != ctx->cur_stream->id); - return tfw_h2_current_stream_send_rst(ctx, HTTP2_ECODE_PROTO); + /* + * RFC 7540 states that it MUST be treated as a stream-level + * error, however, it doesn’t make sense to continue servicing + * a suspicious connection. + */ + tfw_h2_conn_terminate(ctx, HTTP2_ECODE_PROTO); + return T_BAD; } if (likely(!ctx->cur_stream)) { @@ -789,18 +787,6 @@ tfw_h2_priority_process(TfwH2Ctx *ctx) return T_OK; } - if (ctx->cur_stream->state == HTTP2_STREAM_IDLE) { - /* - * According to RFC 9113 we should response with stream - * error of type PROTOCOL ERROR here, but we can't send - * RST_STREAM for idle stream. - * RFC 9113 doesn't describe this case, so terminate - * connection. - */ - tfw_h2_conn_terminate(ctx, HTTP2_ECODE_PROTO); - return T_BAD; - } - /* * Stream cannot depend on itself (see RFC 7540 section 5.1.2 for * details). @@ -808,12 +794,14 @@ tfw_h2_priority_process(TfwH2Ctx *ctx) T_DBG("Invalid dependency: new stream with %u depends on itself\n", hdr->stream_id); - if (tfw_h2_stream_fsm_ignore_err(ctx, ctx->cur_stream, - HTTP2_RST_STREAM, 0)) - return -EPERM; + /* + * RFC 7540 states that it MUST be treated as a stream-level error, + * however, it doesn’t make sense to continue servicing a suspicious + * connection. 
+ */ + tfw_h2_conn_terminate(ctx, HTTP2_ECODE_PROTO); - WARN_ON_ONCE(hdr->stream_id != ctx->cur_stream->id); - return tfw_h2_current_stream_send_rst(ctx, HTTP2_ECODE_PROTO); + return T_BAD; } static inline void From fe67274f3728293c27049ca8d68bf4085921c4d8 Mon Sep 17 00:00:00 2001 From: Constantine Date: Thu, 30 Apr 2026 15:03:47 +0300 Subject: [PATCH 10/13] fix: write dynamic table size before check send window Do this to ensure that the function calculating frame size accounts for the dynamic table size, not just the headers, and does not produce CONTINUATION frame if the data fits within a single HEADERS frame. The main reason to fix this: It seems firefox has a bug because of that it can't proccess CONTINUATION frame for closed stream and resets connection breaking page loading. --- fw/http_frame.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fw/http_frame.c b/fw/http_frame.c index 73ccc2f85..3e8eaae32 100644 --- a/fw/http_frame.c +++ b/fw/http_frame.c @@ -2260,8 +2260,6 @@ do { \ return -EPIPE; } - CALC_FRAME_LENGTH_AND_SET_FRAME_TYPE_OR_EXIT(HTTP2_HEADERS, - stream->xmit.h_len); if (unlikely(ctx->hpack.enc_tbl.wnd_changed)) { r = tfw_hpack_enc_tbl_write_sz(&ctx->hpack.enc_tbl, stream->xmit.skb_head, @@ -2274,6 +2272,9 @@ do { \ } } + CALC_FRAME_LENGTH_AND_SET_FRAME_TYPE_OR_EXIT(HTTP2_HEADERS, + stream->xmit.h_len); + r = tfw_h2_insert_frame_header(sk, ctx, stream, frame_type, frame_length); if (unlikely(r)) { From 00765822c2f8c57d8a828f95be3025f90b9552fd Mon Sep 17 00:00:00 2001 From: Constantine Date: Thu, 30 Apr 2026 18:22:11 +0300 Subject: [PATCH 11/13] refactor: "overloaded" function `tfw_h2_insert_frame_header()` The function must have only one responsibility insert frame headers, however before this patch it also sent data. 
In this patch we extract data sending logic into dedicated state to make code more understandable and explicit --- fw/http_frame.c | 113 ++++++++++++++++++++++++++++++----------------- fw/http_stream.h | 1 + 2 files changed, 73 insertions(+), 41 deletions(-) diff --git a/fw/http_frame.c b/fw/http_frame.c index 3e8eaae32..e4c1a3160 100644 --- a/fw/http_frame.c +++ b/fw/http_frame.c @@ -2070,28 +2070,9 @@ tfw_h2_insert_frame_header(struct sock *sk, TfwH2Ctx *ctx, TfwStream *stream, } flags = tfw_h2_calc_frame_flags(stream, type, trailers); - switch (tfw_h2_stream_fsm_ignore_err(ctx, stream, type, flags)) { - case STREAM_FSM_RES_OK: - break; - case STREAM_FSM_RES_IGNORE: - fallthrough; - case STREAM_FSM_RES_TERM_STREAM: - /* Send previosly successfully prepared frames if exist. */ - if (stream->xmit.frame_length) { - r = tfw_h2_entail_stream_skb(sk, ctx, stream, - &stream->xmit.frame_length, - true); - } - stream->xmit.frame_length += frame_length; - /* - * Purge stream send queue, but leave postponed - * skbs and rst stream/goaway/tls alert if exist. 
- */ - tfw_h2_stream_purge_send_queue(stream); + r = tfw_h2_stream_fsm_ignore_err(ctx, stream, type, flags); + if (unlikely(r)) return r; - case STREAM_FSM_RES_TERM_CONN: - return -EPIPE; - } /* * Very unlikely case, when skb_head and one or more next skbs @@ -2277,10 +2258,8 @@ do { \ r = tfw_h2_insert_frame_header(sk, ctx, stream, frame_type, frame_length); - if (unlikely(r)) { - T_WARN("Failed to make headers frame %d", r); - return r; - } + if (unlikely(r)) + T_FSM_JMP(HTTP2_FRAMING_FAILED); FRAME_XMIT_FSM_NEXT(frame_length, HTTP2_SEND_FRAMES); } @@ -2290,10 +2269,8 @@ do { \ stream->xmit.h_len); r = tfw_h2_insert_frame_header(sk, ctx, stream, frame_type, frame_length); - if (unlikely(r)) { - T_WARN("Failed to make continuation frame %d", r); - return r; - } + if (unlikely(r)) + T_FSM_JMP(HTTP2_FRAMING_FAILED); FRAME_XMIT_FSM_NEXT(frame_length, HTTP2_SEND_FRAMES); } @@ -2306,10 +2283,8 @@ do { \ stream->xmit.b_len); r = tfw_h2_insert_frame_header(sk, ctx, stream, frame_type, frame_length); - if (unlikely (r)) { - T_WARN("Failed to make data frame %d", r); - return r; - } + if (unlikely(r)) + T_FSM_JMP(HTTP2_FRAMING_FAILED); ctx->data_frames_sent++; FRAME_XMIT_FSM_NEXT(frame_length, HTTP2_SEND_FRAMES); @@ -2341,10 +2316,8 @@ do { \ } r = tfw_h2_insert_frame_header(sk, ctx, stream, frame_type, frame_length); - if (unlikely(r)) { - T_WARN("Failed to make trail headers frame %d", r); - return r; - } + if (unlikely(r)) + T_FSM_JMP(HTTP2_FRAMING_FAILED); FRAME_XMIT_FSM_NEXT(frame_length, HTTP2_SEND_FRAMES); } @@ -2355,10 +2328,8 @@ do { \ stream->xmit.t_len); r = tfw_h2_insert_frame_header(sk, ctx, stream, frame_type, frame_length); - if (unlikely(r)) { - T_WARN("Failed to make trail continuation frame %d", r); - return r; - } + if (unlikely(r)) + T_FSM_JMP(HTTP2_FRAMING_FAILED); FRAME_XMIT_FSM_NEXT(frame_length, HTTP2_SEND_FRAMES); } @@ -2433,6 +2404,66 @@ do { \ T_FSM_EXIT(); } + /* + * In this state we handle framing error. 
It may happen if we trying + * to send DATA frames to closed stream. e.g Stream closed by the + * client while receiving response, that is valid behavior for firefox. + */ + T_FSM_STATE(HTTP2_FRAMING_FAILED) { + switch (r) { + case STREAM_FSM_RES_IGNORE: + fallthrough; + case STREAM_FSM_RES_TERM_STREAM: + /* Send previosly successfully prepared frames if exist. */ + if (stream->xmit.frame_length) { + r = tfw_h2_entail_stream_skb(sk, ctx, stream, + &stream->xmit.frame_length, + true); + if (unlikely(r)) + return r; + } + + /* During headers insertion we already subtract + * @frame_length from b_len, h_len or t_len. However + * tfw_h2_stream_purge_send_queue() need actual + * size of the queue data to completely free it. Thus + * restore actual size here. + */ + stream->xmit.frame_length += frame_length; + /** + * Purge stream send queue, but leave postponed + * skbs and rst stream/goaway/tls alert if exist. + */ + tfw_h2_stream_purge_send_queue(stream); + + if (unlikely(stream->xmit.postponed) && + !ctx->cur_send_headers) { + struct sk_buff **head = &stream->xmit.postponed; + + r = tfw_h2_stream_send_postponed(sk, head, + mss_now, + snd_wnd); + if (unlikely(r)) { + T_WARN("Failed to send postponed" + " frames %d", r); + return r; + } + } + T_FSM_JMP(HTTP2_MAKE_FRAMES_FINISH); + case STREAM_FSM_RES_TERM_CONN: + return -EPIPE; + default: + /* + * Framing error not occurred but framing failed state + * reached. 
+ */ + WARN_ON_ONCE(!r); + return -EPIPE; + } + + T_FSM_EXIT(); + } + } T_FSM_FINISH(r, stream->xmit.state); diff --git a/fw/http_stream.h b/fw/http_stream.h index 4e9b2c5d4..fd675e81e 100644 --- a/fw/http_stream.h +++ b/fw/http_stream.h @@ -67,6 +67,7 @@ typedef enum { HTTP2_MAKE_TRAILER_CONTINUATION_FRAMES, HTTP2_SEND_FRAMES, HTTP2_MAKE_FRAMES_FINISH, + HTTP2_FRAMING_FAILED } TfwStreamXmitState; static const char *__tfw_strm_st_names[] = { From 6eba21e01601f6520648c6b4d3a969cf9da5ba7c Mon Sep 17 00:00:00 2001 From: Constantine Date: Thu, 14 May 2026 11:48:21 +0300 Subject: [PATCH 12/13] fix: don't return positive values --- fw/http_frame.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fw/http_frame.c b/fw/http_frame.c index e4c1a3160..06c29494e 100644 --- a/fw/http_frame.c +++ b/fw/http_frame.c @@ -2414,6 +2414,13 @@ do { \ case STREAM_FSM_RES_IGNORE: fallthrough; case STREAM_FSM_RES_TERM_STREAM: + /* + * In this case r is positive, set it to zero to not + * return positive r from this function. That doesn't + * reset connection but stops stream scheduling even + * if we have enough send window. + */ + r = 0; /* Send previosly successfully prepared frames if exist. */ if (stream->xmit.frame_length) { r = tfw_h2_entail_stream_skb(sk, ctx, stream, From 0edde4e3385dfdb1b5026a3030fdd5c9758448b4 Mon Sep 17 00:00:00 2001 From: Constantine Date: Thu, 14 May 2026 11:57:51 +0300 Subject: [PATCH 13/13] refactor: Move adding stream to closed to the same place as cleaning --- fw/http_frame.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/fw/http_frame.c b/fw/http_frame.c index 06c29494e..24e9e2add 100644 --- a/fw/http_frame.c +++ b/fw/http_frame.c @@ -2395,12 +2395,6 @@ do { \ } if (stream == ctx->error) ctx->error = NULL; - /* - * Don't put exclusive streams in closed queue it - * will be immediately deleted in the caller function. 
- */ - if (!stream_is_exclusive) - tfw_h2_stream_add_closed(ctx, stream); T_FSM_EXIT(); } @@ -2556,6 +2550,8 @@ tfw_h2_make_frames(struct sock *sk, TfwH2Ctx *ctx, unsigned int mss_now, if (stream_is_exclusive) { tfw_h2_stream_clean(ctx, stream); } else { + tfw_h2_stream_add_closed(ctx, stream); + TfwStreamSchedEntry *parent = stream->sched->parent;