diff --git a/fw/cache.c b/fw/cache.c index 79c69acb2..e7679f801 100644 --- a/fw/cache.c +++ b/fw/cache.c @@ -27,7 +27,6 @@ #include #include #include -#include #undef DEBUG #if DBG_CACHE > 0 diff --git a/fw/hpack.c b/fw/hpack.c index 999e7529f..e56ca9d88 100644 --- a/fw/hpack.c +++ b/fw/hpack.c @@ -3718,23 +3718,19 @@ tfw_hpack_encode(TfwHttpResp *__restrict resp, TfwStr *__restrict hdr, * into the HTTP/2 HPACK format. */ int -tfw_hpack_transform(TfwHttpResp *__restrict resp, TfwStr *__restrict hdr) +tfw_hpack_transform(TfwHttpResp *__restrict resp, TfwStr *__restrict hdr, + bool dyn_indexing) { - return __tfw_hpack_encode(resp, hdr, true, true, true); + return __tfw_hpack_encode(resp, hdr, true, dyn_indexing, true); } void -tfw_hpack_set_rbuf_size(TfwHPackETbl *__restrict tbl, unsigned short new_size) +tfw_hpack_set_rbuf_size(TfwHPackETbl *__restrict tbl, + unsigned int requested_size) { - if (new_size > HPACK_ENC_TABLE_MAX_SIZE) { - T_WARN("Client requests hpack table size (%hu), which is " - "greater than HPACK_ENC_TABLE_MAX_SIZE.", new_size); - new_size = HPACK_ENC_TABLE_MAX_SIZE; - } - T_DBG3("%s: tbl->rb_len=%hu, tbl->size=%hu, tbl->window=%hu," - " new_size=%hu\n", __func__, tbl->rb_len, tbl->size, - tbl->window, new_size); + " requested_size=%u\n", __func__, tbl->rb_len, tbl->size, + tbl->window, requested_size); /* * RFC7541#section-4.2: @@ -3744,9 +3740,13 @@ tfw_hpack_set_rbuf_size(TfwHPackETbl *__restrict tbl, unsigned short new_size) * size that occurs in that interval MUST be signaled in a dynamic * table size update. 
*/ - if (tbl->window != new_size && (likely(!tbl->wnd_changed) - || unlikely(!tbl->window) || new_size < tbl->window)) + if (tbl->window != requested_size && (likely(!tbl->wnd_changed) + || unlikely(!tbl->window) || requested_size < tbl->window)) { + unsigned short new_size = min_t(unsigned int, requested_size, + HPACK_ENC_TABLE_MAX_SIZE); + BUILD_BUG_ON(HPACK_ENC_TABLE_MAX_SIZE > USHRT_MAX || + sizeof(new_size) != sizeof(tbl->window)); if (tbl->size > new_size) tfw_hpack_rbuf_calc(tbl, new_size, NULL, (TfwHPackETblIter *)tbl); @@ -3754,11 +3754,15 @@ tfw_hpack_set_rbuf_size(TfwHPackETbl *__restrict tbl, unsigned short new_size) tbl->window = new_size; tbl->wnd_changed = true; + + T_DBG3("%s: New hpack encoder table size has been set to %u\n", + __func__, tbl->window); } } int -tfw_hpack_enc_tbl_write_sz(TfwHPackETbl *__restrict tbl, TfwStream *stream) +tfw_hpack_enc_tbl_write_sz(TfwHPackETbl *tbl, struct sk_buff *skb_head, + unsigned int offset, unsigned int *acc_len) { TfwHPackInt tmp = {}; TfwStr dst = {}; @@ -3769,18 +3773,16 @@ tfw_hpack_enc_tbl_write_sz(TfwHPackETbl *__restrict tbl, TfwStream *stream) WARN_ON_ONCE(!tbl->wnd_changed); write_int(tbl->window, 0x1F, 0x20, &tmp); - data = ss_skb_data_ptr_by_offset(stream->xmit.skb_head, - FRAME_HEADER_SIZE); + data = ss_skb_data_ptr_by_offset(skb_head, + offset + FRAME_HEADER_SIZE); BUG_ON(!data); - r = ss_skb_get_room_w_frag(stream->xmit.skb_head, - stream->xmit.skb_head, - data, tmp.sz, &dst, &_); + r = ss_skb_get_room_w_frag(skb_head, skb_head, data, tmp.sz, &dst, &_); if (unlikely(r)) return r; memcpy_fast(dst.data, tmp.buf, tmp.sz); - stream->xmit.h_len += tmp.sz; + *acc_len += tmp.sz; tbl->wnd_changed = false; return 0; diff --git a/fw/hpack.h b/fw/hpack.h index 1a139b5bb..bf5f5324d 100644 --- a/fw/hpack.h +++ b/fw/hpack.h @@ -302,11 +302,12 @@ void write_int(unsigned long index, unsigned short max, unsigned short mask, int tfw_hpack_init(TfwHPack *__restrict hp, TfwClientMem *owner, unsigned int htbl_sz); 
void tfw_hpack_clean(TfwHPack *__restrict hp); -int tfw_hpack_transform(TfwHttpResp *__restrict resp, TfwStr *__restrict hdr); +int tfw_hpack_transform(TfwHttpResp *__restrict resp, TfwStr *__restrict hdr, + bool dyn_indexing); int tfw_hpack_encode(TfwHttpResp *__restrict resp, TfwStr *__restrict hdr, bool use_pool, bool dyn_indexing); void tfw_hpack_set_rbuf_size(TfwHPackETbl *__restrict tbl, - unsigned short new_size); + unsigned int new_size); int tfw_hpack_decode(TfwHPack *__restrict hp, unsigned char *__restrict src, unsigned long n, TfwHttpReq *__restrict req, unsigned int *__restrict parsed); @@ -314,7 +315,9 @@ int tfw_hpack_cache_decode_expand(TfwHPack *__restrict hp, TfwHttpResp *__restrict resp, unsigned char *__restrict src, unsigned long n, TfwDecodeCacheIter *__restrict cd_iter); -int tfw_hpack_enc_tbl_write_sz(TfwHPackETbl *__restrict tbl, TfwStream *stream); +int tfw_hpack_enc_tbl_write_sz(TfwHPackETbl *tbl, struct sk_buff *skb_head, + unsigned int offset, + unsigned int *acc_len); static inline unsigned int tfw_hpack_int_size(unsigned long index, unsigned short max) diff --git a/fw/http.c b/fw/http.c index d7960ddac..4d51fc105 100644 --- a/fw/http.c +++ b/fw/http.c @@ -85,7 +85,6 @@ #include #include #include -#include #undef DEBUG #if DBG_HTTP > 0 @@ -1818,31 +1817,12 @@ do { \ } } -static void -__tfw_http_free_cleanup(TfwHttpMsgCleanup *cleanup) -{ - int i; - struct sk_buff *skb; - - while ((skb = ss_skb_dequeue(&cleanup->skb_head))) - __ss_kfree_skb(skb); - - for (i = 0; i < cleanup->pages_sz; i++) - /* - * Pass "true" even for non recyclable pages, relying on check - * pp_magic == PP_SIGNATURE in napi_pp_put_page(), which avoid - * recycling of non page_pool pages. Overhead seems the same - * as to have/maintain flag for each fragment. 
- */ - skb_page_unref(cleanup->pages[i], true); -} - static void __tfw_http_req_cleanup(TfwHttpReq *req) { if (!req->cleanup) return; - __tfw_http_free_cleanup(req->cleanup); + ss_skb_free_cleanup(req->cleanup); req->cleanup = NULL; } @@ -3918,7 +3898,7 @@ tfw_h1_adjust_req(TfwHttpReq *req) req->vhost, TFW_VHOST_HDRMOD_REQ); - req->cleanup = tfw_pool_alloc(hm->pool, sizeof(TfwHttpMsgCleanup)); + req->cleanup = tfw_pool_alloc(hm->pool, sizeof(TfwSkbCleanup)); if (unlikely(!req->cleanup)) return -ENOMEM; req->cleanup->pages_sz = 0; @@ -4272,10 +4252,10 @@ tfw_h2_adjust_req(TfwHttpReq *req) bool need_cl = req->body.len && TFW_STR_EMPTY(&ht->tbl[TFW_HTTP_HDR_CONTENT_LENGTH]); - req->cleanup = tfw_pool_alloc(req->pool, sizeof(TfwHttpMsgCleanup)); + req->cleanup = tfw_pool_alloc(req->pool, sizeof(TfwSkbCleanup)); if (unlikely(!req->cleanup)) return -ENOMEM; - memset(req->cleanup, 0, sizeof(TfwHttpMsgCleanup)); + memset(req->cleanup, 0, sizeof(TfwSkbCleanup)); if (need_cl) { cl_data_len = tfw_ultoa(req->body.len, cl_data, TFW_ULTOA_BUF_SIZ); @@ -4615,7 +4595,7 @@ tfw_http_resp_get_conn_flags(TfwHttpResp *resp) * headers will be avoided. 
*/ static int -tfw_http_resp_set_empty_skb_head(TfwHttpResp *resp, TfwHttpMsgCleanup *cleanup) +tfw_http_resp_set_empty_skb_head(TfwHttpResp *resp, TfwSkbCleanup *cleanup) { void *opaque_data = TFW_SKB_CB(resp->msg.skb_head)->opaque_data; TfwMsgIter *iter = &resp->iter; @@ -4638,7 +4618,7 @@ tfw_http_resp_set_empty_skb_head(TfwHttpResp *resp, TfwHttpMsgCleanup *cleanup) } static int -tfw_h1_resp_cutoff_headers(TfwHttpResp *resp, TfwHttpMsgCleanup *cleanup) +tfw_h1_resp_cutoff_headers(TfwHttpResp *resp, TfwSkbCleanup *cleanup) { TfwHttpMsg *hm = (TfwHttpMsg *)resp; TfwHttpReq *req = resp->req; @@ -4736,7 +4716,7 @@ tfw_http_adjust_resp(TfwHttpResp *resp) TfwHttpReq *req = resp->req; TfwHttpMsg *hm = (TfwHttpMsg *)resp; TfwMsgIter *iter = &resp->iter; - TfwHttpMsgCleanup cleanup = {}; + TfwSkbCleanup cleanup = {}; const TfwHdrMods *h_mods = tfw_vhost_get_hdr_mods(req->location, req->vhost, TFW_VHOST_HDRMOD_RESP); @@ -4789,7 +4769,7 @@ tfw_http_adjust_resp(TfwHttpResp *resp) r = tfw_http_msg_expand_from_pool(hm, &STR_CRLF); clean: - __tfw_http_free_cleanup(&cleanup); + ss_skb_free_cleanup(&cleanup); return r; } @@ -5385,7 +5365,7 @@ tfw_h2_hpack_encode_headers(TfwHttpResp *resp, const TfwHdrMods *h_mods) || tgt->flags & TFW_STR_TRAILER_HDR) continue; - r = tfw_hpack_transform(resp, tgt); + r = tfw_hpack_transform(resp, tgt, true); if (unlikely(r)) return r; } @@ -5883,7 +5863,7 @@ tfw_h2_resp_encode_headers(TfwHttpResp *resp) TfwHttpReq *req = resp->req; TfwHttpMsg *hm = (TfwHttpMsg *)resp; TfwHttpTransIter *mit = &resp->mit; - TfwHttpMsgCleanup cleanup = {}; + TfwSkbCleanup cleanup = {}; TfwStr codings = {}; const TfwHdrMods *h_mods = tfw_vhost_get_hdr_mods(req->location, req->vhost, @@ -5985,7 +5965,7 @@ tfw_h2_resp_encode_headers(TfwHttpResp *resp) req, resp); SS_SKB_QUEUE_DUMP(&resp->msg.skb_head); - __tfw_http_free_cleanup(&cleanup); + ss_skb_free_cleanup(&cleanup); return r; } diff --git a/fw/http.h b/fw/http.h index c77100f83..b6c9bd334 100644 --- a/fw/http.h 
+++ b/fw/http.h @@ -343,19 +343,6 @@ typedef struct { long m_date; } TfwHttpCond; -/** - * Represents the data that should be cleaned up after message transformation. - * - * @skb_head - head of skb list that must be freed; - * @pages - pages that must be freed; - * @pages_sz - current number of @pages; - */ -typedef struct { - struct sk_buff *skb_head; - netmem_ref pages[MAX_SKB_FRAGS]; - unsigned char pages_sz; -} TfwHttpMsgCleanup; - /** * HTTP Request. * @@ -404,7 +391,7 @@ struct tfw_http_req_t { TfwHttpSess *sess; TfwClient *peer; void *stale_ce; - TfwHttpMsgCleanup *cleanup; + TfwSkbCleanup *cleanup; TfwHttpCond cond; TfwMsgParseIter pit; HttpTfh tfh; @@ -815,5 +802,4 @@ void tfw_http_extract_request_authority(TfwHttpReq *req); bool tfw_http_mark_is_in_whitlist(unsigned int mark); char *tfw_http_resp_status_line(int status, size_t *len); int tfw_h2_on_send_resp(void *conn, struct sk_buff **skb_head); - #endif /* __TFW_HTTP_H__ */ diff --git a/fw/http2.c b/fw/http2.c index 21c1ab1f6..871f21f10 100644 --- a/fw/http2.c +++ b/fw/http2.c @@ -99,9 +99,8 @@ tfw_h2_apply_settings_entry(TfwH2Ctx *ctx, unsigned short id, switch (id) { case HTTP2_SETTINGS_TABLE_SIZE: - dest->hdr_tbl_sz = min_t(unsigned int, - val, HPACK_ENC_TABLE_MAX_SIZE); - tfw_hpack_set_rbuf_size(&ctx->hpack.enc_tbl, dest->hdr_tbl_sz); + tfw_hpack_set_rbuf_size(&ctx->hpack.enc_tbl, val); + dest->hdr_tbl_sz = ctx->hpack.enc_tbl.window; break; case HTTP2_SETTINGS_ENABLE_PUSH: @@ -627,7 +626,7 @@ tfw_h2_hpack_encode_trailer_headers(TfwHttpResp *resp) T_DBG3("%s: hid=%hu, d_num=%hu, nchunks=%u\n", __func__, hid, d_num, ht->tbl[hid].nchunks); - r = tfw_hpack_transform(resp, tgt); + r = tfw_hpack_transform(resp, tgt, false); if (unlikely(r)) goto finish; } diff --git a/fw/http_frame.c b/fw/http_frame.c index e7d1bf5a8..24e9e2add 100644 --- a/fw/http_frame.c +++ b/fw/http_frame.c @@ -199,7 +199,19 @@ do { \ return T_BAD; \ } else if (res == STREAM_FSM_RES_TERM_STREAM) { \ WARN_ON_ONCE(hdr->stream_id != 
ctx->cur_stream->id); \ - return tfw_h2_current_stream_send_rst((ctx), err); \ + if (ctx->cur_stream != ctx->cur_send_headers) { \ + return tfw_h2_current_stream_send_rst((ctx), \ + err); \ + } else { \ + unsigned int id = ctx->cur_stream->id; \ + /** + * If Tempesta is already sending headers, + * only update the stream state and schedule + * sending an RST_STREAM frame, but do not + * remove the stream from the scheduling queue. + */ \ + return tfw_h2_send_rst_stream(ctx, id, err); \ + } \ } \ return T_OK; \ } \ @@ -667,15 +679,13 @@ tfw_h2_headers_process(TfwH2Ctx *ctx) T_DBG("Invalid dependency: new stream with %u depends on" " itself\n", hdr->stream_id); - ctx->state = HTTP2_IGNORE_FRAME_DATA; - - if (likely(!ctx->cur_stream)) { - return tfw_h2_send_rst_stream(ctx, hdr->stream_id, - HTTP2_ECODE_PROTO); - } - - WARN_ON_ONCE(hdr->stream_id != ctx->cur_stream->id); - return tfw_h2_current_stream_send_rst(ctx, HTTP2_ECODE_PROTO); + /* + * RFC 7540 states that it MUST be treated as a stream-level + * error, however, it doesn’t make sense to continue servicing + * a suspicious connection. + */ + tfw_h2_conn_terminate(ctx, HTTP2_ECODE_PROTO); + return T_BAD; } if (likely(!ctx->cur_stream)) { @@ -777,18 +787,6 @@ tfw_h2_priority_process(TfwH2Ctx *ctx) return T_OK; } - if (ctx->cur_stream->state == HTTP2_STREAM_IDLE) { - /* - * According to RFC 9113 we should response with stream - * error of type PROTOCOL ERROR here, but we can't send - * RST_STREAM for idle stream. - * RFC 9113 doesn't describe this case, so terminate - * connection. - */ - tfw_h2_conn_terminate(ctx, HTTP2_ECODE_PROTO); - return T_BAD; - } - /* * Stream cannot depend on itself (see RFC 7540 section 5.1.2 for * details). 
@@ -796,12 +794,14 @@ tfw_h2_priority_process(TfwH2Ctx *ctx) T_DBG("Invalid dependency: new stream with %u depends on itself\n", hdr->stream_id); - if (tfw_h2_stream_fsm_ignore_err(ctx, ctx->cur_stream, - HTTP2_RST_STREAM, 0)) - return -EPERM; + /* + * RFC 7540 states that it MUST be treated as a stream-level error, + * however, it doesn’t make sense to continue servicing a suspicious + * connection. + */ + tfw_h2_conn_terminate(ctx, HTTP2_ECODE_PROTO); - WARN_ON_ONCE(hdr->stream_id != ctx->cur_stream->id); - return tfw_h2_current_stream_send_rst(ctx, HTTP2_ECODE_PROTO); + return T_BAD; } static inline void @@ -1638,7 +1638,8 @@ tfw_h2_frame_recv(void *data, unsigned char *buf, unsigned int len, T_FSM_STATE(HTTP2_RECV_FRAME_RST_STREAM) { FRAME_FSM_READ_SRVC(ctx->to_read); - tfw_h2_rst_stream_process(ctx); + if (ctx->cur_stream != ctx->cur_send_headers) + tfw_h2_rst_stream_process(ctx); FRAME_FSM_EXIT(T_OK); } @@ -2054,6 +2055,24 @@ tfw_h2_insert_frame_header(struct sock *sk, TfwH2Ctx *ctx, TfwStream *stream, bool trailers = false; char *data; int r = 0; + unsigned char flags; + + if (type == HTTP2_DATA) { + ctx->rem_wnd -= frame_length; + ctx->data_bytes_sent += frame_length; + stream->rem_wnd -= frame_length; + stream->xmit.b_len -= frame_length; + } else if (stream->xmit.h_len) { + stream->xmit.h_len -= frame_length; + } else if (stream->xmit.t_len) { + stream->xmit.t_len -= frame_length; + trailers = true; + } + flags = tfw_h2_calc_frame_flags(stream, type, trailers); + + r = tfw_h2_stream_fsm_ignore_err(ctx, stream, type, flags); + if (unlikely(r)) + return r; /* * Very unlikely case, when skb_head and one or more next skbs @@ -2088,54 +2107,25 @@ tfw_h2_insert_frame_header(struct sock *sk, TfwH2Ctx *ctx, TfwStream *stream, data = dst.data; } - if (type == HTTP2_DATA) { - ctx->rem_wnd -= frame_length; - ctx->data_bytes_sent += frame_length; - stream->rem_wnd -= frame_length; - stream->xmit.b_len -= frame_length; - } else if (stream->xmit.h_len) { - 
stream->xmit.h_len -= frame_length; - } else if (stream->xmit.t_len) { - stream->xmit.t_len -= frame_length; - trailers = true; - } - frame_hdr.length = frame_length; frame_hdr.stream_id = stream->id; frame_hdr.type = type; - frame_hdr.flags = tfw_h2_calc_frame_flags(stream, type, trailers); + frame_hdr.flags = flags; tfw_h2_pack_frame_header(data, &frame_hdr); stream->xmit.frame_length += frame_length + FRAME_HEADER_SIZE; - switch (tfw_h2_stream_fsm_ignore_err(ctx, stream, type, - frame_hdr.flags)) - { - case STREAM_FSM_RES_OK: - break; - case STREAM_FSM_RES_IGNORE: - fallthrough; - case STREAM_FSM_RES_TERM_STREAM: - /* Send previosly successfully prepared frames if exist. */ - stream->xmit.frame_length -= frame_length + FRAME_HEADER_SIZE; - if (stream->xmit.frame_length) { - r = tfw_h2_entail_stream_skb(sk, ctx, stream, - &stream->xmit.frame_length, - true); - } - stream->xmit.frame_length += frame_length + FRAME_HEADER_SIZE; - /* - * Purge stream send queue, but leave postponed - * skbs and rst stream/goaway/tls alert if exist. 
- */ - tfw_h2_stream_purge_send_queue(stream); - return r; - case STREAM_FSM_RES_TERM_CONN: - return -EPIPE; - } return r; } +static inline int +__tfw_h2_is_ready_to_send_postponed(const TfwH2Ctx *ctx, + const TfwStream *stream) +{ + return stream->xmit.postponed && !stream->xmit.frame_length && + !ctx->cur_send_headers; +} + static int tfw_h2_stream_send_postponed(struct sock *sk, struct sk_buff **skb_head, unsigned int mss_now, unsigned long *snd_wnd) @@ -2180,7 +2170,7 @@ do { \ T_FSM_EXIT(); \ } while(0) -#define CALC_FRAME_LENGTH_AND_SET_FRAME_TYPE(type, len) \ +#define CALC_FRAME_LENGTH_AND_SET_FRAME_TYPE_OR_EXIT(type, len) \ do { \ unsigned int max_len; \ unsigned int min_len; \ @@ -2191,15 +2181,25 @@ do { \ } \ max_len = min(TLS_MAX_PAYLOAD_SIZE, *snd_wnd - TLS_MAX_OVERHEAD); \ max_len -= FRAME_HEADER_SIZE; \ - min_len = min(min_to_send, (unsigned int)len); \ frame_length = tfw_h2_calc_frame_length(ctx, stream, type, len, \ max_len); \ - /* \ - * If the lenght of data to send is less then `min_to_send` \ - * use it as a minimum bytes to send. \ + /* + * Apply min sending data optimization only if there is no already + * prepared frames, this prevents from cutting large skb and prevents + * data postponing when we can simply send remaining data in multiple + * tcp segments offloading SKB segmentation to NIC. This avoids an + * additional call to ss_skb_split(). + * */ \ - if (frame_length < min_len) \ - ADJUST_BLOCKED_STREAMS_AND_EXIT(min_len, type); \ + if (stream->xmit.frame_length == 0) { \ + /* + * If the length of data to send is less than `min_to_send` + * use it as a minimum bytes to send. 
+ */ \ + min_len = min_t(int, min_to_send, len); \ + if (frame_length < min_len) \ + ADJUST_BLOCKED_STREAMS_AND_EXIT(min_len, type); \ + } \ frame_type = type; \ } while(0) @@ -2229,37 +2229,48 @@ do { \ } T_FSM_STATE(HTTP2_MAKE_HEADERS_FRAMES) { - CALC_FRAME_LENGTH_AND_SET_FRAME_TYPE(HTTP2_HEADERS, - stream->xmit.h_len); + /* + * This call doesn't change the stream state, but sets ctx->cur_send_headers. + * We do this to force the stream scheduler to select this + * stream during next sending if current sending of this stream + * has been postponed due to lack of tcp window. + */ + r = tfw_h2_stream_fsm_ignore_err(ctx, stream, HTTP2_HEADERS, 0); + if (unlikely(r)) { + T_WARN("Wrong state during sending headers.\n"); + return -EPIPE; + } + if (unlikely(ctx->hpack.enc_tbl.wnd_changed)) { r = tfw_hpack_enc_tbl_write_sz(&ctx->hpack.enc_tbl, - stream); + stream->xmit.skb_head, + 0, + &stream->xmit.h_len); if (unlikely(r < 0)) { - T_WARN("Failed to encode hpack dynamic" + T_WARN("Failed to encode hpack dynamic " "table size %d", r); return r; } } + CALC_FRAME_LENGTH_AND_SET_FRAME_TYPE_OR_EXIT(HTTP2_HEADERS, + stream->xmit.h_len); + r = tfw_h2_insert_frame_header(sk, ctx, stream, frame_type, frame_length); - if (unlikely(r)) { - T_WARN("Failed to make headers frame %d", r); - return r; - } + if (unlikely(r)) + T_FSM_JMP(HTTP2_FRAMING_FAILED); FRAME_XMIT_FSM_NEXT(frame_length, HTTP2_SEND_FRAMES); } T_FSM_STATE(HTTP2_MAKE_CONTINUATION_FRAMES) { - CALC_FRAME_LENGTH_AND_SET_FRAME_TYPE(HTTP2_CONTINUATION, - stream->xmit.h_len); + CALC_FRAME_LENGTH_AND_SET_FRAME_TYPE_OR_EXIT(HTTP2_CONTINUATION, + stream->xmit.h_len); r = tfw_h2_insert_frame_header(sk, ctx, stream, frame_type, frame_length); - if (unlikely(r)) { - T_WARN("Failed to make continuation frame %d", r); - return r; - } + if (unlikely(r)) + T_FSM_JMP(HTTP2_FRAMING_FAILED); FRAME_XMIT_FSM_NEXT(frame_length, HTTP2_SEND_FRAMES); } @@ -2268,14 +2279,12 @@ do { \ if (tfw_h2_conn_or_stream_wnd_is_exceeded(ctx, stream)) 
ADJUST_BLOCKED_STREAMS_AND_EXIT(0, HTTP2_DATA); - CALC_FRAME_LENGTH_AND_SET_FRAME_TYPE(HTTP2_DATA, - stream->xmit.b_len); + CALC_FRAME_LENGTH_AND_SET_FRAME_TYPE_OR_EXIT(HTTP2_DATA, + stream->xmit.b_len); r = tfw_h2_insert_frame_header(sk, ctx, stream, frame_type, frame_length); - if (unlikely (r)) { - T_WARN("Failed to make data frame %d", r); - return r; - } + if (unlikely(r)) + T_FSM_JMP(HTTP2_FRAMING_FAILED); ctx->data_frames_sent++; FRAME_XMIT_FSM_NEXT(frame_length, HTTP2_SEND_FRAMES); @@ -2283,73 +2292,87 @@ do { \ T_FSM_STATE(HTTP2_MAKE_TRAILER_FRAMES) { is_trailer_cont = true; - CALC_FRAME_LENGTH_AND_SET_FRAME_TYPE(HTTP2_HEADERS, - stream->xmit.t_len); + /* + * This call doesn't change the stream state, but sets ctx->cur_send_headers. + * We do this to force the stream scheduler to select this + * stream during next sending if current sending of this stream + * has been postponed due to lack of tcp window. + */ + r = tfw_h2_stream_fsm_ignore_err(ctx, stream, HTTP2_HEADERS, 0); + + CALC_FRAME_LENGTH_AND_SET_FRAME_TYPE_OR_EXIT(HTTP2_HEADERS, + stream->xmit.t_len); + + if (unlikely(ctx->hpack.enc_tbl.wnd_changed)) { + r = tfw_hpack_enc_tbl_write_sz(&ctx->hpack.enc_tbl, + stream->xmit.skb_head, + stream->xmit.frame_length, + &stream->xmit.t_len); + if (unlikely(r < 0)) { + T_WARN("Failed to encode hpack dynamic " + "table size %d", r); + return r; + } + } r = tfw_h2_insert_frame_header(sk, ctx, stream, frame_type, frame_length); - if (unlikely(r)) { - T_WARN("Failed to make trail headers frame %d", r); - return r; - } + if (unlikely(r)) + T_FSM_JMP(HTTP2_FRAMING_FAILED); FRAME_XMIT_FSM_NEXT(frame_length, HTTP2_SEND_FRAMES); } T_FSM_STATE(HTTP2_MAKE_TRAILER_CONTINUATION_FRAMES) { is_trailer_cont = true; - CALC_FRAME_LENGTH_AND_SET_FRAME_TYPE(HTTP2_CONTINUATION, - stream->xmit.t_len); + CALC_FRAME_LENGTH_AND_SET_FRAME_TYPE_OR_EXIT(HTTP2_CONTINUATION, + stream->xmit.t_len); r = tfw_h2_insert_frame_header(sk, ctx, stream, frame_type, frame_length); - if 
(unlikely(r)) { - T_WARN("Failed to make trail continuation frame %d", r); - return r; - } + if (unlikely(r)) + T_FSM_JMP(HTTP2_FRAMING_FAILED); FRAME_XMIT_FSM_NEXT(frame_length, HTTP2_SEND_FRAMES); } T_FSM_STATE(HTTP2_SEND_FRAMES) { if (likely(stream->xmit.frame_length)) { - r = tfw_h2_entail_stream_skb(sk, ctx, stream, - &stream->xmit.frame_length, - false); + r = tfw_h2_entail_stream_skb(sk, ctx, stream, + &stream->xmit.frame_length, + false); if (unlikely(r)) { T_WARN("Failed to send frame %d", r); return r; } } - if (stream->xmit.h_len) { + if (stream->xmit.h_len) T_FSM_JMP(HTTP2_MAKE_CONTINUATION_FRAMES); - } else { - if (unlikely(stream->xmit.postponed) - && !stream->xmit.frame_length - && !ctx->cur_send_headers) - { - struct sk_buff **head = &stream->xmit.postponed; - r = tfw_h2_stream_send_postponed(sk, head, - mss_now, - snd_wnd); - if (unlikely(r)) { - T_WARN("Failed to send postponed" - " frames %d", r); - return r; - } + if (unlikely(__tfw_h2_is_ready_to_send_postponed(ctx, + stream))) { + struct sk_buff **head = &stream->xmit.postponed; + + r = tfw_h2_stream_send_postponed(sk, head, mss_now, + snd_wnd); + if (unlikely(r)) { + T_WARN("Failed to send postponed frames %d", r); + return r; } - if (stream->xmit.b_len) { - T_FSM_JMP(HTTP2_MAKE_DATA_FRAMES); - } else if (stream->xmit.t_len) { - if (likely(!is_trailer_cont)) { - T_FSM_JMP(HTTP2_MAKE_TRAILER_FRAMES); - } else { - T_FSM_JMP(HTTP2_MAKE_TRAILER_CONTINUATION_FRAMES); - } + } + + if (stream->xmit.b_len) { + T_FSM_JMP(HTTP2_MAKE_DATA_FRAMES); + } + else if (stream->xmit.t_len) { + if (likely(!is_trailer_cont)) { + T_FSM_JMP(HTTP2_MAKE_TRAILER_FRAMES); } else { - fallthrough; + T_FSM_JMP(HTTP2_MAKE_TRAILER_CONTINUATION_FRAMES); } } + else { + fallthrough; + } } T_FSM_STATE(HTTP2_MAKE_FRAMES_FINISH) { @@ -2361,8 +2384,8 @@ do { \ */ if (unlikely(stream->xmit.skb_head)) { struct sk_buff **head = &stream->xmit.skb_head; - r = tfw_h2_stream_send_postponed(sk, head, - mss_now, + + r = 
tfw_h2_stream_send_postponed(sk, head, mss_now, snd_wnd); if (unlikely(r)) { T_WARN("Failed to send postponed" @@ -2372,12 +2395,73 @@ do { \ } if (stream == ctx->error) ctx->error = NULL; - /* - * Don't put exclusive streams in closed queue it - * will be immediately deleted in the caller function. - */ - if (!stream_is_exclusive) - tfw_h2_stream_add_closed(ctx, stream); + T_FSM_EXIT(); + } + + /* + * In this state we handle framing error. It may happen if we are trying + * to send DATA frames to closed stream. e.g Stream closed by the + * client while receiving response, that is valid behavior for firefox. + */ + T_FSM_STATE(HTTP2_FRAMING_FAILED) { + switch (r) { + case STREAM_FSM_RES_IGNORE: + fallthrough; + case STREAM_FSM_RES_TERM_STREAM: + /* + * In this case r is positive, set it to zero to not + * return positive r from this function. That doesn't + * reset connection but stops stream scheduling even + * if we have enough send window. + */ + r = 0; + /* Send previously successfully prepared frames if exist. */ + if (stream->xmit.frame_length) { + r = tfw_h2_entail_stream_skb(sk, ctx, stream, + &stream->xmit.frame_length, + true); + if (unlikely(r)) + return r; + } + + /* During headers insertion we already subtract + * @frame_length from b_len, h_len or t_len. However + * tfw_h2_stream_purge_send_queue() needs actual + * size of the queue data to completely free it. Thus + * restore actual size here. + */ + stream->xmit.frame_length += frame_length; + /** + * Purge stream send queue, but leave postponed + * skbs and rst stream/goaway/tls alert if exist. 
+ */ + tfw_h2_stream_purge_send_queue(stream); + + if (unlikely(stream->xmit.postponed) && + !ctx->cur_send_headers) { + struct sk_buff **head = &stream->xmit.postponed; + + r = tfw_h2_stream_send_postponed(sk, head, + mss_now, + snd_wnd); + if (unlikely(r)) { + T_WARN("Failed to send postponed" + " frames %d", r); + return r; + } + } + T_FSM_JMP(HTTP2_MAKE_FRAMES_FINISH); + case STREAM_FSM_RES_TERM_CONN: + return -EPIPE; + default: + /* + * Framing error not occurred but framing failed state + * reached. + */ + WARN_ON_ONCE(!r); + return -EPIPE; + } + T_FSM_EXIT(); } @@ -2398,8 +2482,7 @@ do { \ { struct sk_buff **head = &stream->xmit.postponed; - r = tfw_h2_stream_send_postponed(sk, head, - mss_now, + r = tfw_h2_stream_send_postponed(sk, head, mss_now, snd_wnd); if (unlikely(r)) { T_WARN("Failed to send postponed" @@ -2412,7 +2495,7 @@ do { \ return r; #undef FRAME_XMIT_FSM_NEXT -#undef CALC_FRAME_LENGTH_AND_SET_FRAME_TYPE +#undef CALC_FRAME_LENGTH_AND_SET_FRAME_TYPE_OR_EXIT #undef ADJUST_BLOCKED_STREAMS_AND_EXIT } @@ -2467,6 +2550,8 @@ tfw_h2_make_frames(struct sock *sk, TfwH2Ctx *ctx, unsigned int mss_now, if (stream_is_exclusive) { tfw_h2_stream_clean(ctx, stream); } else { + tfw_h2_stream_add_closed(ctx, stream); + TfwStreamSchedEntry *parent = stream->sched->parent; diff --git a/fw/http_msg.c b/fw/http_msg.c index 09440a53d..4971ce20d 100644 --- a/fw/http_msg.c +++ b/fw/http_msg.c @@ -1303,52 +1303,6 @@ tfw_h2_msg_expand_from_pool_lc(TfwHttpMsg *hm, const TfwStr *str, return r; } -static inline void -__tfw_http_msg_move_frags(struct sk_buff *skb, int frag_idx, - TfwHttpMsgCleanup *cleanup) -{ - int i, len; - struct skb_shared_info *si = skb_shinfo(skb); - - for (i = 0, len = 0; i < frag_idx; i++) { - cleanup->pages[i] = skb_frag_netmem(&si->frags[i]); - cleanup->pages_sz++; - len += skb_frag_size(&si->frags[i]); - } - - si->nr_frags -= frag_idx; - ss_skb_adjust_data_len(skb, -len); - memmove(&si->frags, &si->frags[frag_idx], - (si->nr_frags) * 
sizeof(skb_frag_t)); -} - -static inline void -__tfw_http_msg_rm_all_frags(struct sk_buff *skb, TfwHttpMsgCleanup *cleanup) -{ - int i, len; - struct skb_shared_info *si = skb_shinfo(skb); - - for (i = 0; i < si->nr_frags; i++) - cleanup->pages[i] = skb_frag_netmem(&si->frags[i]); - - len = skb->data_len; - cleanup->pages_sz = si->nr_frags; - si->nr_frags = 0; - ss_skb_adjust_data_len(skb, -len); -} - -static inline void -__tfw_http_msg_shrink_frag(struct sk_buff *skb, int frag_idx, const char *nbegin) -{ - skb_frag_t *frag = &skb_shinfo(skb)->frags[frag_idx]; - const int len = nbegin - (char*)skb_frag_address(frag); - - /* Add offset and decrease fragment's size */ - skb_frag_off_add(frag, len); - skb_frag_size_sub(frag, len); - ss_skb_adjust_data_len(skb, -len); -} - /* * Delete SKBs and paged fragments related to @hm that contains message * headers. SKBs and fragments will be "unlinked" and placed to @cleanup. @@ -1356,7 +1310,7 @@ __tfw_http_msg_shrink_frag(struct sk_buff *skb, int frag_idx, const char *nbegin * as source for message trasformation. */ int -tfw_http_msg_cutoff_headers(TfwHttpMsg *hm, TfwHttpMsgCleanup* cleanup) +tfw_http_msg_cutoff_headers(TfwHttpMsg *hm, TfwSkbCleanup *cleanup) { int i, r = 0; char *begin, *end; @@ -1383,8 +1337,7 @@ tfw_http_msg_cutoff_headers(TfwHttpMsg *hm, TfwHttpMsgCleanup* cleanup) it->skb, body); break; } else { - ss_skb_put(it->skb, -skb_headlen(it->skb)); - it->skb->tail_lock = 1; + ss_skb_remove_linear_data(it->skb); } } @@ -1402,14 +1355,14 @@ tfw_http_msg_cutoff_headers(TfwHttpMsg *hm, TfwHttpMsgCleanup* cleanup) * fragments from skb where LF is located. */ if (!body) { - __tfw_http_msg_rm_all_frags(it->skb, cleanup); + ss_skb_rm_all_frags(it->skb, cleanup); goto end; } else if (off != begin) { /* * Fragment contains headers and body. * Set beginning of frag as beginning of body. 
*/ - __tfw_http_msg_shrink_frag(it->skb, i, off); + ss_skb_shrink_frag(it->skb, i, off); } /* @@ -1418,7 +1371,7 @@ tfw_http_msg_cutoff_headers(TfwHttpMsg *hm, TfwHttpMsgCleanup* cleanup) * from skb. */ if (i >= 1) - __tfw_http_msg_move_frags(it->skb, i, cleanup); + ss_skb_shift_frags(it->skb, i, cleanup); goto end; } diff --git a/fw/http_msg.h b/fw/http_msg.h index b255f25c5..3608b6758 100644 --- a/fw/http_msg.h +++ b/fw/http_msg.h @@ -163,7 +163,7 @@ int tfw_h2_msg_expand_from_pool_lc(TfwHttpMsg *hm, const TfwStr *str, TfwHttpTransIter *mit); int __hdr_name_cmp(const TfwStr *hdr, const TfwStr *cmp_hdr); int __http_hdr_lookup(TfwHttpMsg *hm, const TfwStr *hdr); -int tfw_http_msg_cutoff_headers(TfwHttpMsg *hm, TfwHttpMsgCleanup* cleanup); +int tfw_http_msg_cutoff_headers(TfwHttpMsg *hm, TfwSkbCleanup *cleanup); #define TFW_H2_MSG_HDR_ADD(hm, name, val, idx) \ tfw_h2_msg_hdr_add(hm, name, sizeof(name) - 1, val, \ diff --git a/fw/http_stream.c b/fw/http_stream.c index 923ea20e0..c903977dc 100644 --- a/fw/http_stream.c +++ b/fw/http_stream.c @@ -346,10 +346,11 @@ do { \ } \ } while(0) -#define TFW_H2_FSM_TYPE_CHECK(ctx, stream, op, type) \ +#define TFW_H2_FSM_TYPE_CHECK(ctx, op, type, is_send) \ do { \ if ((ctx->cur_##op##_headers \ - && (type != HTTP2_CONTINUATION && type != HTTP2_RST_STREAM)) \ + && ((type == HTTP2_HEADERS && !is_send) || \ + (type != HTTP2_HEADERS && type != HTTP2_CONTINUATION))) \ || (!ctx->cur_##op##_headers && type == HTTP2_CONTINUATION)) { \ *err = HTTP2_ECODE_PROTO; \ res = STREAM_FSM_RES_TERM_CONN; \ @@ -372,7 +373,7 @@ do { \ if (send) { TFW_H2_FSM_STREAM_CHECK(ctx, stream, send); - TFW_H2_FSM_TYPE_CHECK(ctx, stream, send, type); + TFW_H2_FSM_TYPE_CHECK(ctx, send, type, true); /* * Usually we would send HEADERS/CONTINUATION or DATA frames * to the client when HTTP2_STREAM_REM_HALF_CLOSED state @@ -399,7 +400,7 @@ do { \ */ } else { TFW_H2_FSM_STREAM_CHECK(ctx, stream, recv); - TFW_H2_FSM_TYPE_CHECK(ctx, stream, recv, type); + 
TFW_H2_FSM_TYPE_CHECK(ctx, recv, type, false); } switch (tfw_h2_get_stream_state(stream)) { @@ -753,9 +754,24 @@ do { \ break; case HTTP2_STREAM_CLOSED: - T_WARN("%s, stream fully closed: stream->id=%u, type=%hhu," + T_DBG2("%s, stream fully closed: stream->id=%u, type=%hhu," " flags=0x%hhx\n", __func__, stream->id, type, flags); if (send) { + const bool is_headers = type == HTTP2_HEADERS || + type == HTTP2_CONTINUATION; + + if (ctx->cur_send_headers && is_headers) { + /* + * Headers has been sent in closed state. + * It happens when Tempesta encoded all headers + * and after received RST_STREAM. To not break + * compression state Tempesta sends all remaining + * headers. + */ + if (flags & HTTP2_F_END_HEADERS) + ctx->cur_send_headers = NULL; + break; + } res = STREAM_FSM_RES_IGNORE; } else { if (type != HTTP2_PRIORITY) { @@ -763,16 +779,12 @@ do { \ res = STREAM_FSM_RES_TERM_CONN; } } - break; default: BUG(); } finish: - if (type == HTTP2_RST_STREAM || res == STREAM_FSM_RES_TERM_STREAM) - tfw_h2_conn_reset_stream_on_close(ctx, stream); - T_DBG4("exit %s: strm [%p] state %d(%s), res %d\n", __func__, stream, tfw_h2_get_stream_state(stream), __h2_strm_st_n(stream), res); diff --git a/fw/http_stream.h b/fw/http_stream.h index 4e9b2c5d4..fd675e81e 100644 --- a/fw/http_stream.h +++ b/fw/http_stream.h @@ -67,6 +67,7 @@ typedef enum { HTTP2_MAKE_TRAILER_CONTINUATION_FRAMES, HTTP2_SEND_FRAMES, HTTP2_MAKE_FRAMES_FINISH, + HTTP2_FRAMING_FAILED } TfwStreamXmitState; static const char *__tfw_strm_st_names[] = { diff --git a/fw/ss_skb.c b/fw/ss_skb.c index e2596fdd9..39648ffb3 100644 --- a/fw/ss_skb.c +++ b/fw/ss_skb.c @@ -25,7 +25,6 @@ */ #include #include -#include #include #include #include @@ -1702,8 +1701,7 @@ ss_skb_linear_transform(struct sk_buff *skb_head, struct sk_buff *skb, if (!split_point) { /* Usage of linear portion of SKB is not expected */ - ss_skb_put(skb, -skb_headlen(skb)); - skb->tail_lock = 1; + ss_skb_remove_linear_data(skb); } else { unsigned int off = 
split_point - skb->data; diff --git a/fw/ss_skb.h b/fw/ss_skb.h index b0febb0c0..809c907e6 100644 --- a/fw/ss_skb.h +++ b/fw/ss_skb.h @@ -23,6 +23,7 @@ #define __TFW_SS_SKB_H__ #include +#include #include #include "str.h" @@ -59,6 +60,19 @@ struct tfw_skb_cb { #define TFW_SKB_CB(skb) ((struct tfw_skb_cb *)&((skb)->cb[0])) +/** + * Represents data that should be cleaned up. + * + * @skb_head - head of skb list that must be freed; + * @pages - pages that must be freed; + * @pages_sz - current number of @pages; + */ +typedef struct { + struct sk_buff *skb_head; + netmem_ref pages[MAX_SKB_FRAGS]; + unsigned char pages_sz; +} TfwSkbCleanup; + void ss_skb_set_owner(struct sk_buff *skb, void (*destructor)(struct sk_buff *), TfwClientMem *owner, unsigned int delta); void ss_skb_adjust_client_mem(struct sk_buff *skb, int delta); @@ -487,6 +501,87 @@ int ss_skb_linear_transform(struct sk_buff *skb_head, struct sk_buff *skb, unsigned char *split_point); int ss_skb_realloc_headroom(struct sk_buff *skb); +static inline void +ss_skb_remove_linear_data(struct sk_buff *skb) +{ + ss_skb_put(skb, -skb_headlen(skb)); + skb->tail_lock = 1; +} + +/* Remove all paged fragments from @skb and move them into @cleanup. */ +static inline void +ss_skb_rm_all_frags(struct sk_buff *skb, TfwSkbCleanup *cleanup) +{ + int i, len; + struct skb_shared_info *si = skb_shinfo(skb); + + for (i = 0; i < si->nr_frags; i++) + cleanup->pages[i] = skb_frag_netmem(&si->frags[i]); + + len = skb->data_len; + cleanup->pages_sz = si->nr_frags; + si->nr_frags = 0; + ss_skb_adjust_data_len(skb, -len); +} + +/* + * Remove paged fragments until @frag_idx and move them into @cleanup. Shift + * remaining fragments to the beginning of fragments array.
+ */ +static inline void +ss_skb_shift_frags(struct sk_buff *skb, int frag_idx, + TfwSkbCleanup *cleanup) +{ + int i, len; + struct skb_shared_info *si = skb_shinfo(skb); + + for (i = 0, len = 0; i < frag_idx; i++) { + cleanup->pages[i] = skb_frag_netmem(&si->frags[i]); + cleanup->pages_sz++; + len += skb_frag_size(&si->frags[i]); + } + + si->nr_frags -= frag_idx; + ss_skb_adjust_data_len(skb, -len); + memmove(&si->frags, &si->frags[frag_idx], + (si->nr_frags) * sizeof(skb_frag_t)); +} + +/* + * Shrink fragment with @frag_idx index, set @nbegin as the starting position + * of that fragment. + */ +static inline void +ss_skb_shrink_frag(struct sk_buff *skb, int frag_idx, const char *nbegin) +{ + skb_frag_t *frag = &skb_shinfo(skb)->frags[frag_idx]; + const int len = nbegin - (char *)skb_frag_address(frag); + + /* Add offset and decrease fragment's size */ + skb_frag_off_add(frag, len); + skb_frag_size_sub(frag, len); + ss_skb_adjust_data_len(skb, -len); +} + +static inline void +ss_skb_free_cleanup(TfwSkbCleanup *cleanup) +{ + int i; + struct sk_buff *skb; + + while ((skb = ss_skb_dequeue(&cleanup->skb_head))) + __ss_kfree_skb(skb); + + for (i = 0; i < cleanup->pages_sz; i++) + /* + * Pass "true" even for non recyclable pages, relying on check + * pp_magic == PP_SIGNATURE in napi_pp_put_page(), which avoids + * recycling of non page_pool pages. Overhead seems the same + * as having/maintaining a flag for each fragment.
+ */ + skb_page_unref(cleanup->pages[i], true); +} + #if defined(DEBUG) && (DEBUG >= 4) #define ss_skb_queue_for_each_do(queue, lambda) \ do { \ diff --git a/fw/t/unit/test_http_msg.c b/fw/t/unit/test_http_msg.c index 6a9e5a3cc..c2f6f5619 100644 --- a/fw/t/unit/test_http_msg.c +++ b/fw/t/unit/test_http_msg.c @@ -165,7 +165,7 @@ TEST(http_msg, cutoff_linear_headers_paged_body) TFW_STR_STRING("paged_body") }; TfwStr *head = &frags[0], *pgd = &frags[1]; - TfwHttpMsgCleanup cleanup = {}; + TfwSkbCleanup cleanup = {}; TfwMsgIter *it; int i; @@ -200,7 +200,7 @@ TEST(http_msg, cutoff_linear_headers_and_linear_body) TFW_STR_STRING("paged_body2") }; TfwStr *head = &frags[0], *pgd = &frags[1]; - TfwHttpMsgCleanup cleanup = {}; + TfwSkbCleanup cleanup = {}; TfwMsgIter *it; int i; @@ -232,7 +232,7 @@ TEST(http_msg, expand_from_pool_for_headers) }; TfwStr *hdr = &frags[0], *head = &frags[0], *pgd = &frags[1]; TfwHttpMsg *msg = (TfwHttpMsg *)resp; - TfwHttpMsgCleanup cleanup = {}; + TfwSkbCleanup cleanup = {}; TfwMsgIter *it; int i; @@ -287,7 +287,7 @@ TEST(http_msg, expand_from_pool_for_trailers) }; TfwStr *trailer = &frags[0], *head = &frags[1], *pgd = &frags[2]; TfwHttpMsg *msg = (TfwHttpMsg *)resp; - TfwHttpMsgCleanup cleanup = {}; + TfwSkbCleanup cleanup = {}; TfwMsgIter *it; int i;