diff --git a/cmd/bricksllm/main.go b/cmd/bricksllm/main.go
index d87d2f6..b807f56 100644
--- a/cmd/bricksllm/main.go
+++ b/cmd/bricksllm/main.go
@@ -359,7 +359,7 @@ func main() {
 	scanner := pii.NewScanner(detector)
 	cd := custompolicy.NewOpenAiDetector(cfg.CustomPolicyDetectionTimeout, cfg.OpenAiApiKey)
 
-	ps, err := proxy.NewProxyServer(log, *modePtr, *privacyPtr, c, m, rm, a, psm, cpm, store, memStore, ce, ace, aoe, v, rec, messageBus, rlm, cfg.ProxyTimeout, accessCache, userAccessCache, pm, scanner, cd, die, um)
+	ps, err := proxy.NewProxyServer(log, *modePtr, *privacyPtr, c, m, rm, a, psm, cpm, store, memStore, ce, ace, aoe, v, rec, messageBus, rlm, cfg.ProxyTimeout, accessCache, userAccessCache, pm, scanner, cd, die, um, cfg.RemoveUserAgent)
 	if err != nil {
 		log.Sugar().Fatalf("error creating proxy http server: %v", err)
 	}
diff --git a/go.mod b/go.mod
index 5f596f5..40b6f34 100644
--- a/go.mod
+++ b/go.mod
@@ -17,7 +17,7 @@ require (
 	github.com/mattn/go-colorable v0.1.13
 	github.com/pkoukk/tiktoken-go v0.1.7
 	github.com/redis/go-redis/v9 v9.0.5
-	github.com/sashabaranov/go-openai v1.24.0
+	github.com/sashabaranov/go-openai v1.26.3
 	github.com/stretchr/testify v1.8.4
 	github.com/tidwall/gjson v1.17.0
 	go.uber.org/zap v1.24.0
diff --git a/go.sum b/go.sum
index 117c502..85cb81a 100644
--- a/go.sum
+++ b/go.sum
@@ -157,6 +157,8 @@ github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjR
 github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog=
 github.com/sashabaranov/go-openai v1.24.0 h1:4H4Pg8Bl2RH/YSnU8DYumZbuHnnkfioor/dtNlB20D4=
 github.com/sashabaranov/go-openai v1.24.0/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg=
+github.com/sashabaranov/go-openai v1.26.3 h1:Tjnh4rcvsSU68f66r05mys+Zou4vo4qyvkne6AIRJPI=
+github.com/sashabaranov/go-openai v1.26.3/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg=
 github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
 github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
 github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
diff --git a/internal/config/config.go b/internal/config/config.go
index f603c94..e4061b2 100644
--- a/internal/config/config.go
+++ b/internal/config/config.go
@@ -43,6 +43,7 @@ type Config struct {
 	AmazonRegion            string        `koanf:"amazon_region" env:"AMAZON_REGION" envDefault:"us-west-2"`
 	AmazonRequestTimeout    time.Duration `koanf:"amazon_request_timeout" env:"AMAZON_REQUEST_TIMEOUT" envDefault:"5s"`
 	AmazonConnectionTimeout time.Duration `koanf:"amazon_connection_timeout" env:"AMAZON_CONNECTION_TIMEOUT" envDefault:"10s"`
+	RemoveUserAgent         bool          `koanf:"remove_user_agent" env:"REMOVE_USER_AGENT" envDefault:"false"`
 }
 
 func prepareDotEnv(envFilePath string) error {
diff --git a/internal/server/web/proxy/anthropic.go b/internal/server/web/proxy/anthropic.go
index 4f8e05a..97e79ed 100644
--- a/internal/server/web/proxy/anthropic.go
+++ b/internal/server/web/proxy/anthropic.go
@@ -28,13 +28,17 @@ type anthropicEstimator interface {
 	CountMessagesTokens(messages []anthropic.Message) int
 }
 
-func copyHttpHeaders(source *http.Request, dest *http.Request) {
+func copyHttpHeaders(source *http.Request, dest *http.Request, removeUserAgent bool) {
 	for k := range source.Header {
 		if strings.ToLower(k) != "X-CUSTOM-EVENT-ID" {
 			dest.Header.Set(k, source.Header.Get(k))
 		}
 	}
 
+	if removeUserAgent {
+		dest.Header.Del("User-Agent")
+	}
+
 	dest.Header.Set("Accept-Encoding", "*")
 }
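Two notes on the new header logic. First, the flag is off by default and is enabled with REMOVE_USER_AGENT=true in the environment (see the config.go hunk above). Second, dest.Header.Del("User-Agent") only strips the caller's value: Go's net/http writes its own default agent (e.g. "Go-http-client/1.1") on any outgoing request whose User-Agent header is absent, whereas setting the header to the empty string suppresses it entirely. If the intent is that the upstream provider sees no User-Agent at all, Del may not be enough. A small self-contained demonstration of the difference:

    package main

    import (
    	"fmt"
    	"io"
    	"net/http"
    	"net/http/httptest"
    )

    func main() {
    	// Echo back whichever User-Agent the upstream actually receives.
    	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
    		fmt.Fprintf(w, "%q", r.Header.Get("User-Agent"))
    	}))
    	defer srv.Close()

    	show := func(req *http.Request) {
    		res, err := http.DefaultClient.Do(req)
    		if err != nil {
    			panic(err)
    		}
    		defer res.Body.Close()
    		b, _ := io.ReadAll(res.Body)
    		fmt.Println(string(b))
    	}

    	// Header deleted: net/http substitutes its default agent,
    	// so the upstream sees e.g. "Go-http-client/1.1".
    	req1, _ := http.NewRequest(http.MethodGet, srv.URL, nil)
    	req1.Header.Del("User-Agent")
    	show(req1)

    	// Header set to "": net/http sends no User-Agent header at all.
    	req2, _ := http.NewRequest(http.MethodGet, srv.URL, nil)
    	req2.Header.Set("User-Agent", "")
    	show(req2)
    }

Incidentally, the unchanged filter in copyHttpHeaders compares strings.ToLower(k) against the upper-case literal "X-CUSTOM-EVENT-ID", which can never match, so that header is never actually dropped; that is a pre-existing quirk, not introduced by this change.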
@@ -66,7 +70,7 @@ func getCompletionHandler(prod, private bool, client http.Client, timeOut time.D
 	// 	return
 	// }
 
-	copyHttpHeaders(c.Request, req)
+	copyHttpHeaders(c.Request, req, c.GetBool("removeUserAgent"))
 
 	isStreaming := c.GetBool("stream")
 	if isStreaming {
@@ -320,7 +324,7 @@ func getMessagesHandler(prod, private bool, client http.Client, e anthropicEstim
 		return
 	}
 
-	copyHttpHeaders(c.Request, req)
+	copyHttpHeaders(c.Request, req, c.GetBool("removeUserAgent"))
 
 	isStreaming := c.GetBool("stream")
 	if isStreaming {
diff --git a/internal/server/web/proxy/audio.go b/internal/server/web/proxy/audio.go
index 1e77e51..aa50a62 100644
--- a/internal/server/web/proxy/audio.go
+++ b/internal/server/web/proxy/audio.go
@@ -40,7 +40,7 @@ func getSpeechHandler(prod bool, client http.Client, timeOut time.Duration) gin.
 		return
 	}
 
-	copyHttpHeaders(c.Request, req)
+	copyHttpHeaders(c.Request, req, c.GetBool("removeUserAgent"))
 
 	start := time.Now()
@@ -187,7 +187,7 @@ func getTranscriptionsHandler(prod bool, client http.Client, timeOut time.Durati
 		return
 	}
 
-	copyHttpHeaders(c.Request, req)
+	copyHttpHeaders(c.Request, req, c.GetBool("removeUserAgent"))
 
 	var b bytes.Buffer
 	writer := multipart.NewWriter(&b)
@@ -351,7 +351,7 @@ func getTranslationsHandler(prod bool, client http.Client, timeOut time.Duration
 		return
 	}
 
-	copyHttpHeaders(c.Request, req)
+	copyHttpHeaders(c.Request, req, c.GetBool("removeUserAgent"))
 
 	var b bytes.Buffer
 	writer := multipart.NewWriter(&b)
diff --git a/internal/server/web/proxy/azure_chat_completion.go b/internal/server/web/proxy/azure_chat_completion.go
index 2e3c1aa..907f122 100644
--- a/internal/server/web/proxy/azure_chat_completion.go
+++ b/internal/server/web/proxy/azure_chat_completion.go
@@ -50,7 +50,7 @@ func getAzureChatCompletionHandler(prod, private bool, client http.Client, aoe a
 		return
 	}
 
-	copyHttpHeaders(c.Request, req)
+	copyHttpHeaders(c.Request, req, c.GetBool("removeUserAgent"))
 
 	isStreaming := c.GetBool("stream")
 	if isStreaming {
diff --git a/internal/server/web/proxy/azure_completion.go b/internal/server/web/proxy/azure_completion.go
index 88e7182..f1d5906 100644
--- a/internal/server/web/proxy/azure_completion.go
+++ b/internal/server/web/proxy/azure_completion.go
@@ -86,7 +86,7 @@ func getAzureCompletionsHandler(prod, private bool, client http.Client, aoe azur
 		return
 	}
 
-	copyHttpHeaders(c.Request, req)
+	copyHttpHeaders(c.Request, req, c.GetBool("removeUserAgent"))
 
 	isStreaming := c.GetBool("stream")
 	if isStreaming {
diff --git a/internal/server/web/proxy/azure_embedding.go b/internal/server/web/proxy/azure_embedding.go
index 9d917f4..d15675d 100644
--- a/internal/server/web/proxy/azure_embedding.go
+++ b/internal/server/web/proxy/azure_embedding.go
@@ -41,7 +41,7 @@ func getAzureEmbeddingsHandler(prod, private bool, client http.Client, aoe azure
 		return
 	}
 
-	copyHttpHeaders(c.Request, req)
+	copyHttpHeaders(c.Request, req, c.GetBool("removeUserAgent"))
 
 	start := time.Now()
diff --git a/internal/server/web/proxy/chat_completion.go b/internal/server/web/proxy/chat_completion.go
index 52dd6bf..b33d7fe 100644
--- a/internal/server/web/proxy/chat_completion.go
+++ b/internal/server/web/proxy/chat_completion.go
@@ -37,7 +37,7 @@ func getChatCompletionHandler(prod, private bool, client http.Client, e estimato
 		return
 	}
 
-	copyHttpHeaders(c.Request, req)
+	copyHttpHeaders(c.Request, req, c.GetBool("removeUserAgent"))
 
 	isStreaming := c.GetBool("stream")
 	if isStreaming {
diff --git a/internal/server/web/proxy/custom_provider.go b/internal/server/web/proxy/custom_provider.go
index dbe6a1d..4a3b5bc 100644
--- a/internal/server/web/proxy/custom_provider.go
+++ b/internal/server/web/proxy/custom_provider.go
@@ -84,7 +84,7 @@ func getCustomProviderHandler(prod bool, client http.Client, timeOut time.Durati
 		return
 	}
 
-	copyHttpHeaders(c.Request, req)
+	copyHttpHeaders(c.Request, req, c.GetBool("removeUserAgent"))
 
 	isStreaming := c.GetBool("stream")
 	if isStreaming {
diff --git a/internal/server/web/proxy/deepinfra.go b/internal/server/web/proxy/deepinfra.go
index 846f761..d5156c1 100644
--- a/internal/server/web/proxy/deepinfra.go
+++ b/internal/server/web/proxy/deepinfra.go
@@ -36,7 +36,7 @@ func getDeepinfraCompletionsHandler(prod, private bool, client http.Client, time
 		return
 	}
 
-	copyHttpHeaders(c.Request, req)
+	copyHttpHeaders(c.Request, req, c.GetBool("removeUserAgent"))
 
 	isStreaming := c.GetBool("stream")
 	if isStreaming {
@@ -240,7 +240,7 @@ func getDeepinfraChatCompletionsHandler(prod, private bool, client http.Client,
 		return
 	}
 
-	copyHttpHeaders(c.Request, req)
+	copyHttpHeaders(c.Request, req, c.GetBool("removeUserAgent"))
 
 	isStreaming := c.GetBool("stream")
 	if isStreaming {
diff --git a/internal/server/web/proxy/embedding.go b/internal/server/web/proxy/embedding.go
index c7a2fe2..49c3384 100644
--- a/internal/server/web/proxy/embedding.go
+++ b/internal/server/web/proxy/embedding.go
@@ -57,7 +57,7 @@ func getEmbeddingHandler(prod, private bool, client http.Client, e estimator, ti
 		return
 	}
 
-	copyHttpHeaders(c.Request, req)
+	copyHttpHeaders(c.Request, req, c.GetBool("removeUserAgent"))
 
 	start := time.Now()
diff --git a/internal/server/web/proxy/middleware.go b/internal/server/web/proxy/middleware.go
index 56d72c9..a389b7e 100644
--- a/internal/server/web/proxy/middleware.go
+++ b/internal/server/web/proxy/middleware.go
@@ -168,7 +168,7 @@ type CustomPolicyDetector interface {
 	Detect(input []string, requirements []string) (bool, error)
 }
 
-func getMiddleware(cpm CustomProvidersManager, rm routeManager, pm PoliciesManager, a authenticator, prod, private bool, log *zap.Logger, pub publisher, prefix string, ac accessCache, uac userAccessCache, client http.Client, scanner Scanner, cd CustomPolicyDetector, um userManager) gin.HandlerFunc {
+func getMiddleware(cpm CustomProvidersManager, rm routeManager, pm PoliciesManager, a authenticator, prod, private bool, log *zap.Logger, pub publisher, prefix string, ac accessCache, uac userAccessCache, client http.Client, scanner Scanner, cd CustomPolicyDetector, um userManager, removeUserAgent bool) gin.HandlerFunc {
 	return func(c *gin.Context) {
 		if c == nil || c.Request == nil {
 			JSON(c, http.StatusInternalServerError, "[BricksLLM] request is empty")
@@ -181,6 +181,10 @@ func getMiddleware(cpm CustomProvidersManager, rm routeManager, pm PoliciesManag
 			return
 		}
 
+		if removeUserAgent {
+			c.Set("removeUserAgent", removeUserAgent)
+		}
+
 		blw := &responseWriter{body: bytes.NewBufferString(""), ResponseWriter: c.Writer}
 		c.Writer = blw
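The flag travels from the config into the gin request context under the key "removeUserAgent", and every handler reads it back with c.GetBool. Because gin's GetBool returns false for a missing key, setting the key only when the flag is true is equivalent to always setting it. A minimal standalone sketch of that round trip (not the project's actual middleware):

    package main

    import (
    	"fmt"
    	"net/http"

    	"github.com/gin-gonic/gin"
    )

    func main() {
    	r := gin.New()

    	removeUserAgent := true // stands in for cfg.RemoveUserAgent

    	// The middleware stores the flag on the request context.
    	r.Use(func(c *gin.Context) {
    		if removeUserAgent {
    			c.Set("removeUserAgent", removeUserAgent)
    		}
    		c.Next()
    	})

    	// Handlers read it back; GetBool yields false when the key
    	// was never set, so the conditional Set above is safe.
    	r.GET("/", func(c *gin.Context) {
    		fmt.Println(c.GetBool("removeUserAgent")) // true
    		c.Status(http.StatusOK)
    	})

    	_ = r // call r.Run(":8080") to actually serve
    }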
diff --git a/internal/server/web/proxy/proxy.go b/internal/server/web/proxy/proxy.go
index 2594cbd..9df3e12 100644
--- a/internal/server/web/proxy/proxy.go
+++ b/internal/server/web/proxy/proxy.go
@@ -79,13 +79,13 @@ func CorsMiddleware() gin.HandlerFunc {
 	}
 }
 
-func NewProxyServer(log *zap.Logger, mode, privacyMode string, c cache, m KeyManager, rm routeManager, a authenticator, psm ProviderSettingsManager, cpm CustomProvidersManager, ks keyStorage, kms keyMemStorage, e estimator, ae anthropicEstimator, aoe azureEstimator, v validator, r recorder, pub publisher, rlm rateLimitManager, timeOut time.Duration, ac accessCache, uac userAccessCache, pm PoliciesManager, scanner Scanner, cd CustomPolicyDetector, die deepinfraEstimator, um userManager) (*ProxyServer, error) {
+func NewProxyServer(log *zap.Logger, mode, privacyMode string, c cache, m KeyManager, rm routeManager, a authenticator, psm ProviderSettingsManager, cpm CustomProvidersManager, ks keyStorage, kms keyMemStorage, e estimator, ae anthropicEstimator, aoe azureEstimator, v validator, r recorder, pub publisher, rlm rateLimitManager, timeOut time.Duration, ac accessCache, uac userAccessCache, pm PoliciesManager, scanner Scanner, cd CustomPolicyDetector, die deepinfraEstimator, um userManager, removeAgentHeaders bool) (*ProxyServer, error) {
 	router := gin.New()
 	prod := mode == "production"
 	private := privacyMode == "strict"
 
 	router.Use(CorsMiddleware())
-	router.Use(getMiddleware(cpm, rm, pm, a, prod, private, log, pub, "proxy", ac, uac, http.Client{}, scanner, cd, um))
+	router.Use(getMiddleware(cpm, rm, pm, a, prod, private, log, pub, "proxy", ac, uac, http.Client{}, scanner, cd, um, removeAgentHeaders))
 
 	client := http.Client{}
 
@@ -196,6 +196,25 @@ func NewProxyServer(log *zap.Logger, mode, privacyMode string, c cache, m KeyMan
 	// custom route
 	router.POST("/api/routes/*route", getRouteHandler(prod, c, aoe, e, client, r))
 
+	// vector store
+	router.POST("/api/providers/openai/v1/vector_stores", getCreateVectorStoreHandler(prod, client, timeOut))
+	router.GET("/api/providers/openai/v1/vector_stores", getListVectorStoresHandler(prod, client, timeOut))
+	router.GET("/api/providers/openai/v1/vector_stores/:vector_store_id", getGetVectorStoreHandler(prod, client, timeOut))
+	router.POST("/api/providers/openai/v1/vector_stores/:vector_store_id", getModifyVectorStoreHandler(prod, client, timeOut))
+	router.DELETE("/api/providers/openai/v1/vector_stores/:vector_store_id", getDeleteVectorStoreHandler(prod, client, timeOut))
+
+	// vector store files
+	router.POST("/api/providers/openai/v1/vector_stores/:vector_store_id/files", getCreateVectorStoreFileHandler(prod, client, timeOut))
+	router.GET("/api/providers/openai/v1/vector_stores/:vector_store_id/files", getListVectorStoreFilesHandler(prod, client, timeOut))
+	router.GET("/api/providers/openai/v1/vector_stores/:vector_store_id/files/:file_id", getGetVectorStoreFileHandler(prod, client, timeOut))
+	router.DELETE("/api/providers/openai/v1/vector_stores/:vector_store_id/files/:file_id", getDeleteVectorStoreFileHandler(prod, client, timeOut))
+
+	// vector store file batches
+	router.POST("/api/providers/openai/v1/vector_stores/:vector_store_id/file_batches", getCreateVectorStoreFileBatchHandler(prod, client, timeOut))
+	router.GET("/api/providers/openai/v1/vector_stores/:vector_store_id/file_batches/:batch_id", getGetVectorStoreFileBatchHandler(prod, client, timeOut))
+	router.POST("/api/providers/openai/v1/vector_stores/:vector_store_id/file_batches/:batch_id/cancel", getCancelVectorStoreFileBatchHandler(prod, client, timeOut))
+	router.GET("/api/providers/openai/v1/vector_stores/:vector_store_id/file_batches/:batch_id/files", getListVectorStoreFileBatchFilesHandler(prod, client, timeOut))
+
 	srv := &http.Server{
 		Addr:    ":8002",
 		Handler: router,
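The new routes mirror OpenAI's vector store endpoints one-to-one, so a caller only swaps the host; gin path params such as :vector_store_id map straight into the upstream URL. A sketch of creating a vector store through the proxy with plain net/http; the bearer header carrying the BricksLLM key is an assumption based on how the existing chat completion routes are called, so adjust to your deployment:

    package main

    import (
    	"fmt"
    	"io"
    	"net/http"
    	"strings"
    )

    func main() {
    	// POST /api/providers/openai/v1/vector_stores is forwarded to
    	// https://api.openai.com/v1/vector_stores by the new handler.
    	body := strings.NewReader(`{"name": "support-faq"}`)

    	req, err := http.NewRequest(http.MethodPost,
    		"http://localhost:8002/api/providers/openai/v1/vector_stores", body)
    	if err != nil {
    		panic(err)
    	}
    	// Assumption: the BricksLLM key is supplied as the usual bearer
    	// token, exactly as for the existing proxy routes.
    	req.Header.Set("Authorization", "Bearer YOUR_BRICKSLLM_KEY")
    	req.Header.Set("Content-Type", "application/json")

    	res, err := http.DefaultClient.Do(req)
    	if err != nil {
    		panic(err)
    	}
    	defer res.Body.Close()

    	b, _ := io.ReadAll(res.Body)
    	fmt.Println(res.Status, string(b))
    }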
@@ -291,7 +310,7 @@ func getPassThroughHandler(prod, private bool, client http.Client, timeOut time.
 	// copy query params
 	req.URL.RawQuery = c.Request.URL.RawQuery
 
-	copyHttpHeaders(c.Request, req)
+	copyHttpHeaders(c.Request, req, c.GetBool("removeUserAgent"))
 
 	if c.FullPath() == "/api/providers/openai/v1/files" && c.Request.Method == http.MethodPost {
 		purpose := c.PostForm("purpose")
@@ -988,6 +1007,25 @@ func (ps *ProxyServer) Run() {
 	// custom route
 	ps.log.Info("PORT 8002 | POST | /api/routes/*route is ready for forwarding requests to a custom route")
 
+	// vector store
+	ps.log.Info("PORT 8002 | POST | /api/providers/openai/v1/vector_stores is ready for creating an openai vector store")
+	ps.log.Info("PORT 8002 | GET | /api/providers/openai/v1/vector_stores is ready for listing openai vector stores")
+	ps.log.Info("PORT 8002 | GET | /api/providers/openai/v1/vector_stores/:vector_store_id is ready for getting an openai vector store")
+	ps.log.Info("PORT 8002 | POST | /api/providers/openai/v1/vector_stores/:vector_store_id is ready for modifying an openai vector store")
+	ps.log.Info("PORT 8002 | DELETE | /api/providers/openai/v1/vector_stores/:vector_store_id is ready for deleting an openai vector store")
+
+	// vector store files
+	ps.log.Info("PORT 8002 | POST | /api/providers/openai/v1/vector_stores/:vector_store_id/files is ready for creating an openai vector store file")
+	ps.log.Info("PORT 8002 | GET | /api/providers/openai/v1/vector_stores/:vector_store_id/files is ready for listing openai vector store files")
+	ps.log.Info("PORT 8002 | GET | /api/providers/openai/v1/vector_stores/:vector_store_id/files/:file_id is ready for getting an openai vector store file")
+	ps.log.Info("PORT 8002 | DELETE | /api/providers/openai/v1/vector_stores/:vector_store_id/files/:file_id is ready for deleting an openai vector store file")
+
+	// vector store file batches
+	ps.log.Info("PORT 8002 | POST | /api/providers/openai/v1/vector_stores/:vector_store_id/file_batches is ready for creating an openai vector store file batch")
+	ps.log.Info("PORT 8002 | GET | /api/providers/openai/v1/vector_stores/:vector_store_id/file_batches/:batch_id is ready for getting an openai vector store file batch")
+	ps.log.Info("PORT 8002 | POST | /api/providers/openai/v1/vector_stores/:vector_store_id/file_batches/:batch_id/cancel is ready for cancelling an openai vector store file batch")
+	ps.log.Info("PORT 8002 | GET | /api/providers/openai/v1/vector_stores/:vector_store_id/file_batches/:batch_id/files is ready for listing openai vector store file batch files")
+
 	if err := ps.server.ListenAndServe(); err != nil && err != http.ErrServerClosed {
 		ps.log.Sugar().Fatalf("error proxy server listening: %v", err)
 		return
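The three new files below add thirteen handlers that all share one shape: build an upstream request with a timeout, copy headers, time the call, mirror response headers, and emit per-handler telemetry on the success and error paths. A hedged sketch of that common flow as a single generic helper (a hypothetical proxyOpenAI, not part of this PR, leaning on the package's existing JSON and copyHttpHeaders helpers) may make the repetition easier to audit:

    package proxy

    import (
    	"context"
    	"io"
    	"net/http"
    	"time"

    	"github.com/gin-gonic/gin"
    )

    // proxyOpenAI is a hypothetical helper illustrating the flow each
    // vector store handler repeats: forward the body, copy headers,
    // relay status and payload, and report the outcome and latency.
    func proxyOpenAI(c *gin.Context, client *http.Client, method, url string, timeOut time.Duration, report func(ok bool, dur time.Duration)) {
    	ctx, cancel := context.WithTimeout(context.Background(), timeOut)
    	defer cancel()

    	req, err := http.NewRequestWithContext(ctx, method, url, c.Request.Body)
    	if err != nil {
    		JSON(c, http.StatusInternalServerError, "[BricksLLM] failed to create openai http request")
    		return
    	}

    	copyHttpHeaders(c.Request, req, c.GetBool("removeUserAgent"))

    	start := time.Now()
    	res, err := client.Do(req)
    	if err != nil {
    		JSON(c, http.StatusInternalServerError, "[BricksLLM] failed to send http request to openai")
    		return
    	}
    	defer res.Body.Close()

    	// Mirror upstream response headers onto our response.
    	for name, values := range res.Header {
    		for _, value := range values {
    			c.Header(name, value)
    		}
    	}

    	bytes, err := io.ReadAll(res.Body)
    	if err != nil {
    		JSON(c, http.StatusInternalServerError, "[BricksLLM] failed to read openai response body")
    		return
    	}

    	report(res.StatusCode == http.StatusOK, time.Since(start))
    	c.Data(res.StatusCode, "application/json", bytes)
    }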
diff --git a/internal/server/web/proxy/vector_store.go b/internal/server/web/proxy/vector_store.go
new file mode 100644
index 0000000..02b25e6
--- /dev/null
+++ b/internal/server/web/proxy/vector_store.go
@@ -0,0 +1,419 @@
+package proxy
+
+import (
+	"context"
+	"encoding/json"
+	"io"
+	"net/http"
+	"time"
+
+	"github.com/bricks-cloud/bricksllm/internal/telemetry"
+	"github.com/bricks-cloud/bricksllm/internal/util"
+	"github.com/gin-gonic/gin"
+	goopenai "github.com/sashabaranov/go-openai"
+)
+
+func getCreateVectorStoreHandler(prod bool, client http.Client, timeOut time.Duration) gin.HandlerFunc {
+	return func(c *gin.Context) {
+		log := util.GetLogFromCtx(c)
+		telemetry.Incr("bricksllm.proxy.get_create_vector_store_handler.requests", nil, 1)
+
+		if c == nil || c.Request == nil {
+			JSON(c, http.StatusInternalServerError, "[BricksLLM] context is empty")
+			return
+		}
+
+		ctx, cancel := context.WithTimeout(context.Background(), timeOut)
+		defer cancel()
+
+		req, err := http.NewRequestWithContext(ctx, http.MethodPost, "https://api.openai.com/v1/vector_stores", c.Request.Body)
+		if err != nil {
+			logError(log, "error when creating openai http request", prod, err)
+			JSON(c, http.StatusInternalServerError, "[BricksLLM] failed to create openai http request")
+			return
+		}
+
+		copyHttpHeaders(c.Request, req, c.GetBool("removeUserAgent"))
+
+		start := time.Now()
+		res, err := client.Do(req)
+		if err != nil {
+			telemetry.Incr("bricksllm.proxy.get_create_vector_store_handler.http_client_error", nil, 1)
+
+			logError(log, "error when sending http request to openai", prod, err)
+			JSON(c, http.StatusInternalServerError, "[BricksLLM] failed to send http request to openai")
+			return
+		}
+
+		defer res.Body.Close()
+
+		for name, values := range res.Header {
+			for _, value := range values {
+				c.Header(name, value)
+			}
+		}
+
+		if res.StatusCode == http.StatusOK {
+			dur := time.Since(start)
+			telemetry.Timing("bricksllm.proxy.get_create_vector_store_handler.latency", dur, nil, 1)
+
+			bytes, err := io.ReadAll(res.Body)
+			if err != nil {
+				logError(log, "error when reading openai vector store response body", prod, err)
+				JSON(c, http.StatusInternalServerError, "[BricksLLM] failed to read openai response body")
+				return
+			}
+
+			telemetry.Incr("bricksllm.proxy.get_create_vector_store_handler.success", nil, 1)
+			telemetry.Timing("bricksllm.proxy.get_create_vector_store_handler.success_latency", dur, nil, 1)
+
+			c.Data(res.StatusCode, "application/json", bytes)
+			return
+		}
+
+		dur := time.Since(start)
+		telemetry.Timing("bricksllm.proxy.get_create_vector_store_handler.error_latency", dur, nil, 1)
+		telemetry.Incr("bricksllm.proxy.get_create_vector_store_handler.error_response", nil, 1)
+
+		bytes, err := io.ReadAll(res.Body)
+		if err != nil {
+			logError(log, "error when reading openai vector store response body", prod, err)
+			JSON(c, http.StatusInternalServerError, "[BricksLLM] failed to read openai response body")
+			return
+		}
+
+		errorRes := &goopenai.ErrorResponse{}
+		err = json.Unmarshal(bytes, errorRes)
+		if err != nil {
+			logError(log, "error when unmarshalling openai vector store error response body", prod, err)
+		}
+
+		logOpenAiError(log, prod, errorRes)
+
+		c.Data(res.StatusCode, "application/json", bytes)
+	}
+}
+
+func getListVectorStoresHandler(prod bool, client http.Client, timeOut time.Duration) gin.HandlerFunc {
+	return func(c *gin.Context) {
+		log := util.GetLogFromCtx(c)
+		telemetry.Incr("bricksllm.proxy.get_list_vector_stores_handler.requests", nil, 1)
+
+		if c == nil || c.Request == nil {
+			JSON(c, http.StatusInternalServerError, "[BricksLLM] context is empty")
+			return
+		}
+
+		ctx, cancel := context.WithTimeout(context.Background(), timeOut)
+		defer cancel()
+
+		req, err := http.NewRequestWithContext(ctx, http.MethodGet, "https://api.openai.com/v1/vector_stores", c.Request.Body)
+		if err != nil {
+			logError(log, "error when creating openai http request", prod, err)
+			JSON(c, http.StatusInternalServerError, "[BricksLLM] failed to create openai http request")
+			return
+		}
+
+		copyHttpHeaders(c.Request, req, c.GetBool("removeUserAgent"))
+
+		start := time.Now()
+		res, err := client.Do(req)
+		if err != nil {
+			telemetry.Incr("bricksllm.proxy.get_list_vector_stores_handler.http_client_error", nil, 1)
+
+			logError(log, "error when sending http request to openai", prod, err)
+			JSON(c, http.StatusInternalServerError, "[BricksLLM] failed to send http request to openai")
+			return
+		}
+
+		defer res.Body.Close()
+
+		for name, values := range res.Header {
+			for _, value := range values {
+				c.Header(name, value)
+			}
+		}
+
+		if res.StatusCode == http.StatusOK {
+			dur := time.Since(start)
+			telemetry.Timing("bricksllm.proxy.get_list_vector_stores_handler.latency", dur, nil, 1)
+
+			bytes, err := io.ReadAll(res.Body)
+			if err != nil {
+				logError(log, "error when reading openai vector store response body", prod, err)
+				JSON(c, http.StatusInternalServerError, "[BricksLLM] failed to read openai response body")
+				return
+			}
+
+			telemetry.Incr("bricksllm.proxy.get_list_vector_stores_handler.success", nil, 1)
+			telemetry.Timing("bricksllm.proxy.get_list_vector_stores_handler.success_latency", dur, nil, 1)
+
+			c.Data(res.StatusCode, "application/json", bytes)
+			return
+		}
+
+		dur := time.Since(start)
+		telemetry.Timing("bricksllm.proxy.get_list_vector_stores_handler.error_latency", dur, nil, 1)
+		telemetry.Incr("bricksllm.proxy.get_list_vector_stores_handler.error_response", nil, 1)
+
+		bytes, err := io.ReadAll(res.Body)
+		if err != nil {
+			logError(log, "error when reading openai vector store response body", prod, err)
+			JSON(c, http.StatusInternalServerError, "[BricksLLM] failed to read openai response body")
+			return
+		}
+
+		errorRes := &goopenai.ErrorResponse{}
+		err = json.Unmarshal(bytes, errorRes)
+		if err != nil {
+			logError(log, "error when unmarshalling openai vector store error response body", prod, err)
+		}
+
+		logOpenAiError(log, prod, errorRes)
+
+		c.Data(res.StatusCode, "application/json", bytes)
+	}
+}
+
+func getGetVectorStoreHandler(prod bool, client http.Client, timeOut time.Duration) gin.HandlerFunc {
+	return func(c *gin.Context) {
+		log := util.GetLogFromCtx(c)
+		telemetry.Incr("bricksllm.proxy.get_get_vector_store_handler.requests", nil, 1)
+
+		if c == nil || c.Request == nil {
+			JSON(c, http.StatusInternalServerError, "[BricksLLM] context is empty")
+			return
+		}
+
+		ctx, cancel := context.WithTimeout(context.Background(), timeOut)
+		defer cancel()
+
+		req, err := http.NewRequestWithContext(ctx, http.MethodGet, "https://api.openai.com/v1/vector_stores/"+c.Param("vector_store_id"), c.Request.Body)
+		if err != nil {
+			logError(log, "error when creating openai http request", prod, err)
+			JSON(c, http.StatusInternalServerError, "[BricksLLM] failed to create openai http request")
+			return
+		}
+
+		copyHttpHeaders(c.Request, req, c.GetBool("removeUserAgent"))
+
+		start := time.Now()
+		res, err := client.Do(req)
+		if err != nil {
+			telemetry.Incr("bricksllm.proxy.get_get_vector_store_handler.http_client_error", nil, 1)
+
+			logError(log, "error when sending http request to openai", prod, err)
+			JSON(c, http.StatusInternalServerError, "[BricksLLM] failed to send http request to openai")
+			return
+		}
+
+		defer res.Body.Close()
+
+		for name, values := range res.Header {
+			for _, value := range values {
+				c.Header(name, value)
+			}
+		}
+
+		if res.StatusCode == http.StatusOK {
+			dur := time.Since(start)
+			telemetry.Timing("bricksllm.proxy.get_get_vector_store_handler.latency", dur, nil, 1)
+
+			bytes, err := io.ReadAll(res.Body)
+			if err != nil {
+				logError(log, "error when reading openai vector store response body", prod, err)
+				JSON(c, http.StatusInternalServerError, "[BricksLLM] failed to read openai response body")
+				return
+			}
+
+			telemetry.Incr("bricksllm.proxy.get_get_vector_store_handler.success", nil, 1)
+			telemetry.Timing("bricksllm.proxy.get_get_vector_store_handler.success_latency", dur, nil, 1)
+
+			c.Data(res.StatusCode, "application/json", bytes)
+			return
+		}
+
+		dur := time.Since(start)
+		telemetry.Timing("bricksllm.proxy.get_get_vector_store_handler.error_latency", dur, nil, 1)
+		telemetry.Incr("bricksllm.proxy.get_get_vector_store_handler.error_response", nil, 1)
+
+		bytes, err := io.ReadAll(res.Body)
+		if err != nil {
+			logError(log, "error when reading openai vector store response body", prod, err)
+			JSON(c, http.StatusInternalServerError, "[BricksLLM] failed to read openai response body")
+			return
+		}
+
+		errorRes := &goopenai.ErrorResponse{}
+		err = json.Unmarshal(bytes, errorRes)
+		if err != nil {
+			logError(log, "error when unmarshalling openai vector store error response body", prod, err)
+		}
+
+		logOpenAiError(log, prod, errorRes)
+
+		c.Data(res.StatusCode, "application/json", bytes)
+	}
+}
+
+func getModifyVectorStoreHandler(prod bool, client http.Client, timeOut time.Duration) gin.HandlerFunc {
+	return func(c *gin.Context) {
+		log := util.GetLogFromCtx(c)
+		telemetry.Incr("bricksllm.proxy.get_modify_vector_store_handler.requests", nil, 1)
+
+		if c == nil || c.Request == nil {
+			JSON(c, http.StatusInternalServerError, "[BricksLLM] context is empty")
+			return
+		}
+
+		ctx, cancel := context.WithTimeout(context.Background(), timeOut)
+		defer cancel()
+
+		req, err := http.NewRequestWithContext(ctx, http.MethodPost, "https://api.openai.com/v1/vector_stores/"+c.Param("vector_store_id"), c.Request.Body)
+		if err != nil {
+			logError(log, "error when creating openai http request", prod, err)
+			JSON(c, http.StatusInternalServerError, "[BricksLLM] failed to create openai http request")
+			return
+		}
+
+		copyHttpHeaders(c.Request, req, c.GetBool("removeUserAgent"))
+
+		start := time.Now()
+		res, err := client.Do(req)
+		if err != nil {
+			telemetry.Incr("bricksllm.proxy.get_modify_vector_store_handler.http_client_error", nil, 1)
+
+			logError(log, "error when sending http request to openai", prod, err)
+			JSON(c, http.StatusInternalServerError, "[BricksLLM] failed to send http request to openai")
+			return
+		}
+
+		defer res.Body.Close()
+
+		for name, values := range res.Header {
+			for _, value := range values {
+				c.Header(name, value)
+			}
+		}
+
+		if res.StatusCode == http.StatusOK {
+			dur := time.Since(start)
+			telemetry.Timing("bricksllm.proxy.get_modify_vector_store_handler.latency", dur, nil, 1)
+
+			bytes, err := io.ReadAll(res.Body)
+			if err != nil {
+				logError(log, "error when reading openai vector store response body", prod, err)
+				JSON(c, http.StatusInternalServerError, "[BricksLLM] failed to read openai response body")
+				return
+			}
+
+			telemetry.Incr("bricksllm.proxy.get_modify_vector_store_handler.success", nil, 1)
+			telemetry.Timing("bricksllm.proxy.get_modify_vector_store_handler.success_latency", dur, nil, 1)
+
+			c.Data(res.StatusCode, "application/json", bytes)
+			return
+		}
+
+		dur := time.Since(start)
+		telemetry.Timing("bricksllm.proxy.get_modify_vector_store_handler.error_latency", dur, nil, 1)
+		telemetry.Incr("bricksllm.proxy.get_modify_vector_store_handler.error_response", nil, 1)
+
+		bytes, err := io.ReadAll(res.Body)
+		if err != nil {
+			logError(log, "error when reading openai vector store response body", prod, err)
+			JSON(c, http.StatusInternalServerError, "[BricksLLM] failed to read openai response body")
+			return
+		}
+
+		errorRes := &goopenai.ErrorResponse{}
+		err = json.Unmarshal(bytes, errorRes)
+		if err != nil {
+			logError(log, "error when unmarshalling openai vector store error response body", prod, err)
+		}
+
+		logOpenAiError(log, prod, errorRes)
+
+		c.Data(res.StatusCode, "application/json", bytes)
+	}
+}
+
+func getDeleteVectorStoreHandler(prod bool, client http.Client, timeOut time.Duration) gin.HandlerFunc {
+	return func(c *gin.Context) {
+		log := util.GetLogFromCtx(c)
+		telemetry.Incr("bricksllm.proxy.get_delete_vector_store_handler.requests", nil, 1)
+
+		if c == nil || c.Request == nil {
+			JSON(c, http.StatusInternalServerError, "[BricksLLM] context is empty")
+			return
+		}
+
+		ctx, cancel := context.WithTimeout(context.Background(), timeOut)
+		defer cancel()
+
+		req, err := http.NewRequestWithContext(ctx, http.MethodDelete, "https://api.openai.com/v1/vector_stores/"+c.Param("vector_store_id"), c.Request.Body)
+		if err != nil {
+			logError(log, "error when creating openai http request", prod, err)
+			JSON(c, http.StatusInternalServerError, "[BricksLLM] failed to create openai http request")
+			return
+		}
+
+		copyHttpHeaders(c.Request, req, c.GetBool("removeUserAgent"))
+
+		start := time.Now()
+		res, err := client.Do(req)
+		if err != nil {
+			telemetry.Incr("bricksllm.proxy.get_delete_vector_store_handler.http_client_error", nil, 1)
+
+			logError(log, "error when sending http request to openai", prod, err)
+			JSON(c, http.StatusInternalServerError, "[BricksLLM] failed to send http request to openai")
+			return
+		}
+
+		defer res.Body.Close()
+
+		for name, values := range res.Header {
+			for _, value := range values {
+				c.Header(name, value)
+			}
+		}
+
+		if res.StatusCode == http.StatusOK {
+			dur := time.Since(start)
+			telemetry.Timing("bricksllm.proxy.get_delete_vector_store_handler.latency", dur, nil, 1)
+
+			bytes, err := io.ReadAll(res.Body)
+			if err != nil {
+				logError(log, "error when reading openai vector store response body", prod, err)
+				JSON(c, http.StatusInternalServerError, "[BricksLLM] failed to read openai response body")
+				return
+			}
+
+			telemetry.Incr("bricksllm.proxy.get_delete_vector_store_handler.success", nil, 1)
+			telemetry.Timing("bricksllm.proxy.get_delete_vector_store_handler.success_latency", dur, nil, 1)
+
+			c.Data(res.StatusCode, "application/json", bytes)
+			return
+		}
+
+		dur := time.Since(start)
+		telemetry.Timing("bricksllm.proxy.get_delete_vector_store_handler.error_latency", dur, nil, 1)
+		telemetry.Incr("bricksllm.proxy.get_delete_vector_store_handler.error_response", nil, 1)
+
+		bytes, err := io.ReadAll(res.Body)
+		if err != nil {
+			logError(log, "error when reading openai vector store response body", prod, err)
+			JSON(c, http.StatusInternalServerError, "[BricksLLM] failed to read openai response body")
+			return
+		}
+
+		errorRes := &goopenai.ErrorResponse{}
+		err = json.Unmarshal(bytes, errorRes)
+		if err != nil {
+			logError(log, "error when unmarshalling openai vector store error response body", prod, err)
+		}
+
+		logOpenAiError(log, prod, errorRes)
+
+		c.Data(res.StatusCode, "application/json", bytes)
+	}
+}
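One behavioral note on the read-style handlers above, which applies equally to the file and batch variants below: unlike getPassThroughHandler, they never copy c.Request.URL.RawQuery onto the upstream request, so OpenAI's pagination parameters on the list endpoints (limit, order, after, before) appear to be dropped at the proxy boundary. Forwarding c.Request.Body on GET and DELETE is harmless, since those requests normally carry an empty body, but the query string would need one extra line. A hypothetical helper sketching that fix, not part of this PR:

    package proxy

    import (
    	"context"
    	"net/http"
    	"time"

    	"github.com/gin-gonic/gin"
    )

    // newUpstreamRequest builds the upstream request exactly as the new
    // handlers do, then mirrors the caller's query string so parameters
    // like ?limit=&order=&after=&before= survive the hop.
    func newUpstreamRequest(c *gin.Context, method, url string, timeOut time.Duration) (*http.Request, context.CancelFunc, error) {
    	ctx, cancel := context.WithTimeout(context.Background(), timeOut)

    	req, err := http.NewRequestWithContext(ctx, method, url, c.Request.Body)
    	if err != nil {
    		cancel()
    		return nil, nil, err
    	}

    	req.URL.RawQuery = c.Request.URL.RawQuery // the line the new handlers omit

    	return req, cancel, nil
    }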
diff --git a/internal/server/web/proxy/vector_store_file.go b/internal/server/web/proxy/vector_store_file.go
new file mode 100644
index 0000000..2ef5206
--- /dev/null
+++ b/internal/server/web/proxy/vector_store_file.go
@@ -0,0 +1,338 @@
+package proxy
+
+import (
+	"context"
+	"encoding/json"
+	"io"
+	"net/http"
+	"time"
+
+	"github.com/bricks-cloud/bricksllm/internal/telemetry"
+	"github.com/bricks-cloud/bricksllm/internal/util"
+	"github.com/gin-gonic/gin"
+	goopenai "github.com/sashabaranov/go-openai"
+)
+
+func getCreateVectorStoreFileHandler(prod bool, client http.Client, timeOut time.Duration) gin.HandlerFunc {
+	return func(c *gin.Context) {
+		log := util.GetLogFromCtx(c)
+		telemetry.Incr("bricksllm.proxy.get_create_vector_store_file_handler.requests", nil, 1)
+
+		if c == nil || c.Request == nil {
+			JSON(c, http.StatusInternalServerError, "[BricksLLM] context is empty")
+			return
+		}
+
+		ctx, cancel := context.WithTimeout(context.Background(), timeOut)
+		defer cancel()
+
+		req, err := http.NewRequestWithContext(ctx, http.MethodPost, "https://api.openai.com/v1/vector_stores/"+c.Param("vector_store_id")+"/files", c.Request.Body)
+		if err != nil {
+			logError(log, "error when creating openai http request", prod, err)
+			JSON(c, http.StatusInternalServerError, "[BricksLLM] failed to create openai http request")
+			return
+		}
+
+		copyHttpHeaders(c.Request, req, c.GetBool("removeUserAgent"))
+
+		start := time.Now()
+		res, err := client.Do(req)
+		if err != nil {
+			telemetry.Incr("bricksllm.proxy.get_create_vector_store_file_handler.http_client_error", nil, 1)
+
+			logError(log, "error when sending http request to openai", prod, err)
+			JSON(c, http.StatusInternalServerError, "[BricksLLM] failed to send http request to openai")
+			return
+		}
+
+		defer res.Body.Close()
+
+		for name, values := range res.Header {
+			for _, value := range values {
+				c.Header(name, value)
+			}
+		}
+
+		if res.StatusCode == http.StatusOK {
+			dur := time.Since(start)
+			telemetry.Timing("bricksllm.proxy.get_create_vector_store_file_handler.latency", dur, nil, 1)
+
+			bytes, err := io.ReadAll(res.Body)
+			if err != nil {
+				logError(log, "error when reading openai vector store file response body", prod, err)
+				JSON(c, http.StatusInternalServerError, "[BricksLLM] failed to read openai response body")
+				return
+			}
+
+			telemetry.Incr("bricksllm.proxy.get_create_vector_store_file_handler.success", nil, 1)
+			telemetry.Timing("bricksllm.proxy.get_create_vector_store_file_handler.success_latency", dur, nil, 1)
+
+			c.Data(res.StatusCode, "application/json", bytes)
+			return
+		}
+
+		dur := time.Since(start)
+		telemetry.Timing("bricksllm.proxy.get_create_vector_store_file_handler.error_latency", dur, nil, 1)
+		telemetry.Incr("bricksllm.proxy.get_create_vector_store_file_handler.error_response", nil, 1)
+
+		bytes, err := io.ReadAll(res.Body)
+		if err != nil {
+			logError(log, "error when reading openai vector store file response body", prod, err)
+			JSON(c, http.StatusInternalServerError, "[BricksLLM] failed to read openai response body")
+			return
+		}
+
+		errorRes := &goopenai.ErrorResponse{}
+		err = json.Unmarshal(bytes, errorRes)
+		if err != nil {
+			logError(log, "error when unmarshalling openai vector store file error response body", prod, err)
+		}
+
+		logOpenAiError(log, prod, errorRes)
+
+		c.Data(res.StatusCode, "application/json", bytes)
+	}
+}
+
+func getListVectorStoreFilesHandler(prod bool, client http.Client, timeOut time.Duration) gin.HandlerFunc {
+	return func(c *gin.Context) {
+		log := util.GetLogFromCtx(c)
+		telemetry.Incr("bricksllm.proxy.get_list_vector_store_files_handler.requests", nil, 1)
+
+		if c == nil || c.Request == nil {
+			JSON(c, http.StatusInternalServerError, "[BricksLLM] context is empty")
+			return
+		}
+
+		ctx, cancel := context.WithTimeout(context.Background(), timeOut)
+		defer cancel()
+
+		req, err := http.NewRequestWithContext(ctx, http.MethodGet, "https://api.openai.com/v1/vector_stores/"+c.Param("vector_store_id")+"/files", c.Request.Body)
+		if err != nil {
+			logError(log, "error when creating openai http request", prod, err)
+			JSON(c, http.StatusInternalServerError, "[BricksLLM] failed to create openai http request")
+			return
+		}
+
+		copyHttpHeaders(c.Request, req, c.GetBool("removeUserAgent"))
+
+		start := time.Now()
+		res, err := client.Do(req)
+		if err != nil {
+			telemetry.Incr("bricksllm.proxy.get_list_vector_store_files_handler.http_client_error", nil, 1)
+
+			logError(log, "error when sending http request to openai", prod, err)
+			JSON(c, http.StatusInternalServerError, "[BricksLLM] failed to send http request to openai")
+			return
+		}
+
+		defer res.Body.Close()
+
+		for name, values := range res.Header {
+			for _, value := range values {
+				c.Header(name, value)
+			}
+		}
+
+		if res.StatusCode == http.StatusOK {
+			dur := time.Since(start)
+			telemetry.Timing("bricksllm.proxy.get_list_vector_store_files_handler.latency", dur, nil, 1)
+
+			bytes, err := io.ReadAll(res.Body)
+			if err != nil {
+				logError(log, "error when reading openai vector store file response body", prod, err)
+				JSON(c, http.StatusInternalServerError, "[BricksLLM] failed to read openai response body")
+				return
+			}
+
+			telemetry.Incr("bricksllm.proxy.get_list_vector_store_files_handler.success", nil, 1)
+			telemetry.Timing("bricksllm.proxy.get_list_vector_store_files_handler.success_latency", dur, nil, 1)
+
+			c.Data(res.StatusCode, "application/json", bytes)
+			return
+		}
+
+		dur := time.Since(start)
+		telemetry.Timing("bricksllm.proxy.get_list_vector_store_files_handler.error_latency", dur, nil, 1)
+		telemetry.Incr("bricksllm.proxy.get_list_vector_store_files_handler.error_response", nil, 1)
+
+		bytes, err := io.ReadAll(res.Body)
+		if err != nil {
+			logError(log, "error when reading openai vector store file response body", prod, err)
+			JSON(c, http.StatusInternalServerError, "[BricksLLM] failed to read openai response body")
+			return
+		}
+
+		errorRes := &goopenai.ErrorResponse{}
+		err = json.Unmarshal(bytes, errorRes)
+		if err != nil {
+			logError(log, "error when unmarshalling openai vector store file error response body", prod, err)
+		}
+
+		logOpenAiError(log, prod, errorRes)
+
+		c.Data(res.StatusCode, "application/json", bytes)
+	}
+}
+
+func getGetVectorStoreFileHandler(prod bool, client http.Client, timeOut time.Duration) gin.HandlerFunc {
+	return func(c *gin.Context) {
+		log := util.GetLogFromCtx(c)
+		telemetry.Incr("bricksllm.proxy.get_get_vector_store_file_handler.requests", nil, 1)
+
+		if c == nil || c.Request == nil {
+			JSON(c, http.StatusInternalServerError, "[BricksLLM] context is empty")
+			return
+		}
+
+		ctx, cancel := context.WithTimeout(context.Background(), timeOut)
+		defer cancel()
+
+		req, err := http.NewRequestWithContext(ctx, http.MethodGet, "https://api.openai.com/v1/vector_stores/"+c.Param("vector_store_id")+"/files/"+c.Param("file_id"), c.Request.Body)
+		if err != nil {
+			logError(log, "error when creating openai http request", prod, err)
+			JSON(c, http.StatusInternalServerError, "[BricksLLM] failed to create openai http request")
+			return
+		}
+
+		copyHttpHeaders(c.Request, req, c.GetBool("removeUserAgent"))
+
+		start := time.Now()
+		res, err := client.Do(req)
+		if err != nil {
+			telemetry.Incr("bricksllm.proxy.get_get_vector_store_file_handler.http_client_error", nil, 1)
+
+			logError(log, "error when sending http request to openai", prod, err)
+			JSON(c, http.StatusInternalServerError, "[BricksLLM] failed to send http request to openai")
+			return
+		}
+
+		defer res.Body.Close()
+
+		for name, values := range res.Header {
+			for _, value := range values {
+				c.Header(name, value)
+			}
+		}
+
+		if res.StatusCode == http.StatusOK {
+			dur := time.Since(start)
+			telemetry.Timing("bricksllm.proxy.get_get_vector_store_file_handler.latency", dur, nil, 1)
+
+			bytes, err := io.ReadAll(res.Body)
+			if err != nil {
+				logError(log, "error when reading openai vector store file response body", prod, err)
+				JSON(c, http.StatusInternalServerError, "[BricksLLM] failed to read openai response body")
+				return
+			}
+
+			telemetry.Incr("bricksllm.proxy.get_get_vector_store_file_handler.success", nil, 1)
+			telemetry.Timing("bricksllm.proxy.get_get_vector_store_file_handler.success_latency", dur, nil, 1)
+
+			c.Data(res.StatusCode, "application/json", bytes)
+			return
+		}
+
+		dur := time.Since(start)
+		telemetry.Timing("bricksllm.proxy.get_get_vector_store_file_handler.error_latency", dur, nil, 1)
+		telemetry.Incr("bricksllm.proxy.get_get_vector_store_file_handler.error_response", nil, 1)
+
+		bytes, err := io.ReadAll(res.Body)
+		if err != nil {
+			logError(log, "error when reading openai vector store file response body", prod, err)
+			JSON(c, http.StatusInternalServerError, "[BricksLLM] failed to read openai response body")
+			return
+		}
+
+		errorRes := &goopenai.ErrorResponse{}
+		err = json.Unmarshal(bytes, errorRes)
+		if err != nil {
+			logError(log, "error when unmarshalling openai vector store file error response body", prod, err)
+		}
+
+		logOpenAiError(log, prod, errorRes)
+
+		c.Data(res.StatusCode, "application/json", bytes)
+	}
+}
+
+func getDeleteVectorStoreFileHandler(prod bool, client http.Client, timeOut time.Duration) gin.HandlerFunc {
+	return func(c *gin.Context) {
+		log := util.GetLogFromCtx(c)
+		telemetry.Incr("bricksllm.proxy.get_delete_vector_store_file_handler.requests", nil, 1)
+
+		if c == nil || c.Request == nil {
+			JSON(c, http.StatusInternalServerError, "[BricksLLM] context is empty")
+			return
+		}
+
+		ctx, cancel := context.WithTimeout(context.Background(), timeOut)
+		defer cancel()
+
+		req, err := http.NewRequestWithContext(ctx, http.MethodDelete, "https://api.openai.com/v1/vector_stores/"+c.Param("vector_store_id")+"/files/"+c.Param("file_id"), c.Request.Body)
+		if err != nil {
+			logError(log, "error when creating openai http request", prod, err)
+			JSON(c, http.StatusInternalServerError, "[BricksLLM] failed to create openai http request")
+			return
+		}
+
+		copyHttpHeaders(c.Request, req, c.GetBool("removeUserAgent"))
+
+		start := time.Now()
+		res, err := client.Do(req)
+		if err != nil {
+			telemetry.Incr("bricksllm.proxy.get_delete_vector_store_file_handler.http_client_error", nil, 1)
+
+			logError(log, "error when sending http request to openai", prod, err)
+			JSON(c, http.StatusInternalServerError, "[BricksLLM] failed to send http request to openai")
+			return
+		}
+
+		defer res.Body.Close()
+
+		for name, values := range res.Header {
+			for _, value := range values {
+				c.Header(name, value)
+			}
+		}
+
+		if res.StatusCode == http.StatusOK {
+			dur := time.Since(start)
+			telemetry.Timing("bricksllm.proxy.get_delete_vector_store_file_handler.latency", dur, nil, 1)
+
+			bytes, err := io.ReadAll(res.Body)
+			if err != nil {
+				logError(log, "error when reading openai vector store file response body", prod, err)
+				JSON(c, http.StatusInternalServerError, "[BricksLLM] failed to read openai response body")
+				return
+			}
+
+			telemetry.Incr("bricksllm.proxy.get_delete_vector_store_file_handler.success", nil, 1)
+			telemetry.Timing("bricksllm.proxy.get_delete_vector_store_file_handler.success_latency", dur, nil, 1)
+
+			c.Data(res.StatusCode, "application/json", bytes)
+			return
+		}
+
+		dur := time.Since(start)
+		telemetry.Timing("bricksllm.proxy.get_delete_vector_store_file_handler.error_latency", dur, nil, 1)
+		telemetry.Incr("bricksllm.proxy.get_delete_vector_store_file_handler.error_response", nil, 1)
+
+		bytes, err := io.ReadAll(res.Body)
+		if err != nil {
+			logError(log, "error when reading openai vector store file response body", prod, err)
+			JSON(c, http.StatusInternalServerError, "[BricksLLM] failed to read openai response body")
+			return
+		}
+
+		errorRes := &goopenai.ErrorResponse{}
+		err = json.Unmarshal(bytes, errorRes)
+		if err != nil {
+			logError(log, "error when unmarshalling openai vector store file error response body", prod, err)
+		}
+
+		logOpenAiError(log, prod, errorRes)
+
+		c.Data(res.StatusCode, "application/json", bytes)
+	}
+}
diff --git a/internal/server/web/proxy/vector_store_file_batch.go b/internal/server/web/proxy/vector_store_file_batch.go
new file mode 100644
index 0000000..12fce40
--- /dev/null
+++ b/internal/server/web/proxy/vector_store_file_batch.go
@@ -0,0 +1,338 @@
+package proxy
+
+import (
+	"context"
+	"encoding/json"
+	"io"
+	"net/http"
+	"time"
+
+	"github.com/bricks-cloud/bricksllm/internal/telemetry"
+	"github.com/bricks-cloud/bricksllm/internal/util"
+	"github.com/gin-gonic/gin"
+	goopenai "github.com/sashabaranov/go-openai"
+)
+
+func getCreateVectorStoreFileBatchHandler(prod bool, client http.Client, timeOut time.Duration) gin.HandlerFunc {
+	return func(c *gin.Context) {
+		log := util.GetLogFromCtx(c)
+		telemetry.Incr("bricksllm.proxy.get_create_vector_store_file_batch_handler.requests", nil, 1)
+
+		if c == nil || c.Request == nil {
+			JSON(c, http.StatusInternalServerError, "[BricksLLM] context is empty")
+			return
+		}
+
+		ctx, cancel := context.WithTimeout(context.Background(), timeOut)
+		defer cancel()
+
+		req, err := http.NewRequestWithContext(ctx, http.MethodPost, "https://api.openai.com/v1/vector_stores/"+c.Param("vector_store_id")+"/file_batches", c.Request.Body)
+		if err != nil {
+			logError(log, "error when creating openai http request", prod, err)
+			JSON(c, http.StatusInternalServerError, "[BricksLLM] failed to create openai http request")
+			return
+		}
+
+		copyHttpHeaders(c.Request, req, c.GetBool("removeUserAgent"))
+
+		start := time.Now()
+		res, err := client.Do(req)
+		if err != nil {
+			telemetry.Incr("bricksllm.proxy.get_create_vector_store_file_batch_handler.http_client_error", nil, 1)
+
+			logError(log, "error when sending http request to openai", prod, err)
+			JSON(c, http.StatusInternalServerError, "[BricksLLM] failed to send http request to openai")
+			return
+		}
+
+		defer res.Body.Close()
+
+		for name, values := range res.Header {
+			for _, value := range values {
+				c.Header(name, value)
+			}
+		}
+
+		if res.StatusCode == http.StatusOK {
+			dur := time.Since(start)
+			telemetry.Timing("bricksllm.proxy.get_create_vector_store_file_batch_handler.latency", dur, nil, 1)
+
+			bytes, err := io.ReadAll(res.Body)
+			if err != nil {
+				logError(log, "error when reading openai vector store file batch response body", prod, err)
+				JSON(c, http.StatusInternalServerError, "[BricksLLM] failed to read openai response body")
+				return
+			}
+
+			telemetry.Incr("bricksllm.proxy.get_create_vector_store_file_batch_handler.success", nil, 1)
+			telemetry.Timing("bricksllm.proxy.get_create_vector_store_file_batch_handler.success_latency", dur, nil, 1)
+
+			c.Data(res.StatusCode, "application/json", bytes)
+			return
+		}
+
+		dur := time.Since(start)
+		telemetry.Timing("bricksllm.proxy.get_create_vector_store_file_batch_handler.error_latency", dur, nil, 1)
+		telemetry.Incr("bricksllm.proxy.get_create_vector_store_file_batch_handler.error_response", nil, 1)
+
+		bytes, err := io.ReadAll(res.Body)
+		if err != nil {
+			logError(log, "error when reading openai vector store file batch response body", prod, err)
+			JSON(c, http.StatusInternalServerError, "[BricksLLM] failed to read openai response body")
+			return
+		}
+
+		errorRes := &goopenai.ErrorResponse{}
+		err = json.Unmarshal(bytes, errorRes)
+		if err != nil {
+			logError(log, "error when unmarshalling openai vector store file batch error response body", prod, err)
+		}
+
+		logOpenAiError(log, prod, errorRes)
+
+		c.Data(res.StatusCode, "application/json", bytes)
+	}
+}
+
+func getGetVectorStoreFileBatchHandler(prod bool, client http.Client, timeOut time.Duration) gin.HandlerFunc {
+	return func(c *gin.Context) {
+		log := util.GetLogFromCtx(c)
+		telemetry.Incr("bricksllm.proxy.get_get_vector_store_file_batch_handler.requests", nil, 1)
+
+		if c == nil || c.Request == nil {
+			JSON(c, http.StatusInternalServerError, "[BricksLLM] context is empty")
+			return
+		}
+
+		ctx, cancel := context.WithTimeout(context.Background(), timeOut)
+		defer cancel()
+
+		req, err := http.NewRequestWithContext(ctx, http.MethodGet, "https://api.openai.com/v1/vector_stores/"+c.Param("vector_store_id")+"/file_batches/"+c.Param("batch_id"), c.Request.Body)
+		if err != nil {
+			logError(log, "error when creating openai http request", prod, err)
+			JSON(c, http.StatusInternalServerError, "[BricksLLM] failed to create openai http request")
+			return
+		}
+
+		copyHttpHeaders(c.Request, req, c.GetBool("removeUserAgent"))
+
+		start := time.Now()
+		res, err := client.Do(req)
+		if err != nil {
+			telemetry.Incr("bricksllm.proxy.get_get_vector_store_file_batch_handler.http_client_error", nil, 1)
+
+			logError(log, "error when sending http request to openai", prod, err)
+			JSON(c, http.StatusInternalServerError, "[BricksLLM] failed to send http request to openai")
+			return
+		}
+
+		defer res.Body.Close()
+
+		for name, values := range res.Header {
+			for _, value := range values {
+				c.Header(name, value)
+			}
+		}
+
+		if res.StatusCode == http.StatusOK {
+			dur := time.Since(start)
+			telemetry.Timing("bricksllm.proxy.get_get_vector_store_file_batch_handler.latency", dur, nil, 1)
+
+			bytes, err := io.ReadAll(res.Body)
+			if err != nil {
+				logError(log, "error when reading openai vector store file batch response body", prod, err)
+				JSON(c, http.StatusInternalServerError, "[BricksLLM] failed to read openai response body")
+				return
+			}
+
+			telemetry.Incr("bricksllm.proxy.get_get_vector_store_file_batch_handler.success", nil, 1)
+			telemetry.Timing("bricksllm.proxy.get_get_vector_store_file_batch_handler.success_latency", dur, nil, 1)
+
+			c.Data(res.StatusCode, "application/json", bytes)
+			return
+		}
+
+		dur := time.Since(start)
+		telemetry.Timing("bricksllm.proxy.get_get_vector_store_file_batch_handler.error_latency", dur, nil, 1)
+		telemetry.Incr("bricksllm.proxy.get_get_vector_store_file_batch_handler.error_response", nil, 1)
+
+		bytes, err := io.ReadAll(res.Body)
+		if err != nil {
+			logError(log, "error when reading openai vector store file batch response body", prod, err)
+			JSON(c, http.StatusInternalServerError, "[BricksLLM] failed to read openai response body")
+			return
+		}
+
+		errorRes := &goopenai.ErrorResponse{}
+		err = json.Unmarshal(bytes, errorRes)
+		if err != nil {
+			logError(log, "error when unmarshalling openai vector store file batch error response body", prod, err)
+		}
+
+		logOpenAiError(log, prod, errorRes)
+
+		c.Data(res.StatusCode, "application/json", bytes)
+	}
+}
+
+func getCancelVectorStoreFileBatchHandler(prod bool, client http.Client, timeOut time.Duration) gin.HandlerFunc {
+	return func(c *gin.Context) {
+		log := util.GetLogFromCtx(c)
+		telemetry.Incr("bricksllm.proxy.get_cancel_vector_store_file_batch_handler.requests", nil, 1)
+
+		if c == nil || c.Request == nil {
+			JSON(c, http.StatusInternalServerError, "[BricksLLM] context is empty")
+			return
+		}
+
+		ctx, cancel := context.WithTimeout(context.Background(), timeOut)
+		defer cancel()
+
+		req, err := http.NewRequestWithContext(ctx, http.MethodPost, "https://api.openai.com/v1/vector_stores/"+c.Param("vector_store_id")+"/file_batches/"+c.Param("batch_id")+"/cancel", c.Request.Body)
+		if err != nil {
+			logError(log, "error when creating openai http request", prod, err)
+			JSON(c, http.StatusInternalServerError, "[BricksLLM] failed to create openai http request")
+			return
+		}
+
+		copyHttpHeaders(c.Request, req, c.GetBool("removeUserAgent"))
+
+		start := time.Now()
+		res, err := client.Do(req)
+		if err != nil {
+			telemetry.Incr("bricksllm.proxy.get_cancel_vector_store_file_batch_handler.http_client_error", nil, 1)
+
+			logError(log, "error when sending http request to openai", prod, err)
+			JSON(c, http.StatusInternalServerError, "[BricksLLM] failed to send http request to openai")
+			return
+		}
+
+		defer res.Body.Close()
+
+		for name, values := range res.Header {
+			for _, value := range values {
+				c.Header(name, value)
+			}
+		}
+
+		if res.StatusCode == http.StatusOK {
+			dur := time.Since(start)
+			telemetry.Timing("bricksllm.proxy.get_cancel_vector_store_file_batch_handler.latency", dur, nil, 1)
+
+			bytes, err := io.ReadAll(res.Body)
+			if err != nil {
+				logError(log, "error when reading openai vector store file batch response body", prod, err)
+				JSON(c, http.StatusInternalServerError, "[BricksLLM] failed to read openai response body")
+				return
+			}
+
+			telemetry.Incr("bricksllm.proxy.get_cancel_vector_store_file_batch_handler.success", nil, 1)
+			telemetry.Timing("bricksllm.proxy.get_cancel_vector_store_file_batch_handler.success_latency", dur, nil, 1)
+
+			c.Data(res.StatusCode, "application/json", bytes)
+			return
+		}
+
+		dur := time.Since(start)
+		telemetry.Timing("bricksllm.proxy.get_cancel_vector_store_file_batch_handler.error_latency", dur, nil, 1)
+		telemetry.Incr("bricksllm.proxy.get_cancel_vector_store_file_batch_handler.error_response", nil, 1)
+
+		bytes, err := io.ReadAll(res.Body)
+		if err != nil {
+			logError(log, "error when reading openai vector store file batch response body", prod, err)
+			JSON(c, http.StatusInternalServerError, "[BricksLLM] failed to read openai response body")
+			return
+		}
+
+		errorRes := &goopenai.ErrorResponse{}
+		err = json.Unmarshal(bytes, errorRes)
+		if err != nil {
+			logError(log, "error when unmarshalling openai vector store file batch error response body", prod, err)
+		}
+
+		logOpenAiError(log, prod, errorRes)
+
+		c.Data(res.StatusCode, "application/json", bytes)
+	}
+}
+
+func getListVectorStoreFileBatchFilesHandler(prod bool, client http.Client, timeOut time.Duration) gin.HandlerFunc {
+	return func(c *gin.Context) {
+		log := util.GetLogFromCtx(c)
+		telemetry.Incr("bricksllm.proxy.get_list_vector_store_file_batch_files_handler.requests", nil, 1)
+
+		if c == nil || c.Request == nil {
+			JSON(c, http.StatusInternalServerError, "[BricksLLM] context is empty")
+			return
+		}
+
+		ctx, cancel := context.WithTimeout(context.Background(), timeOut)
+		defer cancel()
+
+		req, err := http.NewRequestWithContext(ctx, http.MethodGet, "https://api.openai.com/v1/vector_stores/"+c.Param("vector_store_id")+"/file_batches/"+c.Param("batch_id")+"/files", c.Request.Body)
+		if err != nil {
+			logError(log, "error when creating openai http request", prod, err)
+			JSON(c, http.StatusInternalServerError, "[BricksLLM] failed to create openai http request")
+			return
+		}
+
+		copyHttpHeaders(c.Request, req, c.GetBool("removeUserAgent"))
+
+		start := time.Now()
+		res, err := client.Do(req)
+		if err != nil {
+			telemetry.Incr("bricksllm.proxy.get_list_vector_store_file_batch_files_handler.http_client_error", nil, 1)
+
+			logError(log, "error when sending http request to openai", prod, err)
+			JSON(c, http.StatusInternalServerError, "[BricksLLM] failed to send http request to openai")
+			return
+		}
+
+		defer res.Body.Close()
+
+		for name, values := range res.Header {
+			for _, value := range values {
+				c.Header(name, value)
+			}
+		}
+
+		if res.StatusCode == http.StatusOK {
+			dur := time.Since(start)
+			telemetry.Timing("bricksllm.proxy.get_list_vector_store_file_batch_files_handler.latency", dur, nil, 1)
+
+			bytes, err := io.ReadAll(res.Body)
+			if err != nil {
+				logError(log, "error when reading openai vector store file batch response body", prod, err)
+				JSON(c, http.StatusInternalServerError, "[BricksLLM] failed to read openai response body")
+				return
+			}
+
+			telemetry.Incr("bricksllm.proxy.get_list_vector_store_file_batch_files_handler.success", nil, 1)
+			telemetry.Timing("bricksllm.proxy.get_list_vector_store_file_batch_files_handler.success_latency", dur, nil, 1)
+
+			c.Data(res.StatusCode, "application/json", bytes)
+			return
+		}
+
+		dur := time.Since(start)
+		telemetry.Timing("bricksllm.proxy.get_list_vector_store_file_batch_files_handler.error_latency", dur, nil, 1)
+		telemetry.Incr("bricksllm.proxy.get_list_vector_store_file_batch_files_handler.error_response", nil, 1)
+
+		bytes, err := io.ReadAll(res.Body)
+		if err != nil {
+			logError(log, "error when reading openai vector store file batch response body", prod, err)
+			JSON(c, http.StatusInternalServerError, "[BricksLLM] failed to read openai response body")
+			return
+		}
+
+		errorRes := &goopenai.ErrorResponse{}
+		err = json.Unmarshal(bytes, errorRes)
+		if err != nil {
+			logError(log, "error when unmarshalling openai vector store file batch error response body", prod, err)
+		}
+
+		logOpenAiError(log, prod, errorRes)
+
+		c.Data(res.StatusCode, "application/json", bytes)
+	}
+}
diff --git a/internal/server/web/proxy/vllm.go b/internal/server/web/proxy/vllm.go
index cc276fc..5415913 100644
--- a/internal/server/web/proxy/vllm.go
+++ b/internal/server/web/proxy/vllm.go
@@ -46,7 +46,7 @@ func getVllmCompletionsHandler(prod, private bool, client http.Client, timeOut t
 		return
 	}
 
-	copyHttpHeaders(c.Request, req)
+	copyHttpHeaders(c.Request, req, c.GetBool("removeUserAgent"))
 
 	isStreaming := c.GetBool("stream")
 	if isStreaming {
@@ -394,7 +394,7 @@ func getVllmChatCompletionsHandler(prod, private bool, client http.Client, timeO
 		return
 	}
 
-	copyHttpHeaders(c.Request, req)
+	copyHttpHeaders(c.Request, req, c.GetBool("removeUserAgent"))
 
 	isStreaming := c.GetBool("stream")
 	if isStreaming {