merge master

bricks-cloud · Mar 5, 2024 · 49e74ff · 49e74ff
2 parents a7a44a7 + b41d5fe
commit 49e74ff
Show file tree

Hide file tree

Showing 49 changed files with 2,434 additions and 1,055 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,3 +1,73 @@
+## 1.12.1 - 2024-02-28
+### Added
+- Added querying keys by `keyIds`
+- Increased default postgres DB read timeout to `15s` and write timeout to `5s`
+
+## 1.12.0 - 2024-02-28
+### Added
+- Added setting rotation feature to key
+
+## 1.11.0 - 2024-02-28
+### Added
+- Added cost tracking for OpenAI audio endpoints
+- Added inference cost tracking for OpenAI finetune models
+
+## 1.10.0 - 2024-02-21
+### Added
+- Added `userId` as a new filter option for get events API endpoint
+- Added option to store request and response using keys
+
+## 1.9.6 - 2024-02-18
+### Added
+- Added support for updating key cost limit and rate limit
+
+### Changed
+- Removed validation to updating revoked key field
+
+## 1.9.5 - 2024-02-18
+### Added
+- Added new model "gpt-4-turbo-preview" and "gpt-4-vision-preview" to the cost map
+
+## 1.9.4 - 2024-02-16
+### Added
+- Added support for calculating cost for the cheaper 3.5 turbo model 
+- Added validation to updating revoked key field
+
+## 1.9.3 - 2024-02-13
+### Added
+- Added CORS support in the proxy
+
+## 1.9.2 - 2024-02-06
+### Fixed
+- Fixed custom route tokens recording issue incurred by the new architecture
+
+## 1.9.1 - 2024-02-06
+### Fixed
+- Fixed OpenAI chat completion endpoint being slow
+
+## 1.9.0 - 2024-02-06
+### Changed
+- Drastically improved performance through event driven architecture
+
+### Fixed
+- Fixed a bug where API calls that exceed the cost limit were not blocked
+
+## 1.8.2 - 2024-01-31
+### Added
+- Added support for new chat completion models
+- Added new querying options for metrics and events API
+
+## 1.8.1 - 2024-01-31
+### Changed
+- Extended default proxy request timeout to 10m
+
+### Fixed
+- Fixed streaming response stuck at context deadline exceeded error
+
+## 1.8.0 - 2024-01-26
+### Added
+- Added key authentication for admin endpoints
+
 ## 1.7.6 - 2024-01-17
 ### Fixed
 - Changed code to string in OpenAI error response

diff --git a/README.md b/README.md
@@ -11,6 +11,9 @@
    <a href="https://github.com/bricks-cloud/bricks/blob/main/LICENSE"><img src="https://img.shields.io/badge/license-MIT-red" alt="License"></a>
 </p>
 
+> [!TIP]
+> A [managed version of BricksLLM](https://www.trybricks.ai?utm_source=github&utm_medium=repo&utm_campaign=bricksllm) is also available! It is production ready, and comes with a dashboard to make interacting with BricksLLM easier. Try us out for free today!
+
 **BricksLLM** is a cloud native AI gateway written in Go. Currently, it provides native support for OpenAI, Anthropic and Azure OpenAI. We let you create API keys that have rate limits, cost limits and TTLs. The API keys can be used in both development and production to achieve fine-grained access control that is not provided by any of the foundational model providers. The proxy is designed to be 100% compatible with existing SDKs.
 
 ## Features
@@ -145,10 +148,18 @@ docker pull luyuanxin1995/bricksllm:1.4.0
 > | `REDIS_WRITE_TIME_OUT`         | optional | Timeout for Redis write operations | `500ms`
 > | `IN_MEMORY_DB_UPDATE_INTERVAL`         | optional | The interval BricksLLM API gateway polls Postgresql DB for latest key configurations | `1s`
 > | `STATS_PROVIDER`         | optional | This value can only be datadog. Required for integration with Datadog.  |
-> | `PROXY_TIMEOUT`         | optional | This value can only be datadog. Required for integration with Datadog.  |
+> | `PROXY_TIMEOUT`         | optional | Timeout for proxy HTTP requests. |
+> | `ADMIN_PASS`         | optional | Simple password authentication for admin endpoints.  |
 
 ## Configuration Endpoints
 The configuration server runs on Port `8001`.
+
+##### Headers
+> | name   |  type      | data type      | description                                          |
+> |--------|------------|----------------|------------------------------------------------------|
+> | `X-API-KEY` |  optional  | `string`         | Key authentication header. |
+
+
 <details>
   <summary>Get keys: <code>GET</code> <code><b>/api/key-management/keys</b></code></summary>
 
@@ -161,7 +172,8 @@ This endpoint is set up for retrieving key configurations using a query param ca
 > |--------|------------|----------------|------------------------------------------------------|
 > | `tag` |  optional   | `string`         | Identifier attached to a key configuration                  |
 > | `tags` |  optional  | `[]string`         | Identifiers attached to a key configuration                  |
-> | `provider` |  optional  | `string`         | Provider attached to a key provider configuration. Its value can only be `openai`.                 |
+> | `provider` |  optional  | `string`         | Provider attached to a key provider configuration. Its value can only be `openai`. |
+> | `keyIds` |  optional  | `[]string`         | Unique identifiers for keys. |
 
 ##### Error Response
 
@@ -202,6 +214,9 @@ Fields of KeyConfiguration
 > | allowedPaths | `[]PathConfig` | `[{ "path": "/api/providers/openai/v1/chat/completion", "method": "POST"}]` | Allowed paths that can be accessed using the key. |
 > | settingId | `string` | `98daa3ae-961d-4253-bf6a-322a32fdca3d` | This field is DEPRECATED. Use `settingIds` field instead.  |
 > | settingIds | `[]string` | `[98daa3ae-961d-4253-bf6a-322a32fdca3d]` | Setting ids associated with the key. |
+> | shouldLogRequest | `bool` | `false` | Should request be stored. |
+> | shouldLogResponse | `bool` | `true` | Should response be stored. |
+> | rotationEnabled | `bool` | `false` | Should key rotate setting used to access third party endpoints in order to circumvent rate limits. |
 
 </details>
 
@@ -236,6 +251,9 @@ PathConfig
 > | rateLimitUnit | optional | `enum` | m                         |  Time unit for rateLimitOverTime. Possible values are [`h`, `m`, `s`, `d`]       |
 > | ttl | optional | `string` | 2d | time to live. Available units are [`s`, `m`, `h`]. |
 > | allowedPaths | optional | `[]PathConfig` | `[{ "path": "/api/providers/openai/v1/chat/completions", "method": "POST"}]` | Paths allowed for access. |
+> | shouldLogRequest | optional | `bool` | `false` | Should request be stored. |
+> | shouldLogResponse | optional | `bool` | `true` | Should response be stored. |
+> | rotationEnabled | optional | `bool` | `false` | Should key rotate setting used to access third party endpoints in order to circumvent rate limits. |
 
 
 ##### Error Response
@@ -272,6 +290,9 @@ PathConfig
 > | allowedPaths | `[]PathConfig` | `[{ "path": "/api/providers/openai/v1/chat/completion", "method": "POST"}]` | Allowed paths that can be accessed using the key. |
 > | settingId | `string` | `98daa3ae-961d-4253-bf6a-322a32fdca3d` | This field is DEPRECATED. Use `settingIds` field instead.  |
 > | settingIds | `[]string` | `[98daa3ae-961d-4253-bf6a-322a32fdca3d]` | Setting ids associated with the key. |
+> | shouldLogRequest | `bool` | `false` | Should request be stored. |
+> | shouldLogResponse | `bool` | `true` | Should response be stored. |
+> | rotationEnabled | `bool` | `false` | Should key rotate setting used to access third party endpoints in order to circumvent rate limits. |
 
 </details>
 
@@ -302,8 +323,16 @@ PathConfig
 > | name | optional | `string` | spike's developer key | Name of the API key. |
 > | tags | optional | `[]string` | `["org-tag-12345"]`             | Identifiers associated with the key. |
 > | revoked | optional |  `boolean` | `true` | Indicator for whether the key is revoked.  |
-> | revokedReason| optional | `string` | The key has expired | Reason for why the key is revoked.  |
-> | allowedPaths | optional | `[]PathConfig` | 2d | Pathes allowed for access. |
+> | revokedReason | optional | `string` | The key has expired | Reason for why the key is revoked.  |
+> | costLimitInUsd | optional | `float64` | `5.5` | Total spend limit of the API key. |
+> | costLimitInUsdOverTime | optional | `float64` | `2` | Total spend within period of time. This field is required if costLimitInUsdUnit is specified.   |
+> | costLimitInUsdUnit | optional | `enum` | `d`                       | Time unit for costLimitInUsdOverTime. Possible values are [`m`, `h`, `d`, `mo`].      |
+> | rateLimitOverTime | optional | `int` | `2` | rate limit over period of time. This field is required if rateLimitUnit is specified.    |
+> | rateLimitUnit | optional | `string` | `m`                         |  Time unit for rateLimitOverTime. Possible values are [`h`, `m`, `s`, `d`]       |
+> | allowedPaths | optional | `[]PathConfig` | `[{ "path": "/api/providers/openai/v1/chat/completions", "method": "POST"}]` | Paths allowed for access. |
+> | shouldLogRequest | optional | `bool` | `false` | Should request be stored. |
+> | shouldLogResponse | optional | `bool` | `true` | Should response be stored. |
+> | rotationEnabled | optional | `bool` | `false` | Should key rotate setting used to access third party endpoints in order to circumvent rate limits. |
 
 ##### Error Response
 
@@ -338,6 +367,9 @@ PathConfig
 > | allowedPaths | `[]PathConfig` | `[{ "path": "/api/providers/openai/v1/chat/completion", "method": "POST"}]` | Allowed paths that can be accessed using the key. |
 > | settingId | `string` | `98daa3ae-961d-4253-bf6a-322a32fdca3d` | This field is DEPRECATED. Use `settingIds` field instead.  |
 > | settingIds | `[]string` | `[98daa3ae-961d-4253-bf6a-322a32fdca3d]` | Setting ids associated with the key. |
+> | shouldLogRequest | `bool` | `false` | Should request be stored. |
+> | shouldLogResponse | `bool` | `true` | Should response be stored. |
+> | rotationEnabled | `bool` | `false` | Should key rotate setting used to access third party endpoints in order to circumvent rate limits. |
 
 </details>
 
@@ -489,7 +521,9 @@ This endpoint is retrieving aggregated metrics given an array of key ids and tag
 > | Field | required | type | example                      | description |
 > |---------------|-----------------------------------|-|-|-|
 > | keyIds | required | `[]string` | `["key-1", "key-2", "key-3" ]` | Array of ids that specify the keys that you want to aggregate stats from. |
-> | tags | required | `[]string` | `["tag-1", "tag-2"]`           | Array of tags that specicify the keys that you want to aggregate stats from. |
+> | tags | required | `[]string` | `["tag-1", "tag-2"]`           | Array of tags that specify the key tags that you want to aggregate stats from. |
+> | customIds | required | `[]string` | `["customId-1", "customId-2"]` | A list of custom IDs that you want to aggregate stats from. |
+> | filters | required | `[]string` | `["model", "keyId"]` | Group by data points through different filters(`model`,`keyId` or `customId`). |
 > | start | required | `int64` | `1699933571` | Start timestamp for the requested timeseries data. |
 > | end | required | `int64` | `1699933571` | End timestamp for the requested timeseries data. |
 > | increment | required | `int` | `60` | This field is the increment in seconds for the requested timeseries data. |
@@ -526,6 +560,7 @@ Datapoint
 > | successCount | `int` | `555` | Aggregated number of successful http requests over the given time increment. |
 > | keyId | `string` | `key-1` | key Id associated with the event. |
 > | model | `string` | `gpt-3.5-turbo` | model associated with the event. |
+> | customId | `string` | `customId` | customId associated with the event. |
 
 </details>
 
@@ -538,7 +573,10 @@ This endpoint is for getting events.
 ##### Query Parameters
 > | name   |  type      | data type      | description                                          |
 > |--------|------------|----------------|------------------------------------------------------|
-> | `customId` |  optional   | string         | Custom identifier attached to an event                  |
+> | `customId` |  optional   | `string`         | Custom identifier attached to an event.                  |
+> | `keyIds` |  optional   | `[]string`         | A list of key IDs.                 |
+> | `start` |  required if `keyIds` is specified   | `int64`         | Start timestamp.                |
+> | `end` |  required if `keyIds` is specified   | `int64`         | End timestamp.                |
 
 ##### Error Response
 > | http code     | content-type                      |
@@ -575,6 +613,8 @@ Event
 > | path | `string` | `/api/v1/chat/completion` | Path of the proxy request. |
 > | method | `string` | `POST` | Http method for the associated proxy request. |
 > | custom_id | `string` | `YOUR_CUSTOM_ID` | Custom Id passed by the user in the headers of proxy requests. |
+> | request | `[]byte` | `{}` | Request body of the proxy request, stored when `shouldLogRequest` is enabled on the key. |
+> | response | `[]byte` | `{}` | Response body of the proxy request, stored when `shouldLogResponse` is enabled on the key. |
 </details>
 
 <details>

diff --git a/cmd/bricksllm/main.go b/cmd/bricksllm/main.go
@@ -14,6 +14,7 @@ import (
 	"github.com/bricks-cloud/bricksllm/internal/config"
 	"github.com/bricks-cloud/bricksllm/internal/logger/zap"
 	"github.com/bricks-cloud/bricksllm/internal/manager"
+	"github.com/bricks-cloud/bricksllm/internal/message"
 	"github.com/bricks-cloud/bricksllm/internal/provider/anthropic"
 	"github.com/bricks-cloud/bricksllm/internal/provider/azure"
 	"github.com/bricks-cloud/bricksllm/internal/provider/custom"
@@ -176,18 +177,31 @@ func main() {
 		log.Sugar().Fatalf("error connecting to api redis cache: %v", err)
 	}
 
+	accessRedisCache := redis.NewClient(&redis.Options{
+		Addr:     fmt.Sprintf("%s:%s", cfg.RedisHosts, cfg.RedisPort),
+		Password: cfg.RedisPassword,
+		DB:       4,
+	})
+
+	ctx, cancel = context.WithTimeout(context.Background(), 2*time.Second)
+	defer cancel()
+	if err := accessRedisCache.Ping(ctx).Err(); err != nil { // verify the newly created access cache client (DB 4), not apiRedisCache again
+		log.Sugar().Fatalf("error connecting to access redis cache: %v", err)
+	}
+
 	rateLimitCache := redisStorage.NewCache(rateLimitRedisCache, cfg.RedisWriteTimeout, cfg.RedisReadTimeout)
 	costLimitCache := redisStorage.NewCache(costLimitRedisCache, cfg.RedisWriteTimeout, cfg.RedisReadTimeout)
 	costStorage := redisStorage.NewStore(costRedisStorage, cfg.RedisWriteTimeout, cfg.RedisReadTimeout)
 	apiCache := redisStorage.NewCache(apiRedisCache, cfg.RedisWriteTimeout, cfg.RedisReadTimeout)
+	accessCache := redisStorage.NewAccessCache(accessRedisCache, cfg.RedisWriteTimeout, cfg.RedisReadTimeout)
 
-	m := manager.NewManager(store)
+	m := manager.NewManager(store, costLimitCache, rateLimitCache, accessCache)
 	krm := manager.NewReportingManager(costStorage, store, store)
 	psm := manager.NewProviderSettingsManager(store, psMemStore)
 	cpm := manager.NewCustomProvidersManager(store, cpMemStore)
 	rm := manager.NewRouteManager(store, store, rMemStore, psMemStore)
 
-	as, err := admin.NewAdminServer(log, *modePtr, m, krm, psm, cpm, rm)
+	as, err := admin.NewAdminServer(log, *modePtr, m, krm, psm, cpm, rm, cfg.AdminPass)
 	if err != nil {
 		log.Sugar().Fatalf("error creating admin http server: %v", err)
 	}
@@ -214,7 +228,16 @@ func main() {
 
 	c := cache.NewCache(apiCache)
 
-	ps, err := proxy.NewProxyServer(log, *modePtr, *privacyPtr, c, m, rm, a, psm, cpm, store, memStore, ce, ace, aoe, v, rec, rlm, cfg.ProxyTimeout)
+	messageBus := message.NewMessageBus()
+	eventMessageChan := make(chan message.Message)
+	messageBus.Subscribe("event", eventMessageChan)
+
+	handler := message.NewHandler(rec, log, ace, ce, aoe, v, m, rlm, accessCache)
+
+	eventConsumer := message.NewConsumer(eventMessageChan, log, 4, handler.HandleEventWithRequestAndResponse)
+	eventConsumer.StartEventMessageConsumers()
+
+	ps, err := proxy.NewProxyServer(log, *modePtr, *privacyPtr, c, m, rm, a, psm, cpm, store, memStore, ce, ace, aoe, v, rec, messageBus, rlm, cfg.ProxyTimeout, accessCache)
 	if err != nil {
 		log.Sugar().Fatalf("error creating proxy http server: %v", err)
 	}
@@ -225,6 +248,7 @@ func main() {
 	signal.Notify(quit, syscall.SIGINT, syscall.SIGTERM)
 	<-quit
 
+	eventConsumer.Stop()
 	memStore.Stop()
 	psMemStore.Stop()
 	cpMemStore.Stop()