Skip to content

Commit 4076bd2

Browse files
authored
small changes in texts (#156)
Signed-off-by: Maya Barnea <[email protected]>
1 parent e112efe commit 4076bd2

File tree

2 files changed

+5
-5
lines changed

2 files changed

+5
-5
lines changed

README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -122,13 +122,13 @@ For more details see the <a href="https://docs.vllm.ai/en/stable/getting_started
122122
- `tokenizers-cache-dir`: the directory for caching tokenizers
123123
- `hash-seed`: seed for hash generation (if not set, is read from PYTHONHASHSEED environment variable)
124124
- `zmq-endpoint`: ZMQ address to publish events
125-
- `zmq-max-connect-attempts`: the maximum number of ZMQ connection attempts. defaults to 0. maximum: 10
125+
- `zmq-max-connect-attempts`: the maximum number of ZMQ connection attempts, defaults to 0, maximum: 10
126126
- `event-batch-size`: the maximum number of kv-cache events to be sent together, defaults to 16
127-
- `fake-metrics`: represents a predefined set of metrics to be sent to Prometheus as a substitute for the actual data. When specified, only these fake metrics will be reported — real metrics and fake metrics will never be reported simultaneously. The set should include values for
127+
- `fake-metrics`: represents a predefined set of metrics to be sent to Prometheus as a substitute for the real metrics. When specified, only these fake metrics will be reported — real metrics and fake metrics will never be reported together. The set should include values for
128128
- `running-requests`
129129
- `waiting-requests`
130130
- `kv-cache-usage`
131-
- `loras` - an array containing LoRA information objects, each with `running` (a comma-separated list of active LoRAs), `waiting` (a comma-separated list of LoRAs on hold), and a `timestamp`.
131+
- `loras` - an array containing LoRA information objects, each with the fields: `running` (a comma-separated list of LoRAs in use by running requests), `waiting` (a comma-separated list of LoRAs to be used by waiting requests), and `timestamp` (seconds since Jan 1 1970, the timestamp of this metric).
132132

133133
Example:
134134
{"running-requests":10,"waiting-requests":30,"kv-cache-usage":0.4,"loras":[{"running":"lora4,lora2","waiting":"lora3","timestamp":1257894567},{"running":"lora4,lora3","waiting":"","timestamp":1257894569}]}

pkg/common/config.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -420,7 +420,7 @@ func ParseCommandParamsAndLoadConfig() (*Configuration, error) {
420420
f.StringVar(&config.TokenizersCacheDir, "tokenizers-cache-dir", config.TokenizersCacheDir, "Directory for caching tokenizers")
421421
f.StringVar(&config.HashSeed, "hash-seed", config.HashSeed, "Seed for hash generation (if not set, is read from PYTHONHASHSEED environment variable)")
422422
f.StringVar(&config.ZMQEndpoint, "zmq-endpoint", config.ZMQEndpoint, "ZMQ address to publish events")
423-
f.UintVar(&config.ZMQMaxConnectAttempts, "zmq-max-connect-attempts", config.ZMQMaxConnectAttempts, "Maximum number of times to retry ZMQ requests")
423+
f.UintVar(&config.ZMQMaxConnectAttempts, "zmq-max-connect-attempts", config.ZMQMaxConnectAttempts, "Maximum number of times to try ZMQ connect")
424424
f.IntVar(&config.EventBatchSize, "event-batch-size", config.EventBatchSize, "Maximum number of kv-cache events to be sent together")
425425

426426
// These values were manually parsed above in getParamValueFromArgs, we leave this in order to get these flags in --help
@@ -429,7 +429,7 @@ func ParseCommandParamsAndLoadConfig() (*Configuration, error) {
429429
var dummyMultiString multiString
430430
f.Var(&dummyMultiString, "served-model-name", "Model names exposed by the API (a list of space-separated strings)")
431431
f.Var(&dummyMultiString, "lora-modules", "List of LoRA adapters (a list of space-separated JSON strings)")
432-
f.Var(&dummyMultiString, "fake-metrics", "A set of metrics to send to Prometheus instead of the real data")
432+
f.Var(&dummyMultiString, "fake-metrics", "A set of metrics to report to Prometheus instead of the real metrics")
433433
// In order to allow empty arguments, we set a dummy NoOptDefVal for these flags
434434
f.Lookup("served-model-name").NoOptDefVal = dummy
435435
f.Lookup("lora-modules").NoOptDefVal = dummy

0 commit comments

Comments
 (0)