Skip to content
This repository was archived by the owner on Jan 29, 2025. It is now read-only.

Commit 156485f

Browse files
uniemimu authored and togashidm committed
Support for card disabling, allowlist, denylist, descheduling.
Signed-off-by: Ukri Niemimuukko <[email protected]>
1 parent 11c82d8 commit 156485f

19 files changed

+1307
-362
lines changed

extender/scheduler.go

+7-7
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
//Package extender contains types and logic to respond to requests from a Kubernetes http scheduler extender.
1+
// Package extender contains types and logic to respond to requests from a Kubernetes http scheduler extender.
22
package extender
33

44
import (
@@ -11,7 +11,7 @@ import (
1111
"k8s.io/klog/v2"
1212
)
1313

14-
//postOnly check if the method type is POST
14+
// postOnly check if the method type is POST.
1515
func postOnly(next http.HandlerFunc) http.HandlerFunc {
1616
return func(w http.ResponseWriter, r *http.Request) {
1717
if r.Method != http.MethodPost {
@@ -24,7 +24,7 @@ func postOnly(next http.HandlerFunc) http.HandlerFunc {
2424
}
2525
}
2626

27-
//contentLength check the if the request size is adequate
27+
// contentLength checks if the request size is adequate.
2828
func contentLength(next http.HandlerFunc) http.HandlerFunc {
2929
return func(w http.ResponseWriter, r *http.Request) {
3030
if r.ContentLength > 1*1000*1000*1000 {
@@ -37,7 +37,7 @@ func contentLength(next http.HandlerFunc) http.HandlerFunc {
3737
}
3838
}
3939

40-
//requestContentType verify the content type of the request
40+
// requestContentType verify the content type of the request.
4141
func requestContentType(next http.HandlerFunc) http.HandlerFunc {
4242
return func(w http.ResponseWriter, r *http.Request) {
4343
requestContentType := r.Header.Get("Content-Type")
@@ -65,7 +65,7 @@ if the content type is not correct - i.e. NOT application/json - the response wi
6565
will not run.
6666
*/
6767

68-
//handlerWithMiddleware is handler wrapped with middleware to serve the prechecks at endpoint
68+
// handlerWithMiddleware is handler wrapped with middleware to serve the prechecks at endpoint.
6969
func handlerWithMiddleware(handle http.HandlerFunc) http.HandlerFunc {
7070
return requestContentType(
7171
contentLength(
@@ -74,7 +74,7 @@ func handlerWithMiddleware(handle http.HandlerFunc) http.HandlerFunc {
7474
)
7575
}
7676

77-
//error handler deals with requests sent to an invalid endpoint and returns a 404.
77+
// errorHandler deals with requests sent to an invalid endpoint and returns a 404.
7878
func errorHandler(w http.ResponseWriter, r *http.Request) {
7979
klog.V(2).InfoS("Requested resource: '"+r.URL.Path+"' not found", "component", "extender")
8080
w.Header().Add("Content-Type", "application/json")
@@ -106,7 +106,7 @@ func (m Server) StartServer(port string, certFile string, keyFile string, caFile
106106
klog.V(2).InfoS("Scheduler extender server failed to start "+err.Error(), "component", "extender")
107107
}
108108

109-
//Configuration values including algorithms etc for the TAS scheduling endpoint.
109+
// Configuration values including algorithms etc for the TAS scheduling endpoint.
110110
func configureSecureServer(port string, caFile string) *http.Server {
111111
caCert, err := ioutil.ReadFile(caFile)
112112
if err != nil {

extender/types.go

+3-3
Original file line numberDiff line numberDiff line change
@@ -7,19 +7,19 @@ import (
77
"k8s.io/apimachinery/pkg/types"
88
)
99

10-
//Scheduler has the capabilities needed to prioritize and filter nodes based on http requests.
10+
// Scheduler has the capabilities needed to prioritize and filter nodes based on http requests.
1111
type Scheduler interface {
1212
Bind(w http.ResponseWriter, r *http.Request)
1313
Prioritize(w http.ResponseWriter, r *http.Request)
1414
Filter(w http.ResponseWriter, r *http.Request)
1515
}
1616

17-
//Server type wraps the implementation of the extender.
17+
// Server type wraps the implementation of the extender.
1818
type Server struct {
1919
Scheduler
2020
}
2121

22-
//TODO: These types are in the k8s.io/kubernetes/extender/api package
22+
// TODO: These types are in the k8s.io/kubernetes/extender/api package
2323
// Some import issue is making them tough to access, so they are reimplemented here pending a solution.
2424

2525
// HostPriority represents the priority of scheduling to a particular host, higher priority is better.

gpu-aware-scheduling/Makefile

+8-3
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ endif
55
.PHONY: test all build gpu-extender images format clean
66

77
test:
8-
go test ./... -v *_test.go
8+
go test ./... -v *_test.go
99

1010
all: format build
1111

@@ -16,9 +16,14 @@ image: gpu-extender
1616
docker build -f deploy/images/Dockerfile_gpu-extender ../ -t $(IMAGE_PATH)gpu-extender$(IMAGE_TAG)
1717

1818
format:
19-
gofmt -w -s .
19+
gofmt -w -s .
2020

2121
clean:
22-
rm -f ./bin/*
22+
rm -f ./bin/*
23+
24+
mock:
25+
mockery --name=CacheAPI --dir=pkg/gpuscheduler --inpkg --note="+build !validation\nre-generate with 'make mock'"
26+
mockery --name=InternalCacheAPI --dir=pkg/gpuscheduler --inpkg --note="+build !validation\nre-generate with 'make mock'"
27+
2328
e2e:
2429

gpu-aware-scheduling/README.md

+2
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,8 @@ name |type | description| usage | default|
128128
|key| string | location of the key file for the TLS endpoint| --key=/root/key.txt | /etc/kubernetes/pki/ca.key
129129
|cacert| string | location of the ca certificate for the TLS endpoint| --cacert=/root/cacert.txt | /etc/kubernetes/pki/ca.crt
130130
|unsafe| bool | whether or not to listen on a TLS endpoint with the scheduler extender | --unsafe=true| false
131+
|enableAllowlist| bool | enable POD-annotation based GPU allowlist feature | --enableAllowlist| false
132+
|enableDenylist| bool | enable POD-annotation based GPU denylist feature | --enableDenylist| false
131133

132134
## Adding the resource to make a deployment use GAS Scheduler Extender
133135

gpu-aware-scheduling/cmd/gas-scheduler-extender/main.go

+4-2
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ import (
1111
func main() {
1212
var (
1313
kubeConfig, port, certFile, keyFile, caFile string
14-
unsafe bool
14+
unsafe, enableAllowlist, enableDenylist bool
1515
)
1616

1717
flag.StringVar(&kubeConfig, "kubeConfig", "/root/.kube/config", "location of kubernetes config file")
@@ -20,6 +20,8 @@ func main() {
2020
flag.StringVar(&keyFile, "key", "/etc/kubernetes/pki/ca.key", "key file extender will use for authentication")
2121
flag.StringVar(&caFile, "cacert", "/etc/kubernetes/pki/ca.crt", "ca file extender will use for authentication")
2222
flag.BoolVar(&unsafe, "unsafe", false, "unsafe instances of GPU aware scheduler will be served over simple http.")
23+
flag.BoolVar(&enableAllowlist, "enableAllowlist", false, "enable allowed GPUs annotation (csv list of names)")
24+
flag.BoolVar(&enableDenylist, "enableDenylist", false, "enable denied GPUs annotation (csv list of names)")
2325
klog.InitFlags(nil)
2426
flag.Parse()
2527

@@ -28,7 +30,7 @@ func main() {
2830
panic(err)
2931
}
3032

31-
gasscheduler := gpuscheduler.NewGASExtender(kubeClient)
33+
gasscheduler := gpuscheduler.NewGASExtender(kubeClient, enableAllowlist, enableDenylist)
3234
sch := extender.Server{Scheduler: gasscheduler}
3335
sch.StartServer(port, certFile, keyFile, caFile, unsafe)
3436
klog.Flush()

gpu-aware-scheduling/docs/usage.md

+9
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,15 @@ Your PODs then, needs to ask for some GPU-resources. Like this:
6565

6666
A complete example pod yaml is located in [docs/example](./example)
6767

68+
## Allowlist and Denylist
69+
70+
You can use POD-annotations in your POD-templates to list the GPU names which you allow or deny for your deployment. Each annotation value is a comma-separated list of the form "card0,card1,card2". The names of the annotations are:
71+
72+
- `gas-allow`
73+
- `gas-deny`
74+
75+
Note that both features are disabled by default. Enable the allowlist and/or the denylist with the `--enableAllowlist` and `--enableDenylist` command line flags.
76+
6877
## Summary in a chronological order
6978

7079
- GPU-plugin initcontainer installs an NFD hook which prints labels for you, based on the Intel GPUs it finds

0 commit comments

Comments
 (0)