Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions assets/gpu-feature-discovery/0210_clusterrole.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,9 @@ rules:
- get
- list
- watch
- apiGroups:
- ""
resources:
- pods
verbs:
- get
6 changes: 6 additions & 0 deletions config/rbac/auth_proxy_client_clusterrole.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,9 @@ metadata:
rules:
- nonResourceURLs: ["/metrics"]
verbs: ["get"]
- apiGroups:
- ""
resources:
- pods
verbs:
- get
6 changes: 6 additions & 0 deletions config/rbac/auth_proxy_role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,9 @@ rules:
resources:
- subjectaccessreviews
verbs: ["create"]
- apiGroups:
- ""
resources:
- pods
verbs:
- get
6 changes: 6 additions & 0 deletions config/rbac/clusterpolicy_editor_role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,9 @@ rules:
- clusterpolicies/status
verbs:
- get
- apiGroups:
- ""
resources:
- pods
verbs:
- get
6 changes: 6 additions & 0 deletions config/rbac/clusterpolicy_viewer_role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,9 @@ rules:
- clusterpolicies/status
verbs:
- get
- apiGroups:
- ""
resources:
- pods
verbs:
- get
6 changes: 6 additions & 0 deletions config/rbac/leader_election_role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,9 @@ rules:
verbs:
- create
- patch
- apiGroups:
- ""
resources:
- pods
verbs:
- get
6 changes: 6 additions & 0 deletions config/rbac/nvidiadriver_editor_role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,9 @@ rules:
- nvidiadrivers/status
verbs:
- get
- apiGroups:
- ""
resources:
- pods
verbs:
- get
6 changes: 6 additions & 0 deletions config/rbac/nvidiadriver_viewer_role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,9 @@ rules:
- nvidiadrivers/status
verbs:
- get
- apiGroups:
- ""
resources:
- pods
verbs:
- get
20 changes: 20 additions & 0 deletions controllers/clusterpolicy_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,16 @@ func updateCRState(ctx context.Context, r *ClusterPolicyReconciler, namespacedNa
}
}

// isOwnedByDaemonSet reports whether any entry in ownerRefs has Kind
// "DaemonSet" and is flagged as the controlling owner (Controller is set and
// true). It returns false for an empty or nil slice.
func isOwnedByDaemonSet(ownerRefs []metav1.OwnerReference) bool {
	for i := range ownerRefs {
		ref := &ownerRefs[i]
		// Only DaemonSet owners are of interest.
		if ref.Kind != "DaemonSet" {
			continue
		}
		// Controller is an optional pointer; unset means "not the controller".
		if ref.Controller == nil {
			continue
		}
		if *ref.Controller {
			return true
		}
	}
	return false
}

func addWatchNewGPUNode(r *ClusterPolicyReconciler, c controller.Controller, mgr ctrl.Manager) error {
// Define a mapping from the Node object in the event to one or more
// ClusterPolicy objects to Reconcile
Expand Down Expand Up @@ -288,6 +298,15 @@ func addWatchNewGPUNode(r *ClusterPolicyReconciler, c controller.Controller, mgr
return hasGPULabels(labels)
},
UpdateFunc: func(e event.TypedUpdateEvent[*corev1.Node]) bool {
r.Log.Info("SHIVAAAAAAAA Calling labels: UpdateFunc", "labels", e.ObjectNew.GetLabels())

ownerRefs := e.ObjectNew.GetOwnerReferences()
if isOwnedByDaemonSet(ownerRefs) {
r.Log.Info("SHIVAAAAAAAA DaemonSet owner found", "ownerRefs", ownerRefs)
// return false
}
r.Log.Info("SHIVAAAAAAAA Calling labels: UpdateFunc", "ownerRefs", ownerRefs)

newLabels := e.ObjectNew.GetLabels()
oldLabels := e.ObjectOld.GetLabels()
nodeName := e.ObjectNew.GetName()
Expand Down Expand Up @@ -323,6 +342,7 @@ func addWatchNewGPUNode(r *ClusterPolicyReconciler, c controller.Controller, mgr
"osTreeLabelChanged", osTreeLabelChanged,
)
}
r.Log.Info("SHIVAAAAAAAA Calling labels: UpdateFunc", "needsUpdate", needsUpdate)
return needsUpdate
},
DeleteFunc: func(e event.TypedDeleteEvent[*corev1.Node]) bool {
Expand Down
10 changes: 10 additions & 0 deletions controllers/object_controls.go
Original file line number Diff line number Diff line change
Expand Up @@ -3846,6 +3846,16 @@ func isDaemonSetReady(name string, n ClusterPolicyController) gpuv1.State {
}
}
}
// Create owner ref
ownerReference := []metav1.OwnerReference{}
// Get pod owner reference
ownerReference = append(ownerReference, metav1.OwnerReference{
APIVersion: corev1.SchemeGroupVersion.String(),
Kind: "Pod",
Name: pod.Name,
UID: pod.UID,
})
pod.OwnerReferences = ownerReference
}

// All containers are ready
Expand Down
1 change: 1 addition & 0 deletions controllers/state_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -844,6 +844,7 @@ func (n *ClusterPolicyController) init(ctx context.Context, reconciler *ClusterP
n.logger.Info("Pod Security Admission labels added to GPU Operator namespace", "namespace", n.operatorNamespace)
}

n.logger.Info("SHIVAAAAAAAA Calling labels: namespace, hasNFDLabels", "namespace", n.operatorNamespace, "hasNFDLabels", n.hasNFDLabels)
// fetch all nodes and label gpu nodes
hasNFDLabels, gpuNodeCount, err := n.labelGPUNodes()
if err != nil {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -137,4 +137,10 @@ rules:
verbs:
- delete
- list
- apiGroups:
- ""
resources:
- pods
verbs:
- get
{{- end }}
Loading