Skip to content

Commit b51f885

Browse files
[Fix] Resolve int32 overflow by having the calculation in int64 and c… (#4158)
* [Fix] Resolve int32 overflow by having the calculation in int64 and cap it if the count is over math.MaxInt32 Signed-off-by: justinyeh1995 <[email protected]> * [Test] Add unit tests for CalculateReadyReplicas Signed-off-by: justinyeh1995 <[email protected]> * [Fix] Add a nosec comment to pass the Lint (pre-commit) test Signed-off-by: justinyeh1995 <[email protected]> * [Refactor] Add CapInt64ToInt32 to replace #nosec directives Signed-off-by: justinyeh1995 <[email protected]> * [Refactor] Rename function to SafeInt64ToInt32 and add a underflowing prevention (it also help pass the lint test) Signed-off-by: justinyeh1995 <[email protected]> * [Refactor] Remove the early return as SafeInt64ToInt32 handles the int32 overflow and underflow checking. Signed-off-by: justinyeh1995 <[email protected]> --------- Signed-off-by: justinyeh1995 <[email protected]>
1 parent 470d651 commit b51f885

File tree

2 files changed

+109
-3
lines changed

2 files changed

+109
-3
lines changed

ray-operator/controllers/ray/utils/util.go

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,18 @@ func SafeUint64ToInt64(n uint64) int64 {
253253
return int64(n)
254254
}
255255

256+
// SafeInt64ToInt32 converts int64 to int32, preventing overflow/underflow by
257+
// bounding the value between [math.MinInt32, math.MaxInt32]
258+
func SafeInt64ToInt32(n int64) int32 {
259+
if n > math.MaxInt32 {
260+
return math.MaxInt32
261+
}
262+
if n < math.MinInt32 {
263+
return math.MinInt32
264+
}
265+
return int32(n)
266+
}
267+
256268
// GetNamespace return namespace
257269
func GetNamespace(metaData metav1.ObjectMeta) string {
258270
if metaData.Namespace == "" {
@@ -393,15 +405,15 @@ func CalculateMinReplicas(cluster *rayv1.RayCluster) int32 {
393405

394406
// CalculateMaxReplicas calculates max worker replicas at the cluster level
395407
func CalculateMaxReplicas(cluster *rayv1.RayCluster) int32 {
396-
count := int32(0)
408+
count := int64(0)
397409
for _, nodeGroup := range cluster.Spec.WorkerGroupSpecs {
398410
if nodeGroup.Suspend != nil && *nodeGroup.Suspend {
399411
continue
400412
}
401-
count += (*nodeGroup.MaxReplicas * nodeGroup.NumOfHosts)
413+
count += int64(*nodeGroup.MaxReplicas) * int64(nodeGroup.NumOfHosts)
402414
}
403415

404-
return count
416+
return SafeInt64ToInt32(count)
405417
}
406418

407419
// CalculateReadyReplicas calculates ready worker replicas at the cluster level

ray-operator/controllers/ray/utils/util_test.go

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -805,6 +805,100 @@ func TestCalculateDesiredReplicas(t *testing.T) {
805805
}
806806
}
807807

808+
func TestCalculateMaxReplicasOverflow(t *testing.T) {
809+
tests := []struct {
810+
name string
811+
specs []rayv1.WorkerGroupSpec
812+
expected int32
813+
}{
814+
{
815+
name: "Bug reproduction: issue report with replicas=1, minReplicas=3, numOfHosts=4",
816+
specs: []rayv1.WorkerGroupSpec{
817+
{
818+
GroupName: "workergroup",
819+
Replicas: ptr.To[int32](1),
820+
MinReplicas: ptr.To[int32](3),
821+
MaxReplicas: ptr.To[int32](2147483647), // Default max int32
822+
NumOfHosts: 4,
823+
},
824+
},
825+
expected: 2147483647, // Was -4 before fix, should be capped at max int32
826+
},
827+
{
828+
name: "Single group overflow with default maxReplicas and numOfHosts=4",
829+
specs: []rayv1.WorkerGroupSpec{
830+
{
831+
NumOfHosts: 4,
832+
MinReplicas: ptr.To[int32](3),
833+
MaxReplicas: ptr.To[int32](2147483647),
834+
},
835+
},
836+
expected: 2147483647, // Should be capped at max int32
837+
},
838+
{
839+
name: "Single group overflow with large values",
840+
specs: []rayv1.WorkerGroupSpec{
841+
{
842+
NumOfHosts: 1000,
843+
MinReplicas: ptr.To[int32](1),
844+
MaxReplicas: ptr.To[int32](2147483647),
845+
},
846+
},
847+
expected: 2147483647, // Should be capped
848+
},
849+
{
850+
name: "Multiple groups causing overflow when summed",
851+
specs: []rayv1.WorkerGroupSpec{
852+
{
853+
NumOfHosts: 2,
854+
MinReplicas: ptr.To[int32](1),
855+
MaxReplicas: ptr.To[int32](1500000000),
856+
},
857+
{
858+
NumOfHosts: 1,
859+
MinReplicas: ptr.To[int32](1),
860+
MaxReplicas: ptr.To[int32](1000000000),
861+
},
862+
},
863+
expected: 2147483647, // 3B + 1B > max int32, should be capped
864+
},
865+
{
866+
name: "No overflow with reasonable values",
867+
specs: []rayv1.WorkerGroupSpec{
868+
{
869+
NumOfHosts: 4,
870+
MinReplicas: ptr.To[int32](2),
871+
MaxReplicas: ptr.To[int32](100),
872+
},
873+
},
874+
expected: 400, // 100 * 4 = 400, no overflow
875+
},
876+
{
877+
name: "Edge case: exactly at max int32",
878+
specs: []rayv1.WorkerGroupSpec{
879+
{
880+
NumOfHosts: 1,
881+
MinReplicas: ptr.To[int32](1),
882+
MaxReplicas: ptr.To[int32](2147483647),
883+
},
884+
},
885+
expected: 2147483647, // Exactly at limit
886+
},
887+
}
888+
889+
for _, tc := range tests {
890+
t.Run(tc.name, func(t *testing.T) {
891+
cluster := &rayv1.RayCluster{
892+
Spec: rayv1.RayClusterSpec{
893+
WorkerGroupSpecs: tc.specs,
894+
},
895+
}
896+
result := CalculateMaxReplicas(cluster)
897+
assert.Equal(t, tc.expected, result)
898+
})
899+
}
900+
}
901+
808902
func TestUnmarshalRuntimeEnv(t *testing.T) {
809903
tests := []struct {
810904
name string

0 commit comments

Comments
 (0)