Skip to content

Commit a2dedcb

Browse files
weizhoubluetaoyounger
authored andcommitted
Fix: statefulSet applications failed to create in multi-NIC scenarios.
Signed-off-by: tao.yang <[email protected]>
1 parent 384eb56 commit a2dedcb

File tree

9 files changed

+429
-69
lines changed

9 files changed

+429
-69
lines changed

pkg/ipam/allocate.go

+85-24
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ func (i *ipam) Allocate(ctx context.Context, addArgs *models.IpamAddArgs) (*mode
7777
releaseStsOutdatedIPFlag := false
7878
if i.config.EnableStatefulSet && podTopController.APIVersion == appsv1.SchemeGroupVersion.String() && podTopController.Kind == constant.KindStatefulSet {
7979
if endpoint != nil {
80-
releaseStsOutdatedIPFlag, err = i.releaseStsOutdatedIPIfNeed(ctx, addArgs, pod, endpoint, podTopController)
80+
releaseStsOutdatedIPFlag, err = i.releaseStsOutdatedIPIfNeed(ctx, addArgs, pod, endpoint, podTopController, IsMultipleNicWithNoName(pod.Annotations))
8181
if err != nil {
8282
return nil, err
8383
}
@@ -114,13 +114,15 @@ func (i *ipam) Allocate(ctx context.Context, addArgs *models.IpamAddArgs) (*mode
114114
}
115115

116116
func (i *ipam) releaseStsOutdatedIPIfNeed(ctx context.Context, addArgs *models.IpamAddArgs,
117-
pod *corev1.Pod, endpoint *spiderpoolv2beta1.SpiderEndpoint, podTopController types.PodTopController) (bool, error) {
117+
pod *corev1.Pod, endpoint *spiderpoolv2beta1.SpiderEndpoint, podTopController types.PodTopController, isMultipleNicWithNoName bool) (bool, error) {
118118
logger := logutils.FromContext(ctx)
119119

120120
preliminary, err := i.getPoolCandidates(ctx, addArgs, pod, podTopController)
121121
if err != nil {
122122
return false, err
123123
}
124+
logger.Sugar().Infof("Preliminary IPPool candidates: %s", preliminary)
125+
124126
poolMap := make(map[string]map[string]struct{})
125127
for _, candidates := range preliminary {
126128
if _, ok := poolMap[candidates.NIC]; !ok {
@@ -131,38 +133,97 @@ func (i *ipam) releaseStsOutdatedIPIfNeed(ctx context.Context, addArgs *models.I
131133
poolMap[candidates.NIC][pool] = struct{}{}
132134
}
133135
}
134-
endpointMap := make(map[string]map[string]struct{})
135-
for _, ip := range endpoint.Status.Current.IPs {
136-
if _, ok := endpointMap[ip.NIC]; !ok {
137-
endpointMap[ip.NIC] = make(map[string]struct{})
138-
}
136+
logger.Sugar().Debugf("The current mapping between the Pod's IPPool candidates and NICs: %v", poolMap)
137+
138+
// Spiderpool assigns IP addresses to NICs one by one.
139+
// Some NICs may have their IP pools changed, while others may remain unchanged.
140+
// Record these changes and differences to handle specific NICs accordingly.
141+
releaseEndpointIPsFlag := false
142+
needReleaseEndpointIPs := []spiderpoolv2beta1.IPAllocationDetail{}
143+
noReleaseEndpointIPs := []spiderpoolv2beta1.IPAllocationDetail{}
144+
for index, ip := range endpoint.Status.Current.IPs {
139145
if ip.IPv4Pool != nil && *ip.IPv4Pool != "" {
140-
endpointMap[ip.NIC][*ip.IPv4Pool] = struct{}{}
146+
if isMultipleNicWithNoName {
147+
if _, ok := poolMap[strconv.Itoa(index)][*ip.IPv4Pool]; !ok {
148+
// If using the multi-NIC feature through ipam.spidernet.io/ippools without specifying interface names,
149+
// and if the IP pool of one NIC changes, only reclaiming the corresponding endpoint IP could cause the IPAM allocation method to lose allocation records.
150+
// When the interface name is not specified, the allocated NIC name might be "", which cannot be handled properly.
151+
// If isMultipleNicWithNoName is true, all NIC IP addresses will be reclaimed and reallocated.
152+
logger.Sugar().Infof("StatefulSet Pod need to release IP, owned pool: %v, expected pools: %v", *ip.IPv4Pool, poolMap[strconv.Itoa(index)])
153+
releaseEndpointIPsFlag = true
154+
break
155+
}
156+
}
157+
// All other cases determine here whether an IP address needs to be reclaimed.
158+
if _, ok := poolMap[ip.NIC][*ip.IPv4Pool]; !ok && ip.NIC == *addArgs.IfName {
159+
// The multi-NIC feature can be used in the following two ways:
160+
// 1. By specifying additional NICs through k8s.v1.cni.cncf.io/networks and configure the default pool.
161+
// 2. By using ipam.spidernet.io/ippools (excluding cases where the interface name is empty).
162+
// When a change is detected in the corresponding NIC's IP pool,
163+
// the IP information for that NIC will be automatically reclaimed and reallocated.
164+
logger.Sugar().Infof("StatefulSet Pod need to release IP, owned pool: %v, expected pools: %v", *ip.IPv4Pool, poolMap[ip.NIC])
165+
releaseEndpointIPsFlag = true
166+
needReleaseEndpointIPs = append(needReleaseEndpointIPs, ip)
167+
continue
168+
}
141169
}
142170
if ip.IPv6Pool != nil && *ip.IPv6Pool != "" {
143-
endpointMap[ip.NIC][*ip.IPv6Pool] = struct{}{}
171+
if isMultipleNicWithNoName {
172+
if _, ok := poolMap[strconv.Itoa(index)][*ip.IPv6Pool]; !ok {
173+
logger.Sugar().Infof("StatefulSet Pod need to release IP, owned pool: %v, expected pools: %v", *ip.IPv6Pool, poolMap[strconv.Itoa(index)])
174+
releaseEndpointIPsFlag = true
175+
break
176+
}
177+
}
178+
if _, ok := poolMap[ip.NIC][*ip.IPv6Pool]; !ok && ip.NIC == *addArgs.IfName {
179+
logger.Sugar().Infof("StatefulSet Pod need to release IP, owned pool: %v, expected pools: %v", *ip.IPv6Pool, poolMap[ip.NIC])
180+
releaseEndpointIPsFlag = true
181+
needReleaseEndpointIPs = append(needReleaseEndpointIPs, ip)
182+
continue
183+
}
144184
}
185+
186+
// According to the NIC allocation mechanism, we check whether the pool information for each NIC has changed.
187+
// If there is no change, we do not need to reclaim the corresponding endpoint and IP for that NIC.
188+
noReleaseEndpointIPs = append(noReleaseEndpointIPs, ip)
145189
}
146-
if !checkNicPoolExistence(endpointMap, poolMap) {
147-
logger.Sugar().Info("StatefulSet Pod need to release IP: owned pool %v, expected pools: %v", endpointMap, poolMap)
148-
if endpoint.DeletionTimestamp == nil {
149-
logger.Sugar().Infof("delete outdated endpoint of statefulset pod: %v/%v", endpoint.Namespace, endpoint.Name)
150-
if err := i.endpointManager.DeleteEndpoint(ctx, endpoint); err != nil {
190+
191+
if releaseEndpointIPsFlag {
192+
if isMultipleNicWithNoName || len(needReleaseEndpointIPs) == len(endpoint.Status.Current.IPs) {
193+
// The endpoint should be deleted in the following cases:
194+
// 1. If the multi-NIC feature is used through ipam.spidernet.io/ippools without specifying the interface name, and the IPPool has changed.
195+
// 2. In other multi-NIC or single-NIC scenarios, if the IPPool of all NICs has changed.
196+
logger.Sugar().Infof("remove outdated of StatefulSet pod %s/%s: %v", endpoint.Namespace, endpoint.Name, endpoint.Status.Current.IPs)
197+
if endpoint.DeletionTimestamp == nil {
198+
logger.Sugar().Infof("delete outdated endpoint of statefulset pod: %v/%v", endpoint.Namespace, endpoint.Name)
199+
if err := i.endpointManager.DeleteEndpoint(ctx, endpoint); err != nil {
200+
return false, err
201+
}
202+
}
203+
if err := i.endpointManager.RemoveFinalizer(ctx, endpoint); err != nil {
204+
return false, fmt.Errorf("failed to clean statefulset pod's endpoint when expected ippool was changed: %v", err)
205+
}
206+
err := i.release(ctx, endpoint.Status.Current.UID, endpoint.Status.Current.IPs)
207+
if err != nil {
151208
return false, err
152209
}
210+
logger.Sugar().Infof("remove outdated of StatefulSet Pod IPs: %v in Pool", endpoint.Status.Current.IPs)
211+
} else {
212+
// Only update the endpoint and IP corresponding to the changed NIC.
213+
logger.Sugar().Infof("try to update the endpoint IPs of the StatefulSet Pod. Old: %+v, New: %+v.", endpoint.Status.Current.IPs, noReleaseEndpointIPs)
214+
if err := i.endpointManager.PatchEndpointAllocationIPs(ctx, endpoint, noReleaseEndpointIPs); err != nil {
215+
return false, err
216+
}
217+
err := i.release(ctx, endpoint.Status.Current.UID, needReleaseEndpointIPs)
218+
if err != nil {
219+
return false, err
220+
}
221+
logger.Sugar().Infof("remove outdated of StatefulSet Pod IPs: %v in Pool", needReleaseEndpointIPs)
153222
}
154-
err := i.release(ctx, endpoint.Status.Current.UID, endpoint.Status.Current.IPs)
155-
if err != nil {
156-
return false, err
157-
}
158-
logger.Sugar().Info("remove outdated of StatefulSet pod %s/%s: %v", endpoint.Namespace, endpoint.Name, endpoint.Status.Current.IPs)
159-
if err := i.endpointManager.RemoveFinalizer(ctx, endpoint); err != nil {
160-
return false, fmt.Errorf("failed to clean statefulset pod's Endpoint when expected ippool was changed: %v", err)
161-
}
162-
endpoint = nil
223+
163224
return true, nil
164225
} else {
165-
logger.Sugar().Debugf("StatefulSet Pod does not need to release IP: owned pool %v, expected pools: %v", endpointMap, poolMap)
226+
logger.Sugar().Debugf("StatefulSet Pod does not need to release IP: %v", endpoint.Status.Current.IPs)
166227
}
167228
return false, nil
168229
}

pkg/ipam/utils.go

+3-19
Original file line numberDiff line numberDiff line change
@@ -178,10 +178,10 @@ func IsMultipleNicWithNoName(anno map[string]string) bool {
178178
return false
179179
}
180180

181-
result := true
181+
result := false
182182
for _, v := range annoPodIPPools {
183-
if v.NIC != "" {
184-
result = false
183+
if v.NIC == "" {
184+
result = true
185185
}
186186
}
187187

@@ -216,19 +216,3 @@ func validateAndMutateMultipleNICAnnotations(annoIPPoolsValue types.AnnoPodIPPoo
216216

217217
return nil
218218
}
219-
220-
func checkNicPoolExistence(endpointMap, poolMap map[string]map[string]struct{}) bool {
221-
for outerKey, innerMap := range endpointMap {
222-
poolInnerMap, exists := poolMap[outerKey]
223-
if !exists {
224-
return false
225-
}
226-
227-
for innerKey := range innerMap {
228-
if _, exists := poolInnerMap[innerKey]; !exists {
229-
return false
230-
}
231-
}
232-
}
233-
return true
234-
}

pkg/ippoolmanager/ippool_manager.go

+2-1
Original file line numberDiff line numberDiff line change
@@ -171,11 +171,12 @@ func (im *ipPoolManager) genRandomIP(ctx context.Context, ipPool *spiderpoolv2be
171171
// Check if there is a duplicate Pod UID in IPPool.allocatedRecords.
172172
// If so, we skip this allocation and assume that this Pod has already obtained an IP address in the pool.
173173
if record.PodUID == string(pod.UID) {
174-
logger.Sugar().Warnf("The Pod %s/%s UID %s already exists in the assigned IP %s", pod.Namespace, pod.Name, ip, string(pod.UID))
174+
logger.Sugar().Infof("The Pod %s/%s UID %s already exists in the assigned IP %s", pod.Namespace, pod.Name, ip, string(pod.UID))
175175
return net.ParseIP(ip), nil
176176
}
177177
used = append(used, ip)
178178
}
179+
179180
usedIPs, err := spiderpoolip.ParseIPRanges(*ipPool.Spec.IPVersion, used)
180181
if err != nil {
181182
return nil, err

pkg/workloadendpointmanager/workloadendpoint_manager.go

+30-3
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ type WorkloadEndpointManager interface {
3434
UpdateAllocationNICName(ctx context.Context, endpoint *spiderpoolv2beta1.SpiderEndpoint, nic string) (*spiderpoolv2beta1.PodIPAllocation, error)
3535
ReleaseEndpointIPs(ctx context.Context, endpoint *spiderpoolv2beta1.SpiderEndpoint, uid string) ([]spiderpoolv2beta1.IPAllocationDetail, error)
3636
ReleaseEndpointAndFinalizer(ctx context.Context, namespace, podName string, cached bool) error
37+
PatchEndpointAllocationIPs(ctx context.Context, endpoint *spiderpoolv2beta1.SpiderEndpoint, endpointIPs []spiderpoolv2beta1.IPAllocationDetail) error
3738
}
3839

3940
type workloadEndpointManager struct {
@@ -164,8 +165,21 @@ func (em *workloadEndpointManager) PatchIPAllocationResults(ctx context.Context,
164165
return nil
165166
}
166167

167-
// TODO(iiiceoo): Only append records with different NIC.
168-
endpoint.Status.Current.IPs = append(endpoint.Status.Current.IPs, convert.ConvertResultsToIPDetails(results, isMultipleNicWithNoName)...)
168+
// Using ipam.spidernet.io/ippools to specify multiple NICs,
169+
// if only one NIC's IP pool changes, only the changed NIC needs to have its IP address reassigned, while the other NICs remain unaffected.
170+
convertResults := convert.ConvertResultsToIPDetails(results, isMultipleNicWithNoName)
171+
for _, result := range convertResults {
172+
exists := false
173+
for _, existingIP := range endpoint.Status.Current.IPs {
174+
if existingIP.NIC == result.NIC {
175+
exists = true
176+
break
177+
}
178+
}
179+
if !exists {
180+
endpoint.Status.Current.IPs = append(endpoint.Status.Current.IPs, result)
181+
}
182+
}
169183
logger.Sugar().Infof("try to update SpiderEndpoint %s", endpoint)
170184
return em.client.Update(ctx, endpoint)
171185
}
@@ -219,7 +233,6 @@ func (em *workloadEndpointManager) UpdateAllocationNICName(ctx context.Context,
219233
break
220234
}
221235
}
222-
223236
err := em.client.Update(ctx, endpoint)
224237
if nil != err {
225238
return nil, err
@@ -228,6 +241,20 @@ func (em *workloadEndpointManager) UpdateAllocationNICName(ctx context.Context,
228241
return &endpoint.Status.Current, nil
229242
}
230243

244+
// PatchEndpointAllocationIPs will patch the SpiderEndpoint status recorded IPs.
245+
func (em *workloadEndpointManager) PatchEndpointAllocationIPs(ctx context.Context, endpoint *spiderpoolv2beta1.SpiderEndpoint, newEndpointIPs []spiderpoolv2beta1.IPAllocationDetail) error {
246+
log := logutils.FromContext(ctx)
247+
248+
endpoint.Status.Current.IPs = newEndpointIPs
249+
log.Sugar().Debugf("try to update SpiderEndpoint recorded IPs: %s", endpoint)
250+
err := em.client.Update(ctx, endpoint)
251+
if nil != err {
252+
return err
253+
}
254+
255+
return nil
256+
}
257+
231258
// ReleaseEndpointIPs will release the SpiderEndpoint status recorded IPs.
232259
func (em *workloadEndpointManager) ReleaseEndpointIPs(ctx context.Context, endpoint *spiderpoolv2beta1.SpiderEndpoint, podUID string) ([]spiderpoolv2beta1.IPAllocationDetail, error) {
233260
log := logutils.FromContext(ctx)

pkg/workloadendpointmanager/workloadendpoint_manager_test.go

+47
Original file line numberDiff line numberDiff line change
@@ -718,5 +718,52 @@ var _ = Describe("WorkloadEndpointManager", Label("workloadendpoint_manager_test
718718
Expect(err).To(MatchError(constant.ErrUnknown))
719719
})
720720
})
721+
722+
Describe("PatchEndpointAllocationIPs", func() {
723+
var endpointT *spiderpoolv2beta1.SpiderEndpoint
724+
var newEndpointIPs []spiderpoolv2beta1.IPAllocationDetail
725+
726+
BeforeEach(func() {
727+
endpointT = &spiderpoolv2beta1.SpiderEndpoint{
728+
ObjectMeta: metav1.ObjectMeta{
729+
Name: "test-endpoint",
730+
Namespace: "default",
731+
},
732+
Status: spiderpoolv2beta1.WorkloadEndpointStatus{
733+
Current: spiderpoolv2beta1.PodIPAllocation{
734+
IPs: []spiderpoolv2beta1.IPAllocationDetail{
735+
{NIC: "eth0", IPv4: ptr.To("192.168.1.1/24")},
736+
},
737+
},
738+
},
739+
}
740+
741+
newEndpointIPs = []spiderpoolv2beta1.IPAllocationDetail{
742+
{NIC: "eth0", IPv4: ptr.To("192.168.1.2/24")},
743+
{NIC: "eth1", IPv4: ptr.To("192.168.1.3/24")},
744+
}
745+
})
746+
747+
It("successfully patches the SpiderEndpoint with new IPs", func() {
748+
err := fakeClient.Create(ctx, endpointT)
749+
Expect(err).NotTo(HaveOccurred())
750+
751+
err = endpointManager.PatchEndpointAllocationIPs(ctx, endpointT, newEndpointIPs)
752+
Expect(err).NotTo(HaveOccurred())
753+
754+
var updatedEndpoint spiderpoolv2beta1.SpiderEndpoint
755+
err = fakeClient.Get(ctx, types.NamespacedName{Namespace: endpointT.Namespace, Name: endpointT.Name}, &updatedEndpoint)
756+
Expect(err).NotTo(HaveOccurred())
757+
Expect(updatedEndpoint.Status.Current.IPs).To(Equal(newEndpointIPs))
758+
})
759+
760+
It("fails to patch the SpiderEndpoint due to update error", func() {
761+
patches := gomonkey.ApplyMethodReturn(fakeClient, "Update", constant.ErrUnknown)
762+
defer patches.Reset()
763+
764+
err := endpointManager.PatchEndpointAllocationIPs(ctx, endpointT, newEndpointIPs)
765+
Expect(err).To(MatchError(constant.ErrUnknown))
766+
})
767+
})
721768
})
722769
})

test/doc/annotation.md

+3-1
Original file line numberDiff line numberDiff line change
@@ -16,4 +16,6 @@
1616
| A00013 | It's invalid to specify one NIC corresponding IPPool in IPPools annotation with multiple NICs | p2 | | done | |
1717
| A00014 | It's invalid to specify same NIC name for IPPools annotation with multiple NICs | p2 | | done | |
1818
| A00015 | Use wildcard for 'ipam.spidernet.io/ippools' annotation to specify IPPools | p2 | | done | |
19-
| A00016 | In the annotation ipam.spidernet.io/ippools for multi-NICs, when the IP pool for one NIC runs out of IPs, it should not exhaust IPs from other pools. | p2 | | done | |
19+
| A00016 | In the annotation ipam.spidernet.io/ippools for multi-NICs, when the IP pool for one NIC runs out of IPs, it should not exhaust IPs from other pools. | p2 | | done | |
20+
| A00017 | Stateful applications can use multiple NICs via k8s.v1.cni.cncf.io/networks, enabling creation, restart, and IP address changes. | p3 | | done | |
21+
| A00018 | Stateful applications using the annotation ipam.spidernet.io/ippools without specifying a NIC name can still create Pods, restart them, and update their IP addresses. | p3 | | done | |

0 commit comments

Comments
 (0)