@@ -19,12 +19,16 @@ package gcp
19
19
import (
20
20
"context"
21
21
"fmt"
22
+ "strconv"
22
23
"strings"
23
24
"time"
24
25
26
+ compute "cloud.google.com/go/compute/apiv1"
25
27
"cloud.google.com/go/compute/apiv1/computepb"
26
28
"cloud.google.com/go/compute/metadata"
29
+ "github.com/agrea/ptr"
27
30
"google.golang.org/api/iterator"
31
+ "k8s.io/klog/v2"
28
32
29
33
"github.com/NVIDIA/topograph/pkg/topology"
30
34
)
@@ -39,72 +43,63 @@ type InstanceInfo struct {
39
43
name string
40
44
}
41
45
42
- func (p * Provider ) generateInstanceTopology (ctx context.Context , instanceToNodeMap map [string ]string ) (* InstanceTopology , error ) {
46
+ func (p * Provider ) generateInstanceTopology (ctx context.Context , pageSize * int , cis []topology.ComputeInstances ) (* InstanceTopology , error ) {
47
+ insTop := & InstanceTopology {
48
+ instances : []* InstanceInfo {},
49
+ }
50
+
51
+ maxRes := castPageSize (pageSize )
52
+ for _ , ci := range cis {
53
+ err := p .generateRegionInstanceTopology (ctx , insTop , maxRes , & ci )
54
+ if err != nil {
55
+ return nil , err
56
+ }
57
+ }
58
+
59
+ return insTop , nil
60
+ }
61
+
62
+ func (p * Provider ) generateRegionInstanceTopology (ctx context.Context , insTop * InstanceTopology , maxRes * uint32 , ci * topology.ComputeInstances ) error {
43
63
client , err := p .clientFactory ()
44
64
if err != nil {
45
- return nil , err
65
+ return fmt . Errorf ( "unable to get client: %v" , err )
46
66
}
47
67
48
68
projectID , err := metadata .ProjectIDWithContext (ctx )
49
69
if err != nil {
50
- return nil , fmt .Errorf ("unable to get project ID: %s " , err . Error () )
70
+ return fmt .Errorf ("unable to get project ID: %v " , err )
51
71
}
52
- listZoneRequest := computepb.ListZonesRequest {Project : projectID }
53
- zones := make ([]string , 0 )
54
72
55
- timeNow := time .Now ()
56
- res := client .Zones .List (ctx , & listZoneRequest )
57
- requestLatency .WithLabelValues ("ListZones" ).Observe (time .Since (timeNow ).Seconds ())
73
+ klog .InfoS ("Getting instance topology" , "region" , ci .Region , "project" , projectID )
58
74
59
- for {
60
- zone , err := res .Next ()
61
- if err == iterator .Done {
62
- break
63
- }
64
- zones = append (zones , * zone .Name )
75
+ req := computepb.ListInstancesRequest {
76
+ Project : projectID ,
77
+ Zone : ci .Region ,
78
+ MaxResults : maxRes ,
79
+ PageToken : nil ,
65
80
}
66
81
67
- instanceTopology := & InstanceTopology {instances : make ([]* InstanceInfo , 0 )}
82
+ var cycle int
83
+ for {
84
+ cycle ++
85
+ klog .V (4 ).Infof ("Starting cycle %d" , cycle )
68
86
69
- for _ , zone := range zones {
70
87
timeNow := time .Now ()
71
- listInstanceRequest := computepb. ListInstancesRequest { Project : projectID , Zone : zone }
88
+ resp := client . Instances . List ( ctx , & req )
72
89
requestLatency .WithLabelValues ("ListInstances" ).Observe (time .Since (timeNow ).Seconds ())
73
90
74
- resInstance := client .Instances .List (ctx , & listInstanceRequest )
75
- for {
76
- instance , err := resInstance .Next ()
77
- if err == iterator .Done {
78
- break
79
- }
80
- _ , isNodeInCluster := instanceToNodeMap [* instance .Name ]
91
+ processInstanceList (insTop , resp , ci )
81
92
82
- if instance .ResourceStatus == nil {
83
- resourceStatusNotFound .WithLabelValues (* instance .Name ).Set (1 )
84
- continue
85
- }
86
- resourceStatusNotFound .WithLabelValues (* instance .Name ).Set (0 )
93
+ klog .V (4 ).Infof ("Processed %d nodes" , len (insTop .instances ))
87
94
88
- if instance .ResourceStatus .PhysicalHost == nil {
89
- physicalHostNotFound .WithLabelValues (* instance .Name ).Set (1 )
90
- continue
91
- }
92
- physicalHostNotFound .WithLabelValues (* instance .Name ).Set (0 )
93
-
94
- if isNodeInCluster {
95
- tokens := strings .Split (* instance .ResourceStatus .PhysicalHost , "/" )
96
- physicalHostIDChunks .WithLabelValues (* instance .Name ).Set (float64 (getTokenCount (tokens )))
97
- instanceObj := & InstanceInfo {
98
- name : * instance .Name ,
99
- clusterID : tokens [1 ],
100
- rackID : tokens [2 ],
101
- }
102
- instanceTopology .instances = append (instanceTopology .instances , instanceObj )
103
- }
95
+ if token := resp .PageInfo ().Token ; token == "" {
96
+ break
97
+ } else {
98
+ req .PageToken = & token
104
99
}
105
100
}
106
101
107
- return instanceTopology , nil
102
+ return nil
108
103
}
109
104
110
105
func (cfg * InstanceTopology ) toGraph () (* topology.Vertex , error ) {
@@ -156,6 +151,42 @@ func (cfg *InstanceTopology) toGraph() (*topology.Vertex, error) {
156
151
return root , nil
157
152
}
158
153
154
+ func processInstanceList (insTop * InstanceTopology , resp * compute.InstanceIterator , ci * topology.ComputeInstances ) {
155
+ for {
156
+ instance , err := resp .Next ()
157
+ if err == iterator .Done {
158
+ return
159
+ }
160
+ instanceId := strconv .FormatUint (* instance .Id , 10 )
161
+ klog .Infof ("Checking INSTANCE %s" , instanceId )
162
+ if _ , ok := ci .Instances [instanceId ]; ok {
163
+ klog .Infof ("FOUND INSTANCE %s" , instanceId )
164
+ if instance .ResourceStatus == nil {
165
+ klog .Infof ("ResourceStatus is not set for INSTANCE %s" , instanceId )
166
+ resourceStatusNotFound .WithLabelValues (instanceId ).Set (1 )
167
+ continue
168
+ }
169
+ resourceStatusNotFound .WithLabelValues (instanceId ).Set (0 )
170
+
171
+ if instance .ResourceStatus .PhysicalHost == nil {
172
+ klog .Infof ("PhysicalHost is not set for INSTANCE %s" , instanceId )
173
+ physicalHostNotFound .WithLabelValues (instanceId ).Set (1 )
174
+ continue
175
+ }
176
+ physicalHostNotFound .WithLabelValues (instanceId ).Set (0 )
177
+
178
+ tokens := strings .Split (* instance .ResourceStatus .PhysicalHost , "/" )
179
+ physicalHostIDChunks .WithLabelValues (instanceId ).Set (float64 (getTokenCount (tokens )))
180
+ instanceObj := & InstanceInfo {
181
+ name : instanceId ,
182
+ clusterID : tokens [1 ],
183
+ rackID : tokens [2 ],
184
+ }
185
+ insTop .instances = append (insTop .instances , instanceObj )
186
+ }
187
+ }
188
+ }
189
+
159
190
func getTokenCount (tokens []string ) int {
160
191
c := 0
161
192
for _ , q := range tokens {
@@ -165,3 +196,11 @@ func getTokenCount(tokens []string) int {
165
196
}
166
197
return c
167
198
}
199
+
200
+ func castPageSize (val * int ) * uint32 {
201
+ if val == nil {
202
+ return nil
203
+ }
204
+
205
+ return ptr .Uint32 (uint32 (* val ))
206
+ }
0 commit comments