Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions pkg/app/instances/docker.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"fmt"
"io"
"log"
"net/http"
"net/url"
"strings"
"sync"
Expand Down Expand Up @@ -188,6 +189,13 @@ func (m *DockerInstanceManager) waitCreateHostOperation(host string) (*apiv1.Hos
return nil, fmt.Errorf("failed to inspect docker container: %w", err)
}
if res.State.Running {
client, err := m.GetHostClient("local", host)
if err != nil {
return nil, err
}
if err := waitForHostReady(client); err != nil {
return nil, err
}
return &apiv1.HostInstance{
Name: host,
}, nil
Expand Down Expand Up @@ -500,3 +508,18 @@ func (m *DockerInstanceManager) getRWMutex(user accounts.User) *sync.RWMutex {
mu, _ := m.mutexes.LoadOrStore(user.Username(), &sync.RWMutex{})
return mu.(*sync.RWMutex)
}

func waitForHostReady(client HostClient) error {
maxWait := 2 * time.Minute
retryDelay := 5 * time.Second
deadline := time.Now().Add(maxWait)

for time.Now().Before(deadline) {
status, err := client.Get("/", "", nil)
if err == nil && status != http.StatusBadGateway {
return nil
}
time.Sleep(retryDelay)
}
return errors.NewServiceUnavailableError("wait for host orchestrator timed out", nil)
}
52 changes: 52 additions & 0 deletions pkg/app/instances/docker_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
package instances

import (
"net/http"
"net/http/httputil"
"testing"

"github.com/google/go-cmp/cmp"
Expand Down Expand Up @@ -52,3 +54,53 @@ func TestDecodeOperationFailsMissingUnderscore(t *testing.T) {
t.Errorf("expected error")
}
}

type mockHostClient struct {
getFunc func(string, string, *HostResponse) (int, error)
}

func (c *mockHostClient) Get(path, query string, res *HostResponse) (int, error) {
return c.getFunc(path, query, res)
}

func (c *mockHostClient) Post(path, query string, body any, res *HostResponse) (int, error) {
return 0, nil
}

func (c *mockHostClient) GetReverseProxy() *httputil.ReverseProxy {
return nil
}

func TestWaitForHostReadySucceeds(t *testing.T) {
client := &mockHostClient{
getFunc: func(path, query string, res *HostResponse) (int, error) {
return http.StatusOK, nil
},
}

err := waitForHostReady(client)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
}

func TestWaitForHostReadyRetriesOnBadGateway(t *testing.T) {
calls := 0
client := &mockHostClient{
getFunc: func(path, query string, res *HostResponse) (int, error) {
calls++
if calls == 1 {
return http.StatusBadGateway, nil
}
return http.StatusOK, nil
},
}

err := waitForHostReady(client)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if calls != 2 {
t.Errorf("expected 2 calls, got %d", calls)
}
}
111 changes: 96 additions & 15 deletions pkg/app/instances/gce.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,11 @@ import (
"context"
"fmt"
"log"
"net/http"
"net/url"
"path"
"regexp"
"time"

apiv1 "github.com/google/cloud-android-orchestration/api/v1"
"github.com/google/cloud-android-orchestration/pkg/app/accounts"
Expand All @@ -41,6 +43,7 @@ type GCPIMConfig struct {
UseExternalIP bool
// If true, instances created should be compatible with `acloud CLI`.
AcloudCompatible bool
HostReadyTimeout time.Duration
}

const (
Expand Down Expand Up @@ -78,28 +81,40 @@ func (m *GCEInstanceManager) ListZones() (*apiv1.ListZonesResponse, error) {
}, nil
}

func (m *GCEInstanceManager) GetHostAddr(zone string, host string) (string, error) {
instance, err := m.getHostInstance(zone, host)
if err != nil {
return "", err
}
ilen := len(instance.NetworkInterfaces)
func getHostAddrWithIns(ins *compute.Instance) (string, error) {
ilen := len(ins.NetworkInterfaces)
if ilen == 0 {
log.Printf("host instance %s in zone %s is missing a network interface", host, zone)
log.Printf("host instance %s in zone %s is missing a network interface", ins.Name, ins.Zone)
return "", errors.NewInternalError("host instance missing a network interface", nil)
}
if ilen > 1 {
log.Printf("host instance %s in zone %s has %d network interfaces", host, zone, ilen)
log.Printf("host instance %s in zone %s has %d network interfaces", ins.Name, ins.Zone, ilen)
}
return ins.NetworkInterfaces[0].NetworkIP, nil
}

func (m *GCEInstanceManager) GetHostAddr(zone string, host string) (string, error) {
ins, err := m.getHostInstance(zone, host)
if err != nil {
return "", err
}
return getHostAddrWithIns(ins)
}

func getHostURLWithIns(ins *compute.Instance, config *Config) (*url.URL, error) {
addr, err := getHostAddrWithIns(ins)
if err != nil {
return nil, err
}
return instance.NetworkInterfaces[0].NetworkIP, nil
return url.Parse(fmt.Sprintf("%s://%s:%d", config.HostOrchestratorProtocol, addr, config.GCP.HostOrchestratorPort))
}

func (m *GCEInstanceManager) GetHostURL(zone string, host string) (*url.URL, error) {
addr, err := m.GetHostAddr(zone, host)
ins, err := m.getHostInstance(zone, host)
if err != nil {
return nil, err
}
return url.Parse(fmt.Sprintf("%s://%s:%d", m.Config.HostOrchestratorProtocol, addr, m.Config.GCP.HostOrchestratorPort))
return getHostURLWithIns(ins, &m.Config)
}

const operationStatusDone = "DONE"
Expand Down Expand Up @@ -253,16 +268,81 @@ func (m *GCEInstanceManager) WaitOperation(zone string, user accounts.User, name
if op.Status != operationStatusDone {
return nil, errors.NewServiceUnavailableError("Wait for operation timed out", nil)
}
getter := opResultGetter{Service: m.Service, Op: op}
return getter.Get()
getter := opResultGetter{
Service: m.Service,
Op: op,
Config: &m.Config,
}
res, err := getter.Get()
if err != nil {
return nil, err
}
if hostInst, ok := res.(*apiv1.HostInstance); ok && op.OperationType == "insert" {
return m.WaitHostAvailability(zone, user, hostInst.Name)
}
return res, nil
}

func (m *GCEInstanceManager) WaitHostAvailability(zone string, user accounts.User, host string) (*apiv1.HostInstance, error) {
ins, err := m.getHostInstance(zone, host)
if err != nil {
return nil, err
}
hostInstance, err := BuildHostInstance(ins)
if err != nil {
return nil, err
}
client, err := getHostClientWithIns(ins, &m.Config)
if err != nil {
return nil, err
}
if err := m.waitForOrchestrator(zone, hostInstance, client); err != nil {
return nil, err
}
return hostInstance, nil
}

func (m *GCEInstanceManager) waitForOrchestrator(zone string, host *apiv1.HostInstance, client HostClient) error {
timeout := m.Config.GCP.HostReadyTimeout
if timeout == 0 {
timeout = 5 * time.Minute
}
retryDelay := 5 * time.Second
deadline := time.Now().Add(timeout)

for time.Now().Before(deadline) {
_, err := m.Service.Instances.Get(m.Config.GCP.ProjectID, zone, host.Name).Context(context.TODO()).Do()
if err != nil {
if apiErr, ok := err.(*googleapi.Error); ok && apiErr.Code == http.StatusNotFound {
return errors.NewNotFoundError("Host was deleted concurrently", err)
}
return fmt.Errorf("failed to check host existence: %w", err)
}

status, err := client.Get("/", "", nil)
if err == nil && status != http.StatusBadGateway {
return nil
}

time.Sleep(retryDelay)
}
return errors.NewServiceUnavailableError("Wait for host orchestrator timed out", nil)
}

func getHostClientWithIns(ins *compute.Instance, config *Config) (HostClient, error) {
url, err := getHostURLWithIns(ins, config)
if err != nil {
return nil, err
}
return NewNetHostClient(url, config.AllowSelfSignedHostSSLCertificate), nil
}

func (m *GCEInstanceManager) GetHostClient(zone string, host string) (HostClient, error) {
url, err := m.GetHostURL(zone, host)
ins, err := m.getHostInstance(zone, host)
if err != nil {
return nil, err
}
return NewNetHostClient(url, m.Config.AllowSelfSignedHostSSLCertificate), nil
return getHostClientWithIns(ins, &m.Config)
}

func (m *GCEInstanceManager) getHostInstance(zone string, host string) (*compute.Instance, error) {
Expand Down Expand Up @@ -326,6 +406,7 @@ var (
type opResultGetter struct {
Service *compute.Service
Op *compute.Operation
Config *Config
}

func (g *opResultGetter) Get() (any, error) {
Expand Down
Loading
Loading