Skip to content
This repository was archived by the owner on Jun 2, 2026. It is now read-only.
40 changes: 40 additions & 0 deletions api/pkg/api/handler/allocation.go
Original file line number Diff line number Diff line change
Expand Up @@ -458,6 +458,46 @@ func (cah CreateAllocationHandler) Handle(c echo.Context) error {
} else {
logger.Info().Str("Workflow ID", we.GetID()).Msg("triggered workflow to create Tenant")
}

// Auto-associate tenant-owned Global-scoped iPXE OSes (both raw and
// Templated) with the new site. This mirrors the provider-side
// auto-expansion in the Site create handler.
globalTenantOSes, _, goserr := cdbm.NewOperatingSystemDAO(cah.dbSession).GetAll(
ctx, tx,
cdbm.OperatingSystemFilterInput{
TenantIDs: []uuid.UUID{tenant.ID},
OsTypes: []string{cdbm.OperatingSystemTypeIPXE, cdbm.OperatingSystemTypeTemplatedIPXE},
Scopes: []string{cdbm.OperatingSystemScopeGlobal},
},
cdbp.PageInput{Limit: cdb.GetIntPtr(cdbp.TotalLimit)},
nil,
)
if goserr != nil {
logger.Error().Err(goserr).Msg("error retrieving tenant global-scoped OSes for auto-expansion")
return cutil.NewAPIErrorResponse(c, http.StatusInternalServerError, "Failed to retrieve global-scoped OSes for tenant, DB error", nil)
}
ossaDAO := cdbm.NewOperatingSystemSiteAssociationDAO(cah.dbSession)

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's possible that the OS was synced at some point while the Tenant had an Allocation on the Site, and that their Allocations were later removed.

If they get an Allocation on the same Site again, this will fail. We'll need to fetch their existing Site Associations and create a new one only if it doesn't already exist.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added a test for this use case and corrected the implementation.

for _, gos := range globalTenantOSes {
_, aerr := ossaDAO.GetByOperatingSystemIDAndSiteID(ctx, tx, gos.ID, site.ID, nil)
if aerr != nil && aerr != cdb.ErrDoesNotExist {
logger.Error().Err(aerr).Str("osID", gos.ID.String()).Msg("Failed to check existing OS-site association")
return cutil.NewAPIErrorResponse(c, http.StatusInternalServerError, "Failed to associate global-scoped Operating Systems with new Site", nil)
}
if aerr == cdb.ErrDoesNotExist {
if _, aerr = ossaDAO.Create(ctx, tx, cdbm.OperatingSystemSiteAssociationCreateInput{
OperatingSystemID: gos.ID,
SiteID: site.ID,
Status: cdbm.OperatingSystemSiteAssociationStatusSyncing,
CreatedBy: dbUser.ID,
}); aerr != nil {
logger.Error().Err(aerr).Str("osID", gos.ID.String()).Msg("Failed to auto-associate tenant global OS with new site")
return cutil.NewAPIErrorResponse(c, http.StatusInternalServerError, "Failed to associate global-scoped Operating Systems with new Site", nil)
}
}
}
if len(globalTenantOSes) > 0 {
logger.Info().Int("count", len(globalTenantOSes)).Msg("Auto-associated tenant global-scoped OSes with new site")
}
}

// Commit transaction
Expand Down
120 changes: 120 additions & 0 deletions api/pkg/api/handler/allocation_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -652,6 +652,126 @@ func TestAllocationHandler_Create(t *testing.T) {
}
}

// TestAllocationHandler_Create_GlobalOSAutoAssociationIdempotent verifies that
// creating an Allocation for a Tenant that previously had (and lost) access to a
// Site does not fail because the OperatingSystemSiteAssociation from the earlier
// Allocation already exists.
//
// Scenario:
//  1. Tenant has a global-scoped IPXE OS.
//  2. First Allocation → TenantSite is created → OS is auto-associated with Site.
//  3. TenantSite is deleted to simulate all Allocations being removed.
//  4. Second Allocation (same Tenant + Site) → TenantSite is recreated → OS
//     auto-association must be skipped (not fail) because the row still exists.
func TestAllocationHandler_Create_GlobalOSAutoAssociationIdempotent(t *testing.T) {
	ctx := context.Background()
	dbSession := testMachineInitDB(t)
	defer dbSession.Close()

	common.TestSetupSchema(t, dbSession)

	ipOrg := "test-ip-org-idempotent"
	tnOrg := "test-tn-org-idempotent"

	ipu := testMachineBuildUser(t, dbSession, uuid.New().String(), []string{ipOrg}, []string{"FORGE_PROVIDER_ADMIN"})
	tnu := testMachineBuildUser(t, dbSession, uuid.New().String(), []string{tnOrg}, []string{"FORGE_TENANT_ADMIN"})

	ip := common.TestBuildInfrastructureProvider(t, dbSession, "TestIpIdempotent", ipOrg, ipu)
	site := testIPBlockBuildSite(t, dbSession, ip, "testSiteIdempotent", cdbm.SiteStatusRegistered, true, ipu)
	tenant := testMachineBuildTenant(t, dbSession, tnOrg, "t-idempotent")

	// Five machines are attached to the Instance Type; each Allocation below
	// reserves two of them, so both requests can be satisfied.
	it := common.TestBuildInstanceType(t, dbSession, "testITIdempotent", cdb.GetUUIDPtr(uuid.New()), site, map[string]string{
		"name":        "test-instance-type-idempotent",
		"description": "Idempotent test instance type",
	}, ipu)
	for i := 0; i < 5; i++ {
		mc := testInstanceBuildMachine(t, dbSession, ip.ID, site.ID, cdb.GetBoolPtr(false), nil)
		require.NotNil(t, mc)
		require.NotNil(t, testInstanceBuildMachineInstanceType(t, dbSession, mc, it))
	}

	ipb := testIPBlockBuildIPBlock(t, dbSession, "testipb-idempotent", site, ip, &tenant.ID,
		cdbm.IPBlockRoutingTypeDatacenterOnly, "10.99.0.0", 16, cdbm.IPBlockProtocolVersionV4,
		false, cdbm.IPBlockStatusReady, ipu)

	ipamStorage := ipam.NewIpamStorage(dbSession.DB, nil)
	_, err := ipam.CreateIpamEntryForIPBlock(ctx, ipamStorage, ipb.Prefix, ipb.PrefixLength,
		ipb.RoutingType, ipb.InfrastructureProviderID.String(), ipb.SiteID.String())
	require.NoError(t, err)

	// A tenant-owned global-scoped IPXE OS — this is what the auto-association code targets.
	globalScope := cdbm.OperatingSystemScopeGlobal
	globalOS := &cdbm.OperatingSystem{
		ID:          uuid.New(),
		Name:        "global-os-idempotent",
		TenantID:    cdb.GetUUIDPtr(tenant.ID),
		Type:        cdbm.OperatingSystemTypeIPXE,
		IpxeOsScope: &globalScope,
		IpxeScript:  cdb.GetStrPtr(common.DefaultIpxeScript),
		IsActive:    true,
		Status:      cdbm.OperatingSystemStatusReady,
		CreatedBy:   tnu.ID,
	}
	_, err = dbSession.DB.NewInsert().Model(globalOS).Exec(ctx)
	require.NoError(t, err)

	ac := model.APIAllocationConstraintCreateRequest{
		ResourceType:    cdbm.AllocationResourceTypeInstanceType,
		ResourceTypeID:  it.ID.String(),
		ConstraintType:  cdbm.AllocationConstraintTypeReserved,
		ConstraintValue: 2,
	}

	// newRequestBody marshals an Allocation create request for this Tenant and
	// Site. The two requests in this test differ only by name.
	newRequestBody := func(name string) string {
		t.Helper()
		body, merr := json.Marshal(model.APIAllocationCreateRequest{
			Name:                  name,
			Description:           cdb.GetStrPtr(""),
			TenantID:              tenant.ID.String(),
			SiteID:                site.ID.String(),
			AllocationConstraints: []model.APIAllocationConstraintCreateRequest{ac},
		})
		require.NoError(t, merr)
		return string(body)
	}

	// First allocation: TenantSite is created and the global OS is auto-associated.
	a1 := testCreateAllocation(t, dbSession, ipamStorage, ipu, ipOrg, newRequestBody("alloc-idempotent-1"))
	require.NotNil(t, a1)

	ossaDAO := cdbm.NewOperatingSystemSiteAssociationDAO(dbSession)
	_, err = ossaDAO.GetByOperatingSystemIDAndSiteID(ctx, nil, globalOS.ID, site.ID, nil)
	require.NoError(t, err, "OS-site association must exist after first allocation")

	// Simulate all Allocations being removed: delete the TenantSite record so that
	// the next Allocation triggers TenantSite (and OS auto-association) logic again.
	_, err = dbSession.DB.NewDelete().TableExpr("tenant_site").
		Where("tenant_id = ? AND site_id = ?", tenant.ID, site.ID).Exec(ctx)
	require.NoError(t, err)

	// Second allocation on the same site: must succeed even though the
	// OperatingSystemSiteAssociation from the first allocation still exists.
	a2 := testCreateAllocation(t, dbSession, ipamStorage, ipu, ipOrg, newRequestBody("alloc-idempotent-2"))
	require.NotNil(t, a2, "second allocation must succeed when OS-site association already exists")

	// The association should still exist exactly once (not duplicated). Only the
	// total count matters here, so the returned rows are discarded.
	_, ossaCount, err := ossaDAO.GetAll(ctx, nil,
		cdbm.OperatingSystemSiteAssociationFilterInput{
			OperatingSystemIDs: []uuid.UUID{globalOS.ID},
			SiteIDs:            []uuid.UUID{site.ID},
		},
		cdbp.PageInput{},
		nil,
	)
	require.NoError(t, err)
	assert.Equal(t, 1, ossaCount, "OS-site association must exist exactly once after both allocations")
}

func testCreateAllocation(t *testing.T, dbSession *cdb.Session, ipamStorage cipam.Storage, user *cdbm.User, reqOrgName, reqBody string) *model.APIAllocation {
ctx := context.Background()
e := echo.New()
Expand Down
1 change: 1 addition & 0 deletions api/pkg/api/handler/dpuextensionservice.go
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,7 @@ func (cdesh CreateDpuExtensionServiceHandler) Handle(c echo.Context) error {
if err != nil {
var timeoutErr *tp.TimeoutError
if errors.As(err, &timeoutErr) {
// TODO: Terminate the workflow
logger.Error().Err(err).Msg("timed out executing DPU Extension Service creation workflow on Site")
return cutil.NewAPIErrorResponse(c, http.StatusInternalServerError, fmt.Sprintf("Timed out executing DPU Extension Service creation workflow on Site: %s", err), nil)
}
Expand Down
Loading
Loading