Skip to content

UPSTREAM: 5532: Azure Stack Fork #1

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Apr 5, 2025
8 changes: 8 additions & 0 deletions api/v1beta1/types_class.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ type AzureClusterClassSpec struct {
// - GermanCloud: "AzureGermanCloud"
// - PublicCloud: "AzurePublicCloud"
// - USGovernmentCloud: "AzureUSGovernmentCloud"
// - StackCloud: "HybridEnvironment"
//
// Note that values other than the default must also be accompanied by corresponding changes to the
// aso-controller-settings Secret to configure ASO to refer to the non-Public cloud. ASO currently does
Expand Down Expand Up @@ -77,6 +78,12 @@ type AzureClusterClassSpec struct {
// See: https://learn.microsoft.com/azure/reliability/availability-zones-overview
// +optional
FailureDomains clusterv1.FailureDomains `json:"failureDomains,omitempty"`

// ARMEndpoint specifies the URL of the Azure Resource Manager (ARM) endpoint.
// It may only be specified when AzureEnvironment is set to the Azure Stack cloud
// ("HybridEnvironment"), in which case it is required.
// +optional
ARMEndpoint string `json:"armEndpoint,omitempty"`
}

// AzureManagedControlPlaneClassSpec defines the AzureManagedControlPlane properties that may be shared across several azure managed control planes.
Expand Down Expand Up @@ -186,6 +193,7 @@ type AzureManagedControlPlaneClassSpec struct {
// - PublicCloud: "AzurePublicCloud"
// - USGovernmentCloud: "AzureUSGovernmentCloud"
//
// Note that values other than the default must also be accompanied by corresponding changes to the
// aso-controller-settings Secret to configure ASO to refer to the non-Public cloud. ASO currently does
// not support referring to multiple different clouds in a single installation. The following fields must
Expand Down
30 changes: 29 additions & 1 deletion azure/defaults.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import (
"github.com/Azure/azure-sdk-for-go/sdk/azcore/policy"
"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v5"
"github.com/Azure/azure-sdk-for-go/sdk/tracing/azotel"
"github.com/Azure/go-autorest/autorest/azure"
"go.opentelemetry.io/otel"

"sigs.k8s.io/cluster-api-provider-azure/util/tele"
Expand All @@ -44,6 +45,8 @@ const (
ChinaCloudName = "AzureChinaCloud"
// USGovernmentCloudName is the name of the Azure US Government cloud.
USGovernmentCloudName = "AzureUSGovernmentCloud"
// StackCloudName is the name for Azure Stack hybrid cloud environments.
StackCloudName = "HybridEnvironment"
)

const (
Expand Down Expand Up @@ -109,6 +112,16 @@ const (
CustomHeaderPrefix = "infrastructure.cluster.x-k8s.io/custom-header-"
)

const (
// StackAPIVersionProfile is the API version profile to set for ARM clients
// when the cloud environment is Azure Stack. See:
// https://learn.microsoft.com/en-us/azure-stack/user/azure-stack-profiles-azure-resource-manager-versions?view=azs-2408#overview-of-the-2020-09-01-hybrid-profile
// NOTE(review): the linked section describes the 2020-09-01-hybrid profile,
// while the value here is "2020-06-01" — confirm the intended version.
StackAPIVersionProfile = "2020-06-01"

// StackDiskAPIVersionProfile is the API version to set for the disks client
// when the cloud environment is Azure Stack.
// API version profile "2020-06-01" is not supported for disks.
StackDiskAPIVersionProfile = "2018-06-01"
)

var (
// LinuxBootstrapExtensionCommand is the command the VM bootstrap extension will execute to verify Linux nodes bootstrap completes successfully.
LinuxBootstrapExtensionCommand = fmt.Sprintf("for i in $(seq 1 %d); do test -f %s && break; if [ $i -eq %d ]; then exit 1; else sleep %d; fi; done", bootstrapExtensionRetries, bootstrapSentinelFile, bootstrapExtensionRetries, bootstrapExtensionSleep)
Expand Down Expand Up @@ -357,7 +370,7 @@ func UserAgent() string {
}

// ARMClientOptions returns default ARM client options for CAPZ SDK v2 requests.
func ARMClientOptions(azureEnvironment string, extraPolicies ...policy.Policy) (*arm.ClientOptions, error) {
func ARMClientOptions(azureEnvironment, armEndpoint string, extraPolicies ...policy.Policy) (*arm.ClientOptions, error) {
opts := &arm.ClientOptions{}

switch azureEnvironment {
Expand All @@ -367,6 +380,21 @@ func ARMClientOptions(azureEnvironment string, extraPolicies ...policy.Policy) (
opts.Cloud = cloud.AzureChina
case USGovernmentCloudName:
opts.Cloud = cloud.AzureGovernment
case StackCloudName:
cloudEnv, err := azure.EnvironmentFromURL(armEndpoint)
if err != nil {
return nil, fmt.Errorf("unable to get Azure Stack cloud environment: %w", err)
}
opts.APIVersion = StackAPIVersionProfile
opts.Cloud = cloud.Configuration{
ActiveDirectoryAuthorityHost: cloudEnv.ActiveDirectoryEndpoint,
Services: map[cloud.ServiceName]cloud.ServiceConfiguration{
cloud.ResourceManager: {
Audience: cloudEnv.TokenAudience,
Endpoint: cloudEnv.ResourceManagerEndpoint,
},
},
}
case "":
// No cloud name provided, so leave at defaults.
default:
Expand Down
7 changes: 4 additions & 3 deletions azure/defaults_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ func TestARMClientOptions(t *testing.T) {
tests := []struct {
name string
cloudName string
armEndpoint string
expectedCloud cloud.Configuration
expectError bool
}{
Expand Down Expand Up @@ -72,7 +73,7 @@ func TestARMClientOptions(t *testing.T) {
t.Parallel()
g := NewWithT(t)

opts, err := ARMClientOptions(tc.cloudName)
opts, err := ARMClientOptions(tc.cloudName, tc.armEndpoint)
if tc.expectError {
g.Expect(err).To(HaveOccurred())
return
Expand All @@ -99,7 +100,7 @@ func TestPerCallPolicies(t *testing.T) {
defer server.Close()

// Call the factory function and ensure it has both PerCallPolicies.
opts, err := ARMClientOptions("")
opts, err := ARMClientOptions("", "")
g.Expect(err).NotTo(HaveOccurred())
g.Expect(opts.PerCallPolicies).To(HaveLen(2))
g.Expect(opts.PerCallPolicies).To(ContainElement(BeAssignableToTypeOf(correlationIDPolicy{})))
Expand Down Expand Up @@ -184,7 +185,7 @@ func TestCustomPutPatchHeaderPolicy(t *testing.T) {
// Create options with a custom PUT/PATCH header per-call policy
getterMock := mock_azure.NewMockResourceSpecGetterWithHeaders(mockCtrl)
getterMock.EXPECT().CustomHeaders().Return(tc.headers).AnyTimes()
opts, err := ARMClientOptions("", CustomPutPatchHeaderPolicy{Headers: tc.headers})
opts, err := ARMClientOptions("", "", CustomPutPatchHeaderPolicy{Headers: tc.headers})
g.Expect(err).NotTo(HaveOccurred())

// Create a request
Expand Down
6 changes: 6 additions & 0 deletions azure/errors.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,12 @@ func ResourceNotFound(err error) bool {
return errors.As(err, &rerr) && rerr.StatusCode == http.StatusNotFound
}

// BadRequest reports whether err wraps an *azcore.ResponseError whose
// status code is Bad Request (400). It returns false for any other error,
// including nil.
func BadRequest(err error) bool {
	var respErr *azcore.ResponseError
	if !errors.As(err, &respErr) {
		return false
	}
	return respErr.StatusCode == http.StatusBadRequest
}

// VMDeletedError is returned when a virtual machine is deleted outside of capz.
type VMDeletedError struct {
ProviderID string
Expand Down
8 changes: 5 additions & 3 deletions azure/scope/clients.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,12 +81,12 @@ func (c *AzureClients) HashKey() string {
return base64.URLEncoding.EncodeToString(hasher.Sum(nil))
}

func (c *AzureClients) setCredentialsWithProvider(ctx context.Context, subscriptionID, environmentName string, credentialsProvider CredentialsProvider) error {
func (c *AzureClients) setCredentialsWithProvider(ctx context.Context, subscriptionID, environmentName, armEndpoint string, credentialsProvider CredentialsProvider) error {
if credentialsProvider == nil {
return fmt.Errorf("credentials provider cannot have an empty value")
}

settings, err := c.getSettingsFromEnvironment(environmentName)
settings, err := c.getSettingsFromEnvironment(environmentName, armEndpoint)
if err != nil {
return err
}
Expand Down Expand Up @@ -121,7 +121,7 @@ func (c *AzureClients) setCredentialsWithProvider(ctx context.Context, subscript
return err
}

func (c *AzureClients) getSettingsFromEnvironment(environmentName string) (s auth.EnvironmentSettings, err error) {
func (c *AzureClients) getSettingsFromEnvironment(environmentName, armEndpoint string) (s auth.EnvironmentSettings, err error) {
s = auth.EnvironmentSettings{
Values: map[string]string{},
}
Expand All @@ -138,6 +138,8 @@ func (c *AzureClients) getSettingsFromEnvironment(environmentName string) (s aut
setValue(s, "AZURE_AD_RESOURCE")
if v := s.Values["AZURE_ENVIRONMENT"]; v == "" {
s.Environment = azureautorest.PublicCloud
} else if len(armEndpoint) > 0 {
s.Environment, err = azureautorest.EnvironmentFromURL(armEndpoint)
} else {
s.Environment, err = azureautorest.EnvironmentFromName(v)
}
Expand Down
10 changes: 8 additions & 2 deletions azure/scope/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,8 @@ func NewClusterScope(ctx context.Context, params ClusterScopeParams) (*ClusterSc
if err != nil {
return nil, errors.Wrap(err, "failed to init credentials provider")
}
err = params.AzureClients.setCredentialsWithProvider(ctx, params.AzureCluster.Spec.SubscriptionID, params.AzureCluster.Spec.AzureEnvironment, credentialsProvider)
spec := params.AzureCluster.Spec
err = params.AzureClients.setCredentialsWithProvider(ctx, spec.SubscriptionID, spec.AzureEnvironment, spec.ARMEndpoint, credentialsProvider)
if err != nil {
return nil, errors.Wrap(err, "failed to configure azure settings and credentials for Identity")
}
Expand Down Expand Up @@ -557,7 +558,7 @@ func (s *ClusterScope) VNetSpec() azure.ASOResourceSpecGetter[*asonetworkv1api20

// PrivateDNSSpec returns the private dns zone spec.
func (s *ClusterScope) PrivateDNSSpec() (zoneSpec azure.ResourceSpecGetter, linkSpec, recordSpec []azure.ResourceSpecGetter) {
if s.IsAPIServerPrivate() {
if s.IsAPIServerPrivate() && !s.IsHybridEnvironment() {
resourceGroup := s.ResourceGroup()
if s.AzureCluster.Spec.NetworkSpec.PrivateDNSZoneResourceGroup != "" {
resourceGroup = s.AzureCluster.Spec.NetworkSpec.PrivateDNSZoneResourceGroup
Expand Down Expand Up @@ -1233,3 +1234,8 @@ func (s *ClusterScope) getLastAppliedSecurityRules(nsgName string) map[string]in
}
return lastAppliedSecurityRules
}

// IsHybridEnvironment reports whether the scope's environment name matches
// azure.StackCloudName (compared case-insensitively), i.e. the cluster is
// running on Azure Stack.
func (s *ClusterScope) IsHybridEnvironment() bool {
	envName := s.Environment.Name
	return strings.EqualFold(envName, azure.StackCloudName)
}
16 changes: 9 additions & 7 deletions azure/scope/machine.go
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,8 @@ func (m *MachineScope) InitMachineCache(ctx context.Context) error {
}

m.cache.availabilitySetSKU, err = skuCache.Get(ctx, string(armcompute.AvailabilitySetSKUTypesAligned), resourceskus.AvailabilitySets)
if err != nil {
// Resource SKU API for availability sets may not be available in Azure Stack environments,
// so a failed lookup is tolerated there. Compare against azure.StackCloudName rather than a
// duplicated string literal so the check stays in sync with the constant.
if err != nil && !strings.EqualFold(m.CloudEnvironment(), azure.StackCloudName) {
	return errors.Wrapf(err, "failed to get availability set SKU %s in compute api", string(armcompute.AvailabilitySetSKUTypesAligned))
}
}
Expand Down Expand Up @@ -494,12 +495,13 @@ func (m *MachineScope) AvailabilitySetSpec() azure.ResourceSpecGetter {
}

spec := &availabilitysets.AvailabilitySetSpec{
Name: availabilitySetName,
ResourceGroup: m.NodeResourceGroup(),
ClusterName: m.ClusterName(),
Location: m.Location(),
SKU: nil,
AdditionalTags: m.AdditionalTags(),
Name: availabilitySetName,
ResourceGroup: m.NodeResourceGroup(),
ClusterName: m.ClusterName(),
Location: m.Location(),
CloudEnvironment: m.CloudEnvironment(),
SKU: nil,
AdditionalTags: m.AdditionalTags(),
}

if m.cache != nil {
Expand Down
2 changes: 1 addition & 1 deletion azure/scope/managedcontrolplane.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ func NewManagedControlPlaneScope(ctx context.Context, params ManagedControlPlane
return nil, errors.Wrap(err, "failed to init credentials provider")
}

if err := params.AzureClients.setCredentialsWithProvider(ctx, params.ControlPlane.Spec.SubscriptionID, params.ControlPlane.Spec.AzureEnvironment, credentialsProvider); err != nil {
if err := params.AzureClients.setCredentialsWithProvider(ctx, params.ControlPlane.Spec.SubscriptionID, params.ControlPlane.Spec.AzureEnvironment, "", credentialsProvider); err != nil {
return nil, errors.Wrap(err, "failed to configure azure settings and credentials for Identity")
}

Expand Down
2 changes: 1 addition & 1 deletion azure/services/availabilitysets/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ type AzureClient struct {

// NewClient creates a new availability sets client from an authorizer.
func NewClient(auth azure.Authorizer) (*AzureClient, error) {
opts, err := azure.ARMClientOptions(auth.CloudEnvironment())
opts, err := azure.ARMClientOptions(auth.CloudEnvironment(), auth.BaseURI())
if err != nil {
return nil, errors.Wrap(err, "failed to create availabilitysets client options")
}
Expand Down
53 changes: 35 additions & 18 deletions azure/services/availabilitysets/spec.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,24 +19,27 @@ package availabilitysets
import (
"context"
"strconv"
"strings"

"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v5"
"github.com/pkg/errors"
"k8s.io/utils/ptr"

infrav1 "sigs.k8s.io/cluster-api-provider-azure/api/v1beta1"
"sigs.k8s.io/cluster-api-provider-azure/azure"
"sigs.k8s.io/cluster-api-provider-azure/azure/converters"
"sigs.k8s.io/cluster-api-provider-azure/azure/services/resourceskus"
)

// AvailabilitySetSpec defines the specification for an availability set.
type AvailabilitySetSpec struct {
Name string
ResourceGroup string
ClusterName string
Location string
SKU *resourceskus.SKU
AdditionalTags infrav1.Tags
Name string
ResourceGroup string
ClusterName string
Location string
CloudEnvironment string
SKU *resourceskus.SKU
AdditionalTags infrav1.Tags
}

// ResourceName returns the name of the availability set.
Expand Down Expand Up @@ -64,20 +67,10 @@ func (s *AvailabilitySetSpec) Parameters(_ context.Context, existing interface{}
return nil, nil
}

if s.SKU == nil {
return nil, errors.New("unable to get required availability set SKU from machine cache")
}

var faultDomainCount *int32
faultDomainCountStr, ok := s.SKU.GetCapability(resourceskus.MaximumPlatformFaultDomainCount)
if !ok {
return nil, errors.Errorf("unable to get required availability set SKU capability %s", resourceskus.MaximumPlatformFaultDomainCount)
}
count, err := strconv.ParseInt(faultDomainCountStr, 10, 32)
faultDomainCount, err := getFaultDomainCount(s.SKU, s.CloudEnvironment)
if err != nil {
return nil, errors.Wrapf(err, "unable to parse availability set fault domain count")
return nil, err
}
faultDomainCount = ptr.To[int32](int32(count))

asParams := armcompute.AvailabilitySet{
SKU: &armcompute.SKU{
Expand All @@ -98,3 +91,27 @@ func (s *AvailabilitySetSpec) Parameters(_ context.Context, existing interface{}

return asParams, nil
}

// getFaultDomainCount resolves the platform fault domain count to use for an
// availability set.
//
// When cloudEnvironment equals azure.StackCloudName (case-insensitively) a
// fixed default of 2 is returned and SKU is not consulted. Otherwise the
// MaximumPlatformFaultDomainCount capability of SKU is parsed as a 32-bit
// integer. An error is returned when SKU is nil, the capability is missing,
// or its value cannot be parsed.
func getFaultDomainCount(SKU *resourceskus.SKU, cloudEnvironment string) (*int32, error) {
	switch {
	case strings.EqualFold(cloudEnvironment, azure.StackCloudName):
		// Azure Stack environments may not implement the resource SKU API
		// for availability sets, so fall back to a default value.
		return ptr.To(int32(2)), nil
	case SKU == nil:
		return nil, errors.New("unable to get required availability set SKU from machine cache")
	}

	raw, found := SKU.GetCapability(resourceskus.MaximumPlatformFaultDomainCount)
	if !found {
		return nil, errors.Errorf("unable to get required availability set SKU capability %s", resourceskus.MaximumPlatformFaultDomainCount)
	}

	parsed, parseErr := strconv.ParseInt(raw, 10, 32)
	if parseErr != nil {
		return nil, errors.Wrapf(parseErr, "unable to parse availability set fault domain count")
	}
	return ptr.To[int32](int32(parsed)), nil
}
6 changes: 5 additions & 1 deletion azure/services/disks/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ package disks

import (
"context"
"strings"
"time"

"github.com/Azure/azure-sdk-for-go/sdk/azcore/runtime"
Expand All @@ -37,7 +38,10 @@ type azureClient struct {

// newClient creates a new disks client from an authorizer.
func newClient(auth azure.Authorizer, apiCallTimeout time.Duration) (*azureClient, error) {
opts, err := azure.ARMClientOptions(auth.CloudEnvironment())
opts, err := azure.ARMClientOptions(auth.CloudEnvironment(), auth.BaseURI())
// Check the error before touching opts: ARMClientOptions returns nil options
// on failure (e.g. when the Azure Stack environment cannot be resolved from
// the endpoint), so dereferencing opts first would panic.
if err != nil {
	return nil, errors.Wrap(err, "failed to create disks client options")
}
// The default Azure Stack API version profile is not supported for disks,
// so the disks client uses its own API version on Azure Stack.
if strings.EqualFold(auth.CloudEnvironment(), azure.StackCloudName) {
	opts.APIVersion = azure.StackDiskAPIVersionProfile
}
Expand Down
4 changes: 2 additions & 2 deletions azure/services/identities/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ type AzureClient struct {

// NewClient creates a new MSI client from an authorizer.
func NewClient(auth azure.Authorizer) (Client, error) {
opts, err := azure.ARMClientOptions(auth.CloudEnvironment())
opts, err := azure.ARMClientOptions(auth.CloudEnvironment(), auth.BaseURI())
if err != nil {
return nil, errors.Wrap(err, "failed to create identities client options")
}
Expand All @@ -54,7 +54,7 @@ func NewClient(auth azure.Authorizer) (Client, error) {

// NewClientBySub creates a new MSI client with a given subscriptionID.
func NewClientBySub(auth azure.Authorizer, subscriptionID string) (Client, error) {
opts, err := azure.ARMClientOptions(auth.CloudEnvironment())
opts, err := azure.ARMClientOptions(auth.CloudEnvironment(), auth.BaseURI())
if err != nil {
return nil, errors.Wrap(err, "failed to create identities client options")
}
Expand Down
2 changes: 1 addition & 1 deletion azure/services/inboundnatrules/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ var _ client = (*azureClient)(nil)

// newClient creates a new inbound NAT rules client from an authorizer.
func newClient(auth azure.Authorizer, apiCallTimeout time.Duration) (*azureClient, error) {
opts, err := azure.ARMClientOptions(auth.CloudEnvironment())
opts, err := azure.ARMClientOptions(auth.CloudEnvironment(), auth.BaseURI())
if err != nil {
return nil, errors.Wrap(err, "failed to create inboundnatrules client options")
}
Expand Down
Loading