diff --git a/cmd/cluster/health.go b/cmd/cluster/health.go index 1fd329b2..0efab7d3 100644 --- a/cmd/cluster/health.go +++ b/cmd/cluster/health.go @@ -1,6 +1,7 @@ package cluster import ( + "errors" "fmt" "log" "strconv" @@ -11,6 +12,7 @@ import ( "github.com/openshift/osdctl/pkg/osdCloud" "github.com/openshift/osdctl/pkg/utils" "github.com/spf13/cobra" + "google.golang.org/api/iterator" "gopkg.in/yaml.v2" cmdutil "k8s.io/kubectl/pkg/cmd/util" ) @@ -96,59 +98,123 @@ func (o *healthOptions) run() error { healthObject.Expected.Worker = int(cluster.Nodes().Compute()) } - awsClient, err := osdCloud.GenerateAWSClientForCluster(o.awsProfile, o.clusterID) - if err != nil { - return err - } - - instances, err := awsClient.DescribeInstances(&ec2.DescribeInstancesInput{}) - if err != nil { - return err - } runningMasters := 0 runningInfra := 0 runningWorkers := 0 totalStopped := 0 totalCluster := 0 - //Here we count the number of customer's running worker, infra and master instances in the cluster in the given region. To decide if the instance belongs to the cluster we are checking the Name Tag on the instance. 
- for idx := range instances.Reservations { - for _, inst := range instances.Reservations[idx].Instances { - tags := inst.Tags - for _, t := range tags { - if *t.Key == "Name" { - if strings.HasPrefix(*t.Value, cluster.InfraID()) && strings.Contains(*t.Value, "master") { - totalCluster += 1 - if *inst.State.Name == "running" { - runningMasters += 1 - } - if *inst.State.Name == "stopped" { - totalStopped += 1 - } - - } else if strings.HasPrefix(*t.Value, cluster.InfraID()) && strings.Contains(*t.Value, "infra") { - totalCluster += 1 - if *inst.State.Name == "running" { - runningInfra += 1 - } - if *inst.State.Name == "stopped" { - totalStopped += 1 - } - } else if strings.HasPrefix(*t.Value, cluster.InfraID()) && strings.Contains(*t.Value, "worker") { - totalCluster += 1 - if *inst.State.Name == "running" { - runningWorkers += 1 - } - if *inst.State.Name == "stopped" { - totalStopped += 1 - } - + if cluster.CloudProvider().ID() == "gcp" { + clusterResources, err := ocmClient.ClustersMgmt().V1().Clusters().Cluster(o.clusterID).Resources().Live().Get().Send() + if err != nil { + return err + } + projectClaimRaw, found := clusterResources.Body().Resources()["gcp_project_claim"] + if !found { + return fmt.Errorf("The gcp_project_claim was not found in the ocm resource") + } + projectClaim, err := osdCloud.ParseGcpProjectClaim(projectClaimRaw) + if err != nil { + log.Printf("Unmarshalling GCP projectClaim failed: %v\n", err) + return err + } + projectId := projectClaim.Spec.GcpProjectID + zones := cluster.Nodes().AvailabilityZones() + if projectId == "" || len(zones) == 0 { + return fmt.Errorf("ProjectID or Zones empty - aborting") + } + gcpClient, err := osdCloud.GenerateGCPComputeInstancesClient() + defer gcpClient.Close() + if err != nil { + return err + } + ownedLabel := "kubernetes-io-cluster-" + cluster.InfraID() + for _, zone := range zones { + instances := osdCloud.ListInstances(gcpClient, projectId, zone) + for { + instance, err := instances.Next() + if err == 
iterator.Done { + break + } + if err != nil { + return err + } + name := instance.GetName() + state := instance.GetStatus() + labels := instance.GetLabels() + belongsToCluster := false + for label := range labels { + if label == ownedLabel { + belongsToCluster = true + } + } + if !belongsToCluster { + log.Printf("Skipping a machine not belonging to the cluster: %s\n", name) + continue + } + totalCluster += 1 + if state != "RUNNING" { + totalStopped += 1 + } else { + if strings.HasPrefix(name, cluster.InfraID()) && strings.Contains(name, "master") { + runningMasters += 1 + } else if strings.HasPrefix(name, cluster.InfraID()) && strings.Contains(name, "infra") { + runningInfra += 1 + } else if strings.HasPrefix(name, cluster.InfraID()) && strings.Contains(name, "worker") { + runningWorkers += 1 } } } + } + } else if cluster.CloudProvider().ID() == "aws" { + awsClient, err := osdCloud.GenerateAWSClientForCluster(o.awsProfile, o.clusterID) + if err != nil { + return err + } + instances, err := awsClient.DescribeInstances(&ec2.DescribeInstancesInput{}) + if err != nil { + return err } + //Here we count the number of customer's running worker, infra and master instances in the cluster in the given region. To decide if the instance belongs to the cluster we are checking the Name Tag on the instance. 
+ for idx := range instances.Reservations { + for _, inst := range instances.Reservations[idx].Instances { + tags := inst.Tags + for _, t := range tags { + if *t.Key == "Name" { + if strings.HasPrefix(*t.Value, cluster.InfraID()) && strings.Contains(*t.Value, "master") { + totalCluster += 1 + if *inst.State.Name == "running" { + runningMasters += 1 + } + if *inst.State.Name == "stopped" { + totalStopped += 1 + } + + } else if strings.HasPrefix(*t.Value, cluster.InfraID()) && strings.Contains(*t.Value, "infra") { + totalCluster += 1 + if *inst.State.Name == "running" { + runningInfra += 1 + } + if *inst.State.Name == "stopped" { + totalStopped += 1 + } + } else if strings.HasPrefix(*t.Value, cluster.InfraID()) && strings.Contains(*t.Value, "worker") { + totalCluster += 1 + if *inst.State.Name == "running" { + runningWorkers += 1 + } + if *inst.State.Name == "stopped" { + totalStopped += 1 + } + } + } + } + } + } + } else { + return errors.New(fmt.Sprintf("Unknown cloud provider found: %s", cluster.CloudProvider().ID())) } healthObject.Actual.Stopped = totalStopped diff --git a/go.mod b/go.mod index dd510bd0..729f9329 100644 --- a/go.mod +++ b/go.mod @@ -30,6 +30,8 @@ require ( github.com/spf13/cobra v1.6.1 github.com/spf13/pflag v1.0.5 github.com/spf13/viper v1.13.0 + google.golang.org/api v0.84.0 + google.golang.org/genproto v0.0.0-20220616135557-88e70c0c3a90 gopkg.in/yaml.v2 v2.4.0 k8s.io/api v0.24.3 k8s.io/apimachinery v0.24.3 @@ -146,9 +148,7 @@ require ( golang.org/x/term v0.3.0 // indirect golang.org/x/text v0.5.0 // indirect golang.org/x/time v0.0.0-20220210224613-90d013bbcef8 // indirect - google.golang.org/api v0.84.0 // indirect google.golang.org/appengine v1.6.7 // indirect - google.golang.org/genproto v0.0.0-20220616135557-88e70c0c3a90 // indirect google.golang.org/grpc v1.47.0 // indirect google.golang.org/protobuf v1.28.1 // indirect gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c // indirect diff --git a/pkg/osdCloud/gcp.go b/pkg/osdCloud/gcp.go 
new file mode 100644
index 00000000..79b3aef4
--- /dev/null
+++ b/pkg/osdCloud/gcp.go
@@ -0,0 +1,40 @@
+package osdCloud // GCP helpers: decode a project claim and list Compute Engine instances.
+
+import (
+	"context"
+	"encoding/json"
+
+	compute "cloud.google.com/go/compute/apiv1"
+	computepb "google.golang.org/genproto/googleapis/cloud/compute/v1"
+)
+// GcpProjectClaimSpec is the decoded "spec" object of a project claim; only the project ID is read.
+type GcpProjectClaimSpec struct {
+	GcpProjectID string `json:"gcpProjectID"` // filled from the "gcpProjectID" JSON key
+}
+type GcpProjectClaim struct { // GcpProjectClaim is the decoded project claim document; only its "spec" object is read.
+	Spec GcpProjectClaimSpec `json:"spec"`
+}
+// ParseGcpProjectClaim decodes raw as JSON into a GcpProjectClaim; on failure it returns nil and the json.Unmarshal error.
+func ParseGcpProjectClaim(raw string) (*GcpProjectClaim, error) {
+	var projectClaim GcpProjectClaim
+	err := json.Unmarshal([]byte(raw), &projectClaim)
+	if err != nil {
+		return nil, err // the decode error is passed through unchanged, without added context
+	}
+	return &projectClaim, nil
+}
+// GenerateGCPComputeInstancesClient returns the result of compute.NewInstancesRESTClient; check err before using or closing the client.
+func GenerateGCPComputeInstancesClient() (*compute.InstancesClient, error) {
+	ctx := context.Background() // background context: no deadline or cancellation
+	client, err := compute.NewInstancesRESTClient(ctx) // no client options are passed here
+	return client, err // NOTE(review): client is presumably nil when err != nil - confirm against the library docs
+}
+// ListInstances returns the iterator produced by client.List for the given project and zone.
+func ListInstances(client *compute.InstancesClient, projectID, zone string) *compute.InstanceIterator {
+	ctx := context.Background() // background context: no deadline or cancellation
+	request := &computepb.ListInstancesRequest{
+		Project: projectID,
+		Zone:    zone,
+	}
+	return client.List(ctx, request) // no error is returned here; the caller drives the iterator with Next
+}