Skip to content

Commit

Permalink
Initial support for GCP cluster health.
Browse files Browse the repository at this point in the history
  • Loading branch information
bergmannf committed Feb 15, 2023
1 parent 9cb3934 commit 658b97f
Show file tree
Hide file tree
Showing 3 changed files with 149 additions and 43 deletions.
148 changes: 107 additions & 41 deletions cmd/cluster/health.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package cluster

import (
"errors"
"fmt"
"log"
"strconv"
Expand All @@ -11,6 +12,7 @@ import (
"github.com/openshift/osdctl/pkg/osdCloud"
"github.com/openshift/osdctl/pkg/utils"
"github.com/spf13/cobra"
"google.golang.org/api/iterator"
"gopkg.in/yaml.v2"
cmdutil "k8s.io/kubectl/pkg/cmd/util"
)
Expand Down Expand Up @@ -96,59 +98,123 @@ func (o *healthOptions) run() error {
healthObject.Expected.Worker = int(cluster.Nodes().Compute())
}

awsClient, err := osdCloud.GenerateAWSClientForCluster(o.awsProfile, o.clusterID)
if err != nil {
return err
}

instances, err := awsClient.DescribeInstances(&ec2.DescribeInstancesInput{})
if err != nil {
return err
}
runningMasters := 0
runningInfra := 0
runningWorkers := 0
totalStopped := 0
totalCluster := 0

//Here we count the number of customer's running worker, infra and master instances in the cluster in the given region. To decide if the instance belongs to the cluster we are checking the Name Tag on the instance.
for idx := range instances.Reservations {
for _, inst := range instances.Reservations[idx].Instances {
tags := inst.Tags
for _, t := range tags {
if *t.Key == "Name" {
if strings.HasPrefix(*t.Value, cluster.InfraID()) && strings.Contains(*t.Value, "master") {
totalCluster += 1
if *inst.State.Name == "running" {
runningMasters += 1
}
if *inst.State.Name == "stopped" {
totalStopped += 1
}

} else if strings.HasPrefix(*t.Value, cluster.InfraID()) && strings.Contains(*t.Value, "infra") {
totalCluster += 1
if *inst.State.Name == "running" {
runningInfra += 1
}
if *inst.State.Name == "stopped" {
totalStopped += 1
}
} else if strings.HasPrefix(*t.Value, cluster.InfraID()) && strings.Contains(*t.Value, "worker") {
totalCluster += 1
if *inst.State.Name == "running" {
runningWorkers += 1
}
if *inst.State.Name == "stopped" {
totalStopped += 1
}

if cluster.CloudProvider().ID() == "gcp" {
clusterResources, err := ocmClient.ClustersMgmt().V1().Clusters().Cluster(o.clusterID).Resources().Live().Get().Send()
if err != nil {
return err
}
projectClaimRaw, found := clusterResources.Body().Resources()["gcp_project_claim"]
if !found {
return fmt.Errorf("The gcp_project_claim was not found in the ocm resource")
}
projectClaim, err := osdCloud.ParseGcpProjectClaim(projectClaimRaw)
if err != nil {
log.Printf("Unmarshalling GCP projectClaim failed: %v\n", err)
return err
}
projectId := projectClaim.Spec.GcpProjectID
zones := cluster.Nodes().AvailabilityZones()
if projectId == "" || len(zones) == 0 {
return fmt.Errorf("ProjectID or Zones empty - aborting")
}
gcpClient, err := osdCloud.GenerateGCPComputeInstancesClient()
defer gcpClient.Close()
if err != nil {
return err
}
ownedLabel := "kubernetes-io-cluster-" + cluster.InfraID()
for _, zone := range zones {
instances := osdCloud.ListInstances(gcpClient, projectId, zone)
for {
instance, err := instances.Next()
if err == iterator.Done {
break
}
if err != nil {
return err
}
name := instance.GetName()
state := instance.GetStatus()
labels := instance.GetLabels()
belongsToCluster := false
for label := range labels {
if label == ownedLabel {
belongsToCluster = true
}
}
if !belongsToCluster {
log.Printf("Skipping a machine not belonging to the cluster: %s\n", name)
continue
}
totalCluster += 1
if state != "RUNNING" {
totalStopped += 1
} else {
if strings.HasPrefix(name, cluster.InfraID()) && strings.Contains(name, "master") {
runningMasters += 1
} else if strings.HasPrefix(name, cluster.InfraID()) && strings.Contains(name, "infra") {
runningInfra += 1
} else if strings.HasPrefix(name, cluster.InfraID()) && strings.Contains(name, "worker") {
runningWorkers += 1
}
}
}
}
} else if cluster.CloudProvider().ID() == "aws" {
awsClient, err := osdCloud.GenerateAWSClientForCluster(o.awsProfile, o.clusterID)
if err != nil {
return err
}

instances, err := awsClient.DescribeInstances(&ec2.DescribeInstancesInput{})
if err != nil {
return err
}

//Here we count the number of customer's running worker, infra and master instances in the cluster in the given region. To decide if the instance belongs to the cluster we are checking the Name Tag on the instance.
for idx := range instances.Reservations {
for _, inst := range instances.Reservations[idx].Instances {
tags := inst.Tags
for _, t := range tags {
if *t.Key == "Name" {
if strings.HasPrefix(*t.Value, cluster.InfraID()) && strings.Contains(*t.Value, "master") {
totalCluster += 1
if *inst.State.Name == "running" {
runningMasters += 1
}
if *inst.State.Name == "stopped" {
totalStopped += 1
}

} else if strings.HasPrefix(*t.Value, cluster.InfraID()) && strings.Contains(*t.Value, "infra") {
totalCluster += 1
if *inst.State.Name == "running" {
runningInfra += 1
}
if *inst.State.Name == "stopped" {
totalStopped += 1
}
} else if strings.HasPrefix(*t.Value, cluster.InfraID()) && strings.Contains(*t.Value, "worker") {
totalCluster += 1
if *inst.State.Name == "running" {
runningWorkers += 1
}
if *inst.State.Name == "stopped" {
totalStopped += 1
}
}
}
}
}
}
} else {
return errors.New(fmt.Sprintf("Unknown cloud provider found: %s", cluster.CloudProvider().ID()))
}

healthObject.Actual.Stopped = totalStopped
Expand Down
4 changes: 2 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ require (
github.com/spf13/cobra v1.6.1
github.com/spf13/pflag v1.0.5
github.com/spf13/viper v1.13.0
google.golang.org/api v0.84.0
google.golang.org/genproto v0.0.0-20220616135557-88e70c0c3a90
gopkg.in/yaml.v2 v2.4.0
k8s.io/api v0.24.3
k8s.io/apimachinery v0.24.3
Expand Down Expand Up @@ -146,9 +148,7 @@ require (
golang.org/x/term v0.3.0 // indirect
golang.org/x/text v0.5.0 // indirect
golang.org/x/time v0.0.0-20220210224613-90d013bbcef8 // indirect
google.golang.org/api v0.84.0 // indirect
google.golang.org/appengine v1.6.7 // indirect
google.golang.org/genproto v0.0.0-20220616135557-88e70c0c3a90 // indirect
google.golang.org/grpc v1.47.0 // indirect
google.golang.org/protobuf v1.28.1 // indirect
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c // indirect
Expand Down
40 changes: 40 additions & 0 deletions pkg/osdCloud/gcp.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
package osdCloud

import (
"context"
"encoding/json"

compute "cloud.google.com/go/compute/apiv1"
computepb "google.golang.org/genproto/googleapis/cloud/compute/v1"
)

type (
	// GcpProjectClaimSpec is the "spec" object of a GCP project claim. Only
	// the "gcpProjectID" key is decoded; any other keys are ignored.
	GcpProjectClaimSpec struct {
		GcpProjectID string `json:"gcpProjectID"`
	}

	// GcpProjectClaim is the top-level shape of a GCP project claim document
	// as far as this package reads it: a single "spec" object.
	GcpProjectClaim struct {
		Spec GcpProjectClaimSpec `json:"spec"`
	}
)

// ParseGcpProjectClaim decodes raw, a JSON document, into a GcpProjectClaim.
//
// On success it returns a pointer to the decoded claim and a nil error. If
// raw is not valid JSON for that shape, it returns a nil claim together with
// the error reported by json.Unmarshal, unmodified.
func ParseGcpProjectClaim(raw string) (*GcpProjectClaim, error) {
	claim := new(GcpProjectClaim)
	if err := json.Unmarshal([]byte(raw), claim); err != nil {
		return nil, err
	}
	return claim, nil
}

// GenerateGCPComputeInstancesClient creates a Compute Engine instances client
// by calling compute.NewInstancesRESTClient with a background context and no
// additional options. The client and error are handed back exactly as that
// constructor returned them.
func GenerateGCPComputeInstancesClient() (*compute.InstancesClient, error) {
	return compute.NewInstancesRESTClient(context.Background())
}

// ListInstances issues a list-instances request for the given project and
// zone through client, using a background context, and returns the iterator
// produced by client.List. Errors, if any, surface when the caller advances
// the iterator; this function itself returns none.
func ListInstances(client *compute.InstancesClient, projectID, zone string) *compute.InstanceIterator {
	return client.List(context.Background(), &computepb.ListInstancesRequest{
		Project: projectID,
		Zone:    zone,
	})
}

0 comments on commit 658b97f

Please sign in to comment.