Create a Runtime SDK extension for Cluster API

Just as an admission controller lets you hook into workload cluster requests (creation, update, deletion of objects) and validate or mutate them, a runtime extension allows you to hook into various cluster lifecycle events and make the necessary changes.

NOTE – Currently the feature is experimental; to enable it you have to set the environment variable EXP_RUNTIME_SDK=true.

In general, the extension works as a webhook and can be written in any language you prefer, but to leverage the upstream CAPI packages we are going to use Golang here.

Here we are going to create a Runtime SDK extension that hooks into both BeforeClusterDelete & AfterControlPlaneInitialized and operates on ConfigMaps. Let’s create a project named runtimesdk and create a main.go file where we are –

  • Initializing the necessary command line flags.
  • Creating a Golang profiler server.
  • Getting the client for interacting with the Kubernetes API server.
  • Getting the handlers that we are going to implement next.
  • Initializing the webhook server.
  • Registering the BeforeClusterDelete and AfterControlPlaneInitialized hooks in the webhook server.
  • Running the webhook server.
package main

import (
	"flag"
	"net/http"
	"os"

	handler "github.com/aniruddha2000/runtime-sdk/handlers"
	"github.com/spf13/pflag"
	cliflag "k8s.io/component-base/cli/flag"
	"k8s.io/component-base/logs"
	logsv1 "k8s.io/component-base/logs/api/v1"
	"k8s.io/klog/v2"
	ctrl "sigs.k8s.io/controller-runtime"
	"sigs.k8s.io/controller-runtime/pkg/client"

	runtimecatalog "sigs.k8s.io/cluster-api/exp/runtime/catalog"
	runtimehooksv1 "sigs.k8s.io/cluster-api/exp/runtime/hooks/api/v1alpha1"
	"sigs.k8s.io/cluster-api/exp/runtime/server"
)

var (
	// catalog contains all information about RuntimeHooks.
	catalog = runtimecatalog.New()

	// Flags.
	profilerAddress string
	webhookPort     int
	webhookCertDir  string
	logOptions      = logs.NewOptions()
)

func init() {
	// Adds to the catalog all the RuntimeHooks defined in cluster API.
	_ = runtimehooksv1.AddToCatalog(catalog)
}

// InitFlags initializes the flags.
func InitFlags(fs *pflag.FlagSet) {
	// Initialize logs flags using Kubernetes component-base machinery.
	logsv1.AddFlags(logOptions, fs)

	// Add test-extension specific flags
	fs.StringVar(&profilerAddress, "profiler-address", "",
		"Bind address to expose the pprof profiler (e.g. localhost:6060)")

	fs.IntVar(&webhookPort, "webhook-port", 9443,
		"Webhook Server port")

	fs.StringVar(&webhookCertDir, "webhook-cert-dir", "/tmp/k8s-webhook-server/serving-certs/",
		"Webhook cert dir, only used when webhook-port is specified.")
}

func main() {
	// Creates a logger to be used during the main func.
	setupLog := ctrl.Log.WithName("main")

	// Initialize and parse command line flags.
	InitFlags(pflag.CommandLine)
	pflag.CommandLine.SetNormalizeFunc(cliflag.WordSepNormalizeFunc)
	pflag.CommandLine.AddGoFlagSet(flag.CommandLine)
	pflag.Parse()

	// Validates logs flags using Kubernetes component-base machinery and applies them
	if err := logsv1.ValidateAndApply(logOptions, nil); err != nil {
		setupLog.Error(err, "unable to start extension")
		os.Exit(1)
	}

	// Add the klog logger in the context.
	ctrl.SetLogger(klog.Background())

	// Initialize the golang profiler server, if required.
	if profilerAddress != "" {
		klog.Infof("Profiler listening for requests at %s", profilerAddress)
		go func() {
			klog.Info(http.ListenAndServe(profilerAddress, nil))
		}()
	}

	// Create a http server for serving runtime extensions
	webhookServer, err := server.New(server.Options{
		Catalog: catalog,
		Port:    webhookPort,
		CertDir: webhookCertDir,
	})
	if err != nil {
		setupLog.Error(err, "error creating webhook server")
		os.Exit(1)
	}

	// Lifecycle Hooks
	restConfig, err := ctrl.GetConfig()
	if err != nil {
		setupLog.Error(err, "error getting config for the cluster")
		os.Exit(1)
	}

	client, err := client.New(restConfig, client.Options{})
	if err != nil {
		setupLog.Error(err, "error creating client to the cluster")
		os.Exit(1)
	}

	lifecycleExtensionHandlers := handler.NewExtensionHandlers(client)

	// Register extension handlers.
	if err := webhookServer.AddExtensionHandler(server.ExtensionHandler{
		Hook:        runtimehooksv1.BeforeClusterDelete,
		Name:        "before-cluster-delete",
		HandlerFunc: lifecycleExtensionHandlers.DoBeforeClusterDelete,
	}); err != nil {
		setupLog.Error(err, "error adding handler")
		os.Exit(1)
	}

	if err := webhookServer.AddExtensionHandler(server.ExtensionHandler{
		Hook:        runtimehooksv1.AfterControlPlaneInitialized,
		Name:        "before-cluster-create",
		HandlerFunc: lifecycleExtensionHandlers.DoAfterControlPlaneInitialized,
	}); err != nil {
		setupLog.Error(err, "error adding handler")
		os.Exit(1)
	}

	// Setup a context listening for SIGINT.
	ctx := ctrl.SetupSignalHandler()

	// Start the https server.
	setupLog.Info("Starting Runtime Extension server")
	if err := webhookServer.Start(ctx); err != nil {
		setupLog.Error(err, "error running webhook server")
		os.Exit(1)
	}
}

Now it’s time to create the handlers for each event. Let’s create a file handlers/hooks.go, where we are doing the following –

  • DoAfterControlPlaneInitialized –
    • Checks whether a ConfigMap with the particular name & namespace is present.
    • If not, it creates one; otherwise it does nothing and the request passes.
  • DoBeforeClusterDelete –
    • Checks whether the ConfigMap with the particular name & namespace is present.
    • If it is, it deletes it before the workload cluster gets deleted; otherwise the request passes.
package handler

import (
	"context"
	"fmt"

	"github.com/pkg/errors"
	corev1 "k8s.io/api/core/v1"
	apierrors "k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/klog/v2"
	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
	runtimehooksv1 "sigs.k8s.io/cluster-api/exp/runtime/hooks/api/v1alpha1"
	ctrl "sigs.k8s.io/controller-runtime"
	"sigs.k8s.io/controller-runtime/pkg/client"
)

type ExtensionHandler struct {
	client client.Client
}

func NewExtensionHandlers(client client.Client) *ExtensionHandler {
	return &ExtensionHandler{
		client: client,
	}
}

func (e *ExtensionHandler) DoBeforeClusterDelete(ctx context.Context, request *runtimehooksv1.BeforeClusterDeleteRequest, response *runtimehooksv1.BeforeClusterDeleteResponse) {
	log := ctrl.LoggerFrom(ctx)
	log.Info("DoBeforeClusterDelete is called")
	log.Info("Namespace:", request.Cluster.GetNamespace(), "ClusterName: ", request.Cluster.GetName())

	// Your implementation
	configMapName := fmt.Sprintf("%s-test-extension-hookresponse", request.Cluster.GetName())
	ok, err := e.checkConfigMap(ctx, &request.Cluster, configMapName)
	if err != nil {
		response.Status = runtimehooksv1.ResponseStatusFailure
		response.Message = err.Error()
		return
	}
	if ok {
		if err := e.deleteConfigMap(ctx, &request.Cluster, configMapName); err != nil {
			response.Status = runtimehooksv1.ResponseStatusFailure
			response.Message = err.Error()
			return
		}
	}
}

func (e *ExtensionHandler) DoAfterControlPlaneInitialized(ctx context.Context, request *runtimehooksv1.AfterControlPlaneInitializedRequest, response *runtimehooksv1.AfterControlPlaneInitializedResponse) {
	log := ctrl.LoggerFrom(ctx)
	log.Info("DoAfterControlPlaneInitialized is called")
	log.Info("Namespace:", request.Cluster.GetNamespace(), "ClusterName: ", request.Cluster.GetName())

	// Your implementation
	configMapName := fmt.Sprintf("%s-test-extension-hookresponse", request.Cluster.GetName())
	ok, err := e.checkConfigMap(ctx, &request.Cluster, configMapName)
	if err != nil {
		response.Status = runtimehooksv1.ResponseStatusFailure
		response.Message = err.Error()
		return
	}
	if !ok {
		if err := e.createConfigMap(ctx, &request.Cluster, configMapName); err != nil {
			response.Status = runtimehooksv1.ResponseStatusFailure
			response.Message = err.Error()
			return
		}
	}
}

func (e *ExtensionHandler) checkConfigMap(ctx context.Context, cluster *clusterv1.Cluster, configMapName string) (bool, error) {
	log := ctrl.LoggerFrom(ctx)
	log.Info("Checking for ConfigMap", configMapName)

	configMap := &corev1.ConfigMap{}
	nsName := client.ObjectKey{Namespace: cluster.GetNamespace(), Name: configMapName}
	if err := e.client.Get(ctx, nsName, configMap); err != nil {
		if apierrors.IsNotFound(err) {
			log.Info("ConfigMap not found")
			return false, nil
		}
		log.Error(err, "ConfigMap not found with an error")
		return false, errors.Wrapf(err, "failed to read the ConfigMap %s", klog.KRef(cluster.Namespace, configMapName))
	}
	log.Info("ConfigMap found")
	return true, nil
}

func (e *ExtensionHandler) createConfigMap(ctx context.Context, cluster *clusterv1.Cluster, configMapName string) error {
	log := ctrl.LoggerFrom(ctx)
	log.Info("Creating ConfigMap")

	configMap := e.getConfigMap(cluster, configMapName)
	if err := e.client.Create(ctx, configMap); err != nil {
		log.Error(err, "failed to create ConfigMap")
		return errors.Wrapf(err, "failed to create the ConfigMap %s", klog.KRef(cluster.Namespace, configMapName))
	}
	log.Info("configmap created successfully")
	return nil
}

func (e *ExtensionHandler) deleteConfigMap(ctx context.Context, cluster *clusterv1.Cluster, configMapName string) error {
	log := ctrl.LoggerFrom(ctx)
	log.Info("Deleting ConfigMap")

	if err := e.client.Delete(ctx, &corev1.ConfigMap{
		ObjectMeta: metav1.ObjectMeta{
			Name:      configMapName,
			Namespace: cluster.GetNamespace(),
		},
	}); err != nil {
		log.Error(err, "failed to delete ConfigMap")
		return err
	}
	return nil
}

func (e *ExtensionHandler) getConfigMap(cluster *clusterv1.Cluster, configMapName string) *corev1.ConfigMap {
	return &corev1.ConfigMap{
		ObjectMeta: metav1.ObjectMeta{
			Name:      configMapName,
			Namespace: cluster.GetNamespace(),
		},
		Data: map[string]string{
			"AfterControlPlaneInitialized-preloadedResponse": `{"Status": "Success"}`,
		},
	}
}

Implement the Kubernetes manifests

This is the most interesting part and some bits and pieces need to be taken care of, such as –

  • The Kubernetes ecosystem only supports TLS-secured webhooks by default. For that, we are going to use cert-manager to automate issuing a self-signed certificate.
  • The extension config must be registered through the ExtensionConfig CRD.
  • Don’t forget about RBAC: if you are performing operations on some resources, make sure you define permissions for them.

NOTE – For this example, we are doing everything in runtimesdk namespace.

Let’s start with certificate.yaml

  • Creating a self-signed certificate using the Issuer
  • Defining the DNS Service name for the certificate
    • <service_name>.<namespace>.svc
apiVersion: cert-manager.io/v1
kind: Issuer
metadata:
  name: runtime-sdk-selfsigned-issuer
  namespace: runtimesdk
spec:
  selfSigned: {}

---
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
  name: serving-cert
  namespace: runtimesdk
spec:
  dnsNames:
    - test-runtime-sdk-svc.runtimesdk.svc
    - test-runtime-sdk-svc.runtimesdk.svc.cluster.local
    - localhost
  issuerRef:
    kind: Issuer
    name: runtime-sdk-selfsigned-issuer
  secretName: test-runtime-sdk-svc-cert

service.yaml

  • Defining a ClusterIP Service that selects the webhook deployment. The Service listens on port 443 (the standard HTTPS port) and forwards traffic to the webhook’s container port 9443.
apiVersion: v1
kind: Service
metadata:
  name: test-runtime-sdk-svc
  namespace: runtimesdk
spec:
  type: ClusterIP
  selector:
    app: test-runtime-sdk
  ports:
    - port: 443
      targetPort: 9443

deployment.yaml

  • Build your docker image and push it to your registry.
  • Mount the certificate Secret as a volume and point the container to it via the --webhook-cert-dir argument.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: test-runtime-sdk
  namespace: runtimesdk
spec:
  selector:
    matchLabels:
      app: test-runtime-sdk
  template:
    metadata:
      labels:
        app: test-runtime-sdk
    spec:
      serviceAccountName: test-runtime-sdk-sa
      containers:
        - name: test-runtime-sdk
          image: <image_name>:<image_tag>
          imagePullPolicy: Always
          args:
            - --webhook-cert-dir=/var/run/webhook/serving-cert/
          resources:
            limits:
              memory: "128Mi"
              cpu: "500m"
          ports:
            - containerPort: 9443
          volumeMounts:
            - mountPath: /var/run/webhook/serving-cert
              name: serving-cert
      volumes:
        - name: serving-cert
          secret:
            secretName: test-runtime-sdk-svc-cert

ServiceAccount, ClusterRole, ClusterRoleBinding –

  • Create your own service account.
  • Add get, list, create, and delete permissions.
  • Bind the role to the service account using a ClusterRoleBinding.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: test-runtime-sdk-sa
  namespace: runtimesdk

---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: test-runtime-sdk-role
rules:
  - apiGroups:
      - ""
    resources:
      - configmaps
    verbs:
      - get
      - list
      - create
      - delete

---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: test-runtime-sdk-role-rolebinding
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: test-runtime-sdk-role
subjects:
  - kind: ServiceAccount
    name: test-runtime-sdk-sa
    namespace: runtimesdk

Lastly, the most important piece, the ExtensionConfig CRD –

  • Inject the CA bundle from the certificate Secret through an annotation.
  • Specify where the Runtime Extension is deployed.
  • Specify which namespaces’ Clusters use the Runtime Extension.
apiVersion: runtime.cluster.x-k8s.io/v1alpha1
kind: ExtensionConfig
metadata:
  annotations:
    runtime.cluster.x-k8s.io/inject-ca-from-secret: runtimesdk/test-runtime-sdk-svc-cert
  name: test-runtime-sdk-extensionconfig
spec:
  clientConfig:
    service:
      name: test-runtime-sdk-svc
      namespace: runtimesdk # Note: this assumes the test extension gets deployed in the runtimesdk namespace
      port: 443
  namespaceSelector:
    matchExpressions:
      - key: kubernetes.io/metadata.name
        operator: In
        values:
          - default # Note: this assumes the test extension is used by Cluster in the default namespace only

You can define the Dockerfile like this –

FROM golang:alpine3.17 as builder
WORKDIR /src
COPY . .
RUN --mount=type=cache,target=/root/.cache/go-build \
    --mount=type=cache,target=/go/pkg/mod \
    go build -o runtime-sdk

FROM alpine
WORKDIR /app
COPY --from=builder /src/runtime-sdk /app/runtime-sdk
ENTRYPOINT ["/app/runtime-sdk"]

Let’s run the App in a Kind CAPD Cluster

  • Export the necessary environment variables and create a kind cluster –
$ cat > cluster.env << EOF
export CLUSTER_TOPOLOGY=true
export EXP_RUNTIME_SDK=true
export SERVICE_CIDR=["10.96.0.0/12"]
export POD_CIDR=["192.168.0.0/16"]
export SERVICE_DOMAIN="k8s.test"
EOF

$ source cluster.env

$ cat > kind-cluster-with-extramounts.yaml <<EOF
kind: Cluster
apiVersion: kind.x-k8s.io/v1alpha4
networking:
  ipFamily: dual
name: extension-config-test
nodes:
- role: control-plane
  extraMounts:
    - hostPath: /var/run/docker.sock
      containerPath: /var/run/docker.sock
EOF

$ kind create cluster --config kind-cluster-with-extramounts.yaml
Creating cluster "extension-config-test" ...
 ✓ Ensuring node image (kindest/node:v1.27.1) 🖼
 ✓ Preparing nodes 📦  
 ✓ Writing configuration 📜 
 ✓ Starting control-plane 🕹️ 
 ✓ Installing CNI 🔌 
 ✓ Installing StorageClass 💾 
Set kubectl context to "kind-extension-config-test"
You can now use your cluster with:

kubectl cluster-info --context kind-extension-config-test

Thanks for using kind! 😊
  • Create the runtimesdk namespace & initialize the management cluster –
$ kubectl create ns runtimesdk

$ clusterctl init --infrastructure docker
Fetching providers
Installing cert-manager Version="v1.11.1"
Waiting for cert-manager to be available...
Installing Provider="cluster-api" Version="v1.4.2" TargetNamespace="capi-system"
Installing Provider="bootstrap-kubeadm" Version="v1.4.2" TargetNamespace="capi-kubeadm-bootstrap-system"
Installing Provider="control-plane-kubeadm" Version="v1.4.2" TargetNamespace="capi-kubeadm-control-plane-system"
Installing Provider="infrastructure-docker" Version="v1.4.2" TargetNamespace="capd-system"

Your management cluster has been initialized successfully!

You can now create your first workload cluster by running the following:

  clusterctl generate cluster [name] --kubernetes-version [version] | kubectl apply -f -
  • Now apply all of the manifests we created. There are two things you should check –
    • The extension Deployment logs.
    • The status of the ExtensionConfig resource.
$ k apply -f runtime-sdk/manifests/config/
extensionconfig.runtime.cluster.x-k8s.io/test-runtime-sdk-extensionconfig created
issuer.cert-manager.io/runtime-sdk-selfsigned-issuer created
certificate.cert-manager.io/serving-cert created
deployment.apps/test-runtime-sdk created
serviceaccount/test-runtime-sdk-sa created
clusterrole.rbac.authorization.k8s.io/test-runtime-sdk-role created
clusterrolebinding.rbac.authorization.k8s.io/test-runtime-sdk-role-rolebinding created
service/test-runtime-sdk-svc created
$ k get pods -n runtimesdk
NAME                                READY   STATUS    RESTARTS   AGE
test-runtime-sdk-5bc665d7b9-725hl   1/1     Running   0          12m

$ k logs -n runtimesdk test-runtime-sdk-5bc665d7b9-725hl --follow
I0524 07:30:59.714901       1 main.go:130] "main: Starting Runtime Extension server"
I0524 07:30:59.715180       1 server.go:149] "controller-runtime/webhook: Registering webhook" path="/hooks.runtime.cluster.x-k8s.io/v1alpha1/beforeclusterdelete/before-cluster-delete"
I0524 07:30:59.715261       1 server.go:149] "controller-runtime/webhook: Registering webhook" path="/hooks.runtime.cluster.x-k8s.io/v1alpha1/aftercontrolplaneinitialized/before-cluster-create"
I0524 07:30:59.715314       1 server.go:149] "controller-runtime/webhook: Registering webhook" path="/hooks.runtime.cluster.x-k8s.io/v1alpha1/discovery"
I0524 07:30:59.715340       1 server.go:217] "controller-runtime/webhook/webhooks: Starting webhook server"
I0524 07:30:59.716380       1 certwatcher.go:131] "controller-runtime/certwatcher: Updated current TLS certificate"
I0524 07:30:59.716757       1 certwatcher.go:85] "controller-runtime/certwatcher: Starting certificate watcher"
I0524 07:30:59.716918       1 server.go:271] "controller-runtime/webhook: Serving webhook server" host="" port=9443

The logs show that our app is running fine; now let’s look at the status,

$ k describe extensionconfig test-runtime-sdk-extensionconfig -n runtimesdk
Name:         test-runtime-sdk-extensionconfig
Namespace:    
Labels:       <none>
Annotations:  runtime.cluster.x-k8s.io/inject-ca-from-secret: runtimesdk/test-runtime-sdk-svc-cert
API Version:  runtime.cluster.x-k8s.io/v1alpha1
Kind:         ExtensionConfig
Metadata:
  Creation Timestamp:  2023-05-24T07:21:49Z
  Generation:          2
  Resource Version:    3939
  UID:                 62af95a7-d924-46f6-9c5a-4ba3f4407749
Spec:
  Client Config:
    Ca Bundle:  LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSURIakNDQWdhZ0F3SUJBZ0lSQUx0b1VxQzlEdHBIVTl2TkJrU0xmV0l3RFFZSktvWklodmNOQVFFTEJRQXcKQURBZUZ3MHlNekExTWpRd056SXhORGxhRncweU16QTRNakl3TnpJeE5EbGFNQUF3Z2dFaU1BMEdDU3FHU0liMwpEUUVCQVFVQUE0SUJEd0F3Z2dFS0FvSUJBUURBMUl0Mm1OdVdJMmpRUlY1cHRWTDZ3cGFHdWhObG9GWHV2b1poCkwzWHJRcktiWmRaRnJUbGlZSTI4TXlxVmhSNGh2U2MzVXp5TS8rUjdYVURCT01BNkFZeEtacXg0a3VPRk1ITXkKcUhDTTNuZTZUUCsxUS9CQkRWelMvdk9tRzdnNlF1V3VyMmFtbW4zeTI4dUpWZ0hVaUZQaHZLVHE4U0J4LzY0NQo3bEluQWVpSWVrc3JqTHFJRlFka3NnSlAvbUxSTjI4RTNPL0tVTEp5RWxsakxIelZZcmVXck5rUEh6OGVmZmFECmtmSnMxTTN0NFh3c1Jyd09QQXliUmtGcTNJbENpNEoyL3EyZHZTRlRXdy9EelRuSkE1OEt6N003MlN6aXlJRnkKM1U3ajRISkVqbG9paGU2dlJtUUxEZm5wV0xEdXhvbVJpdURMWU14dHU5VkxweEdIQWdNQkFBR2pnWkl3Z1k4dwpEZ1lEVlIwUEFRSC9CQVFEQWdXZ01Bd0dBMVVkRXdFQi93UUNNQUF3YndZRFZSMFJBUUgvQkdVd1k0SWpkR1Z6CmRDMXlkVzUwYVcxbExYTmtheTF6ZG1NdWNuVnVkR2x0WlhOa2F5NXpkbU9DTVhSbGMzUXRjblZ1ZEdsdFpTMXoKWkdzdGMzWmpMbkoxYm5ScGJXVnpaR3N1YzNaakxtTnNkWE4wWlhJdWJHOWpZV3lDQ1d4dlkyRnNhRzl6ZERBTgpCZ2txaGtpRzl3MEJBUXNGQUFPQ0FRRUFFSUsvOFJqeFBiYy80T2I4MWY4Z2h2dVN3Z0Y0V0dkK3dONVZpSndICngzVm5GWGJ6d1YvMHZreEJ5SDhFR2xLcnRjcTNVMDFvZ0taQVRadW9DYWxLVjZvUHYvNklNbXR4WHMzMk5EeWoKamwvU3FHOXJlMFhRMXBYa2xIVHpIMk9ha0ozWjZ1TUMxSzgrWS9YRUJMYzZibjhYSXpad3N5VDJkZ0RJeTkrNQpkMjZqek9EejZ4Y2h2TzBSNm1ZK2psazJpMzdwSHRiZWxrOExFeE9ObmFNWlZvWWIrYmtRWXZ5MEZQdEhsZ0NnClQycVBWQ3FISmV2cWxIakk3UFQ4YmVlNFVKcHc1Rld4L0FjbU9qd3BjTkZWbkMwaFFtZmNTazNvb2Z4bTViem0KUTd1d1ZaSzBmWDFaVjJvWGNrZEtPMUluNnZpVkpWSzRESzV3MXh3MnBMWHhGUT09Ci0tLS0tRU5EIENFUlRJRklDQVRFLS0tLS0K
    Service:
      Name:       test-runtime-sdk-svc
      Namespace:  runtimesdk
      Port:       443
  Namespace Selector:
    Match Expressions:
      Key:       kubernetes.io/metadata.name
      Operator:  In
      Values:
        default
Status:
  Conditions:
    Last Transition Time:  2023-05-24T07:32:44Z
    Status:                True
    Type:                  Discovered
  Handlers:
    Failure Policy:  Fail
    Name:            before-cluster-delete.test-runtime-sdk-extensionconfig
    Request Hook:
      API Version:    hooks.runtime.cluster.x-k8s.io/v1alpha1
      Hook:           BeforeClusterDelete
    Timeout Seconds:  10
    Failure Policy:   Fail
    Name:             before-cluster-create.test-runtime-sdk-extensionconfig
    Request Hook:
      API Version:    hooks.runtime.cluster.x-k8s.io/v1alpha1
      Hook:           AfterControlPlaneInitialized
    Timeout Seconds:  10
Events:               <none>

If you look closely, it has fetched the CA bundle correctly via the annotation, and both hooks show up in the status.

  • Create a Workload Cluster Now –
$ clusterctl generate cluster extension-config-test --flavor development \
--kubernetes-version v1.27.1 \
--control-plane-machine-count=1 \
--worker-machine-count=1 \
> manifests/capi/capi-quickstart.yaml

$ k apply -f manifests/capi/capi-quickstart.yaml
clusterclass.cluster.x-k8s.io/quick-start created
dockerclustertemplate.infrastructure.cluster.x-k8s.io/quick-start-cluster created
kubeadmcontrolplanetemplate.controlplane.cluster.x-k8s.io/quick-start-control-plane created
dockermachinetemplate.infrastructure.cluster.x-k8s.io/quick-start-control-plane created
dockermachinetemplate.infrastructure.cluster.x-k8s.io/quick-start-default-worker-machinetemplate created
kubeadmconfigtemplate.bootstrap.cluster.x-k8s.io/quick-start-default-worker-bootstraptemplate created
cluster.cluster.x-k8s.io/extension-config-test created

Let’s check the logs and the ConfigMaps to see whether it has created anything,

I0524 07:40:49.405854       1 hooks.go:52] "DoAfterControlPlaneInitialized is called"
I0524 07:40:49.406022       1 hooks.go:53] "Namespace:" default="ClusterName: " extension-config-test="(MISSING)"
I0524 07:40:49.406093       1 hooks.go:74] "Checking for ConfigMap" extension-config-test-test-extension-hookresponse="(MISSING)"
I0524 07:40:49.421562       1 hooks.go:80] "ConfigMap not found"
I0524 07:40:49.421596       1 hooks.go:92] "Creating ConfigMap"
I0524 07:40:49.437841       1 hooks.go:99] "configmap created successfully"
$ k get configmaps
NAME                                                DATA   AGE
extension-config-test-test-extension-hookresponse   1      76s
kube-root-ca.crt                                    1      26m

Yep, now our ConfigMap is up. Let’s test the delete path,

$ k delete -f manifests/capi/capi-quickstart.yaml

$ k logs -n runtimesdk test-runtime-sdk-5bc665d7b9-725hl --follow
I0524 07:44:08.266319       1 hooks.go:30] "DoBeforeClusterDelete is called"
I0524 07:44:08.266347       1 hooks.go:31] "Namespace:" default="ClusterName: " extension-config-test="(MISSING)"
I0524 07:44:08.268351       1 hooks.go:74] "Checking for ConfigMap" extension-config-test-test-extension-hookresponse="(MISSING)"
I0524 07:44:08.288940       1 hooks.go:86] "ConfigMap found"
I0524 07:44:08.289163       1 hooks.go:105] "Deleting ConfigMap"
$ k get configmaps
NAME               DATA   AGE
kube-root-ca.crt   1      29m

So, now there is no ConfigMap either. Everything is working fine then 😉

Thanks for reading 🙂

Feedback is welcome!

Being a part of the first ever Release Team of Cluster API as CI Signal/Bug Triage/Automation Shadow

Cluster API 1.4 just got released and I consider myself very lucky to be able to help with the release as the CI Signal Shadow and get to work with the fantastic folks in the community! If this sounds interesting to you, but you’ve no clue how to get started, then keep reading, and I’ll try to answer most questions people have when getting started with the release team.

What Is the Cluster API Release Team?

For each new release of Cluster API, there is a team of community members who are responsible for day-to-day release work: for example, watching CI on testgrid, writing and publishing release notes, highlighting release-blocking issues to the maintainers, etc.

The release team is divided into a few subgroups. It is headed by a Release Lead with a few shadows, and each subgroup also has its own lead and shadows. The subgroups are –

  • Communication Team.
  • CI Signal/Bug Triage/Automation Team.

My Personal Experience With the Release Team

I started on the Cluster API 1.4 release team as a CI Signal shadow. The CI Signal team is responsible for monitoring CI throughout the release, reporting any test failures to the team weekly, and making sure they get fixed before the release. The team was very helpful in onboarding me as a complete beginner and clarifying every doubt I had regarding the release. I am still new when it comes to fixing failures, and that’s my improvement goal for the future.

Now I am continuing to work in the 1.5 release team in CI Signal.

Thanks to the entire release team for having me and helping me whenever I had any doubts.

Why Should you Consider Applying?

Joining as a shadow allows you to directly get mentored by the role lead and the broader release team. Not only do you end up learning about the release process, but you also get to learn a lot about how the open-source K8s project is structured.

How do I apply?

The Cluster API release team doesn’t have any official shadow selection process like upstream Kubernetes has. Anybody who is an active member of the community and has the enthusiasm to learn new things can come and help the team. One potential idea is to contribute to Cluster API and join release meetings every week and get involved in the next release cycle.

Understanding go.mod and go.sum in Golang projects

In this post, I’ll primarily try to explain how dependency management works in Golang and how we use various commands and the go.mod file to handle dependencies.

What is the go.mod file?

go.mod is the root module definition for a Golang project, and all the modules the project requires are listed in the file. This means every external module we import in our project will show up here.

With go mod init github.com/foo/bar we can create a go.mod file, which will include content like the following –

module github.com/foo/bar

go 1.17

require (
  github.com/gin-gonic/gin v1.8.1
  github.com/google/uuid v1.3.0
)

require (
  github.com/onsi/ginkgo v1.16.5 // indirect
  github.com/onsi/gomega v1.24.1 // indirect
)

Types of dependencies

  • Direct dependency – a module that our project imports directly in the code.
  • Indirect dependency – a module that our project does not use directly but that some other dependency of our project uses (marked // indirect in the above go.mod file).

What is go mod tidy?

go mod tidy is the command that ensures the go.mod file lists all the dependencies the project actually needs. Also, if a dependency is listed in the go.mod file but not used by the project, it removes that entry.

What is go mod vendor?

With go mod vendor, a vendor directory is created and all the dependencies are copied into it. The next time you build, instead of downloading from the internet, Go takes those dependencies from the vendor directory.

What is go.sum? Is it some kind of locking file?

The go.mod file contains all the information needed to build the project. go.sum, on the other hand, contains cryptographic checksums that verify the content of each module. It typically holds checksums for every direct & indirect dependency in the project, which is why go.sum is usually larger than go.mod.
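
For illustration, each entry in go.sum pairs a module version (and its go.mod file) with a checksum; the hashes below are placeholders, not real values –

github.com/gin-gonic/gin v1.8.1 h1:<checksum>=
github.com/gin-gonic/gin v1.8.1/go.mod h1:<checksum>=
github.com/google/uuid v1.3.0 h1:<checksum>=
github.com/google/uuid v1.3.0/go.mod h1:<checksum>=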

If someone clones your repository, they will receive an error if there is a mismatch between their downloaded copies and the entries in go.sum.

So, go.sum is not a lock file; it’s a verification mechanism that complements go.mod.

In addition, go.sum is used to verify the local module cache in the $GOPATH/pkg/mod directory, which is reused for subsequent builds.

Test your MongoDB code with mtest in Golang

What is Unit Testing?

Unit testing is a method we follow to test the smallest piece of code that can be logically isolated in a project, often a function.

In Golang we have a standard library package called testing. We use it to exercise a function against various scenarios and check that it behaves the way we want.
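
As a quick illustration (before we get to MongoDB), a minimal test with the testing package might look like this; the Add function here is just a hypothetical example –

package calc

import "testing"

// Add is a hypothetical function under test.
func Add(a, b int) int { return a + b }

// TestAdd checks a couple of scenarios for Add.
func TestAdd(t *testing.T) {
	if got := Add(2, 3); got != 5 {
		t.Errorf("Add(2, 3) = %d, want 5", got)
	}
	if got := Add(-1, 1); got != 0 {
		t.Errorf("Add(-1, 1) = %d, want 0", got)
	}
}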

Why do we need Unit Testing?

That’s an interesting question. We use it for the convenience and maintainability of our project in the long term. Once the project has grown bigger & bigger, it becomes very hard for a developer to verify that every corner of the project still works properly when some part of the code is changed. With tests in place, a developer who changes some part of the code can simply run the test suite and see that everything still works.

What is Mocking?

Suppose you have written a web API or something that talks to a database. When you test it, you would need a similar environment, such as a server or a database, to test your code against. But it’s expensive and inconvenient to stand up a real server or database just for tests. So we use something called mocking, which provides a fake environment for testing with behaviour similar to the actual one.
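
As a minimal sketch of the idea (independent of MongoDB), we can hide the real dependency behind an interface and substitute a fake implementation in tests; the Store interface and the names below are hypothetical –

package store

// Store is a hypothetical interface our code depends on;
// the real implementation would talk to an actual database.
type Store interface {
	Get(id string) (string, error)
}

// Greeter uses a Store without caring whether it is real or fake.
type Greeter struct {
	S Store
}

func (g Greeter) Greet(id string) (string, error) {
	name, err := g.S.Get(id)
	if err != nil {
		return "", err
	}
	return "Hello, " + name, nil
}

// fakeStore is a mock used only in tests.
type fakeStore struct{ name string }

func (f fakeStore) Get(id string) (string, error) { return f.name, nil }

In a test we would construct Greeter{S: fakeStore{name: "John"}} and assert on the result, without any real database involved.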

Testing MongoDB

Now that you know what testing and mocking are, suppose you have written some awesome MongoDB code and want to test it. The MongoDB Go driver provides something for us called mtest. (P.S. – Its documentation states that “mtest is unstable and there is no backward compatibility guarantee. It is experimental and subject to change.”) So using it is a little tricky.

Creating Mock Environment

To create a mock deployment in mtest we do the following –

mt := mtest.New(t, mtest.NewOptions().DatabaseName("test-db").ClientType(mtest.Mock))

It will create and return a mock MongoDB deployment upon which we can run our tests.

Sample Go code to test

Suppose you have the following Go struct data

type Data struct {
	ID          primitive.ObjectID `json:"id" bson:"_id"`
	DataID      string             `json:"data_id" bson:"data_id"`
	Name        string             `json:"name" bson:"name"`
	PublishedAt time.Time          `json:"publishedAt" bson:"publishedAt"`
}

You have the following method for this data struct dealing with MongoDB.

func Create(ctx context.Context, data Data, col *mongo.Collection) (string, error) {
	_, err := col.InsertOne(ctx, data)
	if err != nil {
		return "", err
	}

	return "Data Created", nil
}
func Get(ctx context.Context, dataID string, col *mongo.Collection) (Data, error) {
	res := col.FindOne(ctx, bson.M{
		"data_id": dataID,
	})

	if res.Err() != nil {
		return Data{}, res.Err()
	}

	var data Data
	err := res.Decode(&data)
	if err != nil {
		return Data{}, err
	}

	return data, nil
}

Writing tests for the above code

Create

By default, when you run mtest against a mock deployment it creates a collection mt.Coll and deletes it after the test is done. Below we will write a typical success scenario –

func TestCreateData(t *testing.T) {
	t.Parallel()

	ctx := context.Background()
	mt := mtest.New(t, mtest.NewOptions().DatabaseName("test-db").ClientType(mtest.Mock))
	defer mt.Close()

	testCases := []struct {
		name         string
		data         Data
		prepareMock  func(mt *mtest.T)
		want         string
		wantErr      bool
	}{
		{
			name:         "create data successfully",
			data: Data{
				ID:           primitive.NewObjectID(),
				DataID:       uuid.New().String(),
				Name:         "John Doe",
				PublishedAt:  time.Now(),
			},
			prepareMock: func(mt *mtest.T) {
				mt.AddMockResponses(mtest.CreateSuccessResponse())
			},
			want:    "Data Created",
			wantErr: false,
		},
	}

	for _, tt := range testCases {
		mt.Run(tt.name, func(mt *mtest.T) {
			tt.prepareMock(mt)

			got, err := Create(ctx, tt.data, mt.Coll)

			if tt.wantErr {
				assert.Errorf(t, err, "Want error but got: %v", err)
			} else {
				assert.NoErrorf(t, err, "Not expecting error but got: %v", err)
			}

			assert.Equalf(t, tt.want, got, "want: %v, but got: %v", tt.want, got)
		})
	}
}

What we are doing here –

  • Creating the MongoDB mock deployment (and closing it when done via defer).
  • Defining a table (a struct slice) with the data needed for the test and iterating over it, so that in the long term we can add more scenarios without repeating the same code.
  • prepareMock – this is where we register the mock response on the collection. When the target method is called, the mock response is returned, and we check whether our function handles that response scenario correctly.
  • Lastly, we call the Create method and assert that the returned values match the expected ones.

Thankfully, mtest provides a helper that creates a mock response for a successful insertion into the DB: mtest.CreateSuccessResponse().
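
If you later want to cover the failure path of Create as well, mtest can also fabricate write errors. A sketch of an additional entry for the testCases slice above (the error code and message are arbitrary examples) could look like this –

		{
			name: "insert fails with a write error",
			data: Data{
				ID:          primitive.NewObjectID(),
				DataID:      uuid.New().String(),
				Name:        "John Doe",
				PublishedAt: time.Now(),
			},
			prepareMock: func(mt *mtest.T) {
				// Return a write error instead of a success response.
				mt.AddMockResponses(mtest.CreateWriteErrorsResponse(mtest.WriteError{
					Index:   0,
					Code:    11000, // e.g. duplicate key
					Message: "duplicate key error",
				}))
			},
			want:    "",
			wantErr: true,
		},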

Get

Here, if you look at the Get code, you will see that there are three scenarios to cover –

  • Get data successfully.
  • If no filter matches, it returns an error.
  • If the data inside the DB is corrupted, it returns a decode error.
func TestGetData(t *testing.T) {
	t.Parallel()

	ctx := context.Background()
	mt := mtest.New(t, mtest.NewOptions().DatabaseName("test-db").ClientType(mtest.Mock))
	defer mt.Close()

	id := primitive.NewObjectID()
	dataid := uuid.New().String()
	publishedAt := time.Now().UTC().Truncate(time.Millisecond)

	testCases := []struct {
		name         string
		dataID       string
		prepareMock  func(mt *mtest.T)
		want         Data
		wantErr      bool
	}{
		{
			name:         "get Data Successfully",
			prepareMock: func(mt *mtest.T) {
				first := mtest.CreateCursorResponse(1, "foo.bar", mtest.FirstBatch, bson.D{
					{Key: "_id", Value: id},
					{Key: "data_id", Value: dataid},
					{Key: "name", Value: "John Doe"},
					{Key: "publishedAt", Value: publishedAt},
				})
				killCursor := mtest.CreateCursorResponse(0, "foo.bar", mtest.NextBatch)

				mt.AddMockResponses(first, killCursor)
			},
			dataID: dataid,
			want: Data{
				ID:           id,
				DataID:       dataid,
				Name:         "John Doe",
				PublishedAt:  publishedAt,
			},
			wantErr: false,
		},
		{
			name:         "get decode error",
			prepareMock: func(mt *mtest.T) {
				// The name field expects a string but an integer is stored in the DB,
				// so decoding will throw an error.
				first := mtest.CreateCursorResponse(1, "foo.bar", mtest.FirstBatch, bson.D{
					{Key: "_id", Value: id},
					{Key: "data_id", Value: dataid},
					{Key: "name", Value: 1234},
					{Key: "publishedAt", Value: publishedAt},
				})
				killCursor := mtest.CreateCursorResponse(0, "foo.bar", mtest.NextBatch)

				mt.AddMockResponses(first, killCursor)
			},
			dataID:     dataid,
			want:       Data{},
			wantErr:    true,
		},
		{
			name:         "wrong data ID",
			prepareMock: func(mt *mtest.T) {
				mt.AddMockResponses(bson.D{
					{Key: "ok", Value: 1},
					{Key: "acknowledged", Value: true},
					{Key: "n", Value: 0},
				})
			},
			dataID: uuid.NewString(),
			want:       Data{},
			wantErr:    true,
		},
	}

	for _, tt := range testCases {
		mt.Run(tt.name, func(mt *mtest.T) {
			tt.prepareMock(mt)

			got, err := Get(ctx, tt.dataID, mt.Coll)

			if tt.wantErr {
				assert.Errorf(t, err, "Want error but got: %v", err)
			} else {
				assert.NoErrorf(t, err, "Not expecting error but got: %v", err)
			}

			assert.Equalf(t, tt.want, got, "want: %v, but got: %v", tt.want, got)
		})
	}
}

What we are doing here –

  • As usual, initializing the mock MongoDB deployment.
  • Defining a struct slice with the fields needed for testing. We also define some Data field values up front so that they stay consistent across the test cases.
  • Scenario 1 (Successful Get of Data) – Here we define a mock response for the collection –
first := mtest.CreateCursorResponse(1, "foo.bar", mtest.FirstBatch, bson.D{
					{Key: "_id", Value: id},
					{Key: "data_id", Value: dataid},
					{Key: "name", Value: "John Doe"},
					{Key: "publishedAt", Value: publishedAt},
				})
				killCursor := mtest.CreateCursorResponse(0, "foo.bar", mtest.NextBatch)

				mt.AddMockResponses(first, killCursor)

Once we call the Get function, the collection returns these mock responses, no error is thrown, and the data is returned.

  • Scenario 2 (Decode Error) – Here we will return a mock response that will consist of an integer field in a place where it should be a string.
first := mtest.CreateCursorResponse(1, "foo.bar", mtest.FirstBatch, bson.D{
					{Key: "_id", Value: id},
					{Key: "data_id", Value: dataid},
					{Key: "name", Value: 1234},
					{Key: "publishedAt", Value: publishedAt},
				})
				killCursor := mtest.CreateCursorResponse(0, "foo.bar", mtest.NextBatch)

				mt.AddMockResponses(first, killCursor)

Here the Get function gets the response, tries to decode it, fails to do so, and returns a decode error.

  • Scenario 3 (No Filter Match) – Here we will create a mock response that contains zero matching documents.
mt.AddMockResponses(bson.D{
					{Key: "ok", Value: 1},
					{Key: "acknowledged", Value: true},
					{Key: "n", Value: 0},
				})

Here the Get method receives n = 0, meaning no document matched the filter, so it returns an error, which is the correct behavior.

Thanks 🙂

How to Use Multiple Git Configs on One Computer

If you are like me and make open source contributions from an office laptop while your company uses some other git service, then this blog is for you.

Using separate directory for repos

Let’s say we create a directory for each type of work:

  • Work
  • Personal

Create a global git config file .gitconfig

You should have a global gitconfig from which you will include your work- and personal-specific gitconfigs.

By default, that file is ~/.gitconfig in your home directory.

Create two specific gitconfigs, one for each purpose

  • .gitconfig-work
  • .gitconfig-personal

Map those two gitconfigs to their directories in the global gitconfig

git config --global --add includeIf."gitdir:/path/to/work/directory/".path ~/.gitconfig-work

git config --global --add includeIf."gitdir:/path/to/personal/directory/".path ~/.gitconfig-personal
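
After running these commands, the global ~/.gitconfig should contain sections roughly like the following (with your actual directory paths) –

[includeIf "gitdir:/path/to/work/directory/"]
	path = ~/.gitconfig-work
[includeIf "gitdir:/path/to/personal/directory/"]
	path = ~/.gitconfig-personal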

Specify information in the gitconfigs

# ~/.gitconfig-work
[user]
 name = work_user
 email = work_email

# ~/.gitconfig-personal
[user]
 name = personal_user
 email = personal_email

Go to the directory and see your config list

$ cd ~/work
$ mkdir work-test-repo
$ cd work-test-repo
$ git init
		Initialized empty Git repository in /Users/aniruddha/work/work-test-repo/.git/
$ git config -l
		credential.helper=osxkeychain
		includeif.gitdir:~/personal/.path=~/.gitconfig-personal
		includeif.gitdir:~/work/.path=~/.gitconfig-work
		user.name=working_me
		user.email=work@work.com
		core.repositoryformatversion=0
		core.filemode=true
		core.bare=false
		core.logallrefupdates=true
		core.ignorecase=true
		core.precomposeunicode=true
$ cd ~/personal
$ mkdir personal-test-repo
$ git init
	Initialized empty Git repository in /Users/aniruddha/personal/.git/
$ git config -l
	credential.helper=osxkeychain
	includeif.gitdir:~/personal/.path=~/.gitconfig-personal
	user.name=me_personal
	user.email=personal@personal.com
	includeif.gitdir:~/work/.path=~/.gitconfig-work
	core.repositoryformatversion=0
	core.filemode=true
	core.bare=false
	core.logallrefupdates=true
	core.ignorecase=true
	core.precomposeunicode=true

Now you can see you have two types of gitconfigs according to your directory.

My LFX mentorship experience contributing to Cluster API GCP

In my previous blog post, I shared how I got selected for the LFX mentorship. In this post, I am going to write about my experience contributing to Cluster API GCP.

Mentorship Project Description

The mentorship was about adding GPU support to CAPG. As of now, Google Cloud Platform supports NVIDIA GPUs. So we first started by planning a road map of the steps required to add GPU support. The first thing we decided to do was create a GPU-driver-enabled OS image that can take advantage of the GPUs in the VM. For that, we created this PR, where we mostly added Packer config files so that it builds the OS image with the NVIDIA GPU drivers.

The next thing we did was make changes to the CAPG API so that we could declare the fields required to create VMs with GPUs in GCP. After that, we added validations and webhooks for the new API changes so that incoming requests are validated properly. Finally, we added unit tests and end-to-end tests so that the software in the main branch is fully tested. Here is the PR we created in the CAPG repo that has all the changes mentioned above.

And after all our hard work, we successfully created GPU-enabled VMs with Cluster API in GCP.

My experience overall

I never thought of doing LFX and contributing to such big projects a few months back. The only things that kept me motivated and contributing were the awesome community and the projects. In the beginning, to get familiar with the project, my mentors gave me the task of spinning up a managed Kubernetes cluster in GCP using Cluster API and reading the documentation. Throughout the mentorship, all my mentors, Dims, Richard, and Carlos, helped me overcome all kinds of challenges to complete the task, and they also gave me the motivation and enthusiasm to push my boundaries and learn new things every day. This mentorship not only helped me become a better developer in Cloud Native technologies but also a better thinker in terms of solving real-world engineering problems. In one word, my overall experience with the LFX mentorship was fabulous and wonderful. And last but not least, all of the above would have been incomplete if I didn’t have my co-mentee Subhasmita.

Future Scope

After this project, I started picking up other open source issues in CAPG and also started contributing to CAPI. I will keep contributing to CNCF projects and hopefully work on more big and significant features in the future.

Becoming Kubernetes & Kubernetes SIG member

Another great thing that happened was that I recently became a member of the Kubernetes and Kubernetes-SIGs organizations. Thanks to Carlos, Nabarun, Richard, and Dims for giving me a +1.

Also, if you have any queries regarding Cluster API GCP or Cluster API, feel free to join the Kubernetes Slack using the link https://slack.k8s.io/ and then join the #cluster-api-gcp and #cluster-api channels. Feel free to ping me (@aniruddha) on Slack if you have any questions.

Check for Kubernetes deployment with client-go library

For the past couple of days, I have been tinkering with the client-go library. It provides the necessary interfaces and methods for manipulating Kubernetes cluster resources from your Go code. After exploring for a while, I started working on a side project that checks deployments: if a deployment doesn’t have a certain environment variable, it deletes the deployment; otherwise it keeps it as it is.

Setup

In this blog, I am not going to cover how to set up a Go project.

First, create a directory named app and create another directory inside it called service. Now create a file named init.go inside the service directory.

package service

import (
	"log"
	"os"
	"path/filepath"

	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/rest"
	"k8s.io/client-go/tools/clientcmd"
)

// Initializes the kube config clientset
func Init() *kubernetes.Clientset {
	config, err := rest.InClusterConfig()
	if err != nil {
		kubeconfig := filepath.Join("/home", "aniruddha", ".kube", "config")
		if envvar := os.Getenv("KUBECONFIG"); len(envvar) > 0 {
			kubeconfig = envvar
		}

		config, err = clientcmd.BuildConfigFromFlags("", kubeconfig)
		if err != nil {
			log.Fatalf("kubeconfig can't be loaded: %v\n", err)
		}
	}

	clientset, err := kubernetes.NewForConfig(config)
	if err != nil {
		log.Fatalf("error getting config client: %v\n", err)
	}

	return clientset
}

In the above code example, we call InClusterConfig first, which returns a config object holding the common attributes that can be passed to a Kubernetes client on initialization. If that config can’t be found, we look for the kubeconfig in the default location used on most Linux systems (~/.kube/config), or in the path set by the KUBECONFIG environment variable.

After we get the config, it’s time to initialize a client. We do that with the NewForConfig method. It returns a clientset that contains a client for each API group; for example, pods can be accessed through the CoreV1 group of the clientset.
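
For instance, a small sketch of using the clientset returned by Init to list pods in a namespace could look like this (the default namespace is just an example) –

package main

import (
	"context"
	"fmt"
	"log"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

	"github.com/aniruddha2000/yosemite/app/service"
)

func main() {
	clientset := service.Init()

	// List pods in the "default" namespace through the CoreV1 group.
	pods, err := clientset.CoreV1().Pods("default").List(context.TODO(), metav1.ListOptions{})
	if err != nil {
		log.Fatalf("error listing pods: %v", err)
	}
	for _, pod := range pods.Items {
		fmt.Println(pod.Name)
	}
}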

Check for deployments

Create another directory under the app dir named client.

package client

import (
	"fmt"
	"log"
	"time"

	"k8s.io/apimachinery/pkg/util/wait"
	"k8s.io/client-go/informers"
	"k8s.io/client-go/tools/cache"
)

const (
	ENVNAME = "TEST_ENV_NAME"
)

// Check for Deployment and start a go routine if new deployment added
func (c *Client) CheckDeploymentEnv(ns string) {
	informerFactory := informers.NewSharedInformerFactory(c.C, 30*time.Second)

	deploymentInformer := informerFactory.Apps().V1().Deployments()
	deploymentInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
		AddFunc: func(obj interface{}) {
			log.Println("Deployment added. Let's start checking!")

			ch := make(chan error, 1)
			done := make(chan bool)

			go c.check(ns, ch, done)

		loop:
			for {
				select {
				case err := <-ch:
					log.Fatalf("error checking envvar: %v", err)
				case <-done:
					break loop
				}
			}
		},
	})

	informerFactory.Start(wait.NeverStop)
	informerFactory.WaitForCacheSync(wait.NeverStop)
}

Now, in the CheckDeploymentEnv method, we first create a NewSharedInformerFactory, which gives us back an interface that helps retrieve various resources from a local cache of the cluster. We can then handle events like add, update, and delete in the cluster and take action accordingly.
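
For completeness, a minimal sketch of also registering update and delete handlers inside the same method could look like this; the log messages are only illustrative –

	deploymentInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
		UpdateFunc: func(oldObj, newObj interface{}) {
			// Called whenever a deployment changes (and periodically on resync).
			log.Println("Deployment updated. Re-checking!")
		},
		DeleteFunc: func(obj interface{}) {
			// Called when a deployment is removed from the cluster.
			log.Println("Deployment deleted.")
		},
	})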

Then we add another function in the same file as above.

func (c *Client) check(namespace string, ch chan error, done chan bool) {
	deployments, err := ListDeploymentWithNamespace(namespace, c.C)
	if err != nil {
		ch <- fmt.Errorf("list deployment: %s", err.Error())
	}

	for _, deployment := range deployments.Items {
		var envSet bool
		for _, cntr := range deployment.Spec.Template.Spec.Containers {
			for _, env := range cntr.Env {
				if env.Name == ENVNAME {
					log.Printf("Deployment name: %s has envvar. All set to go!", deployment.Name)
					envSet = true
				}
			}
		}
		if !envSet {
			log.Printf("No envvar name %s - Deleting deployment with name %s\n", ENVNAME, deployment.Name)
			err = DeleteDeploymentWithNamespce(namespace, deployment.Name, c.C)
			if err != nil {
				ch <- err
			}
		}
	}
	done <- true
}

Here we list the deployments (covered next), and for every deployment we check for the environment variable and delete the deployment if the variable is missing. We pass true to the done channel if everything is successful; otherwise we pass the error to the error channel.

Deployment Handler

Create another file named deployment.go in the client directory.

package client

import (
	"fmt"
	"log"

	v1 "k8s.io/api/apps/v1"
	"k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
)

// List deployment resource with the given namespace
func ListDeploymentWithNamespace(ns string, clientset *kubernetes.Clientset) (*v1.DeploymentList, error) {
	deployment, err := clientset.AppsV1().Deployments(ns).List(ctx, metav1.ListOptions{})
	if err != nil {
		return nil, err
	}
	return deployment, nil
}

// Delete deployment resource with the given namespace
func DeleteDeploymentWithNamespce(ns, name string, clientset *kubernetes.Clientset) error {
	err := clientset.AppsV1().Deployments(ns).Delete(ctx, name, metav1.DeleteOptions{})
	if err != nil {
		if errors.IsNotFound(err) {
			log.Printf("Deployment don't exists with name %s\n", name)
			return nil
		} else {
			return fmt.Errorf("delete Deployment: %v", err)
		}
	}
	log.Printf("Deployment deleted with name: %v\n", name)

	return nil
}

Here we have two functions, one for listing the deployments and another for deleting them. In this case we get the resources directly from the clientset, meaning we query the Kubernetes API server, unlike the informer above, which reads from the local in-memory cache.
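
If we wanted to stay on the cached path instead, the informer also exposes a lister; a rough sketch (using the deploymentInformer from CheckDeploymentEnv and the k8s.io/apimachinery/pkg/labels package) could look like this –

	// List deployments for a namespace from the informer's local cache
	// instead of querying the API server directly.
	cached, err := deploymentInformer.Lister().Deployments(ns).List(labels.Everything())
	if err != nil {
		log.Printf("error listing deployments from cache: %v", err)
		return
	}
	for _, d := range cached {
		log.Printf("cached deployment: %s", d.Name)
	}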

Now create another file named client.go in the client directory and use the code below.

package client

import (
	"context"

	"k8s.io/client-go/kubernetes"
)

var (
	ctx = context.TODO()
)

type Client struct {
	C *kubernetes.Clientset
}

// Return a new Client
func NewClient() *Client {
	return &Client{}
}

main.go

package main

import (
	"flag"
	"log"

	"github.com/aniruddha2000/yosemite/app/client"
	"github.com/aniruddha2000/yosemite/app/service"
)

func main() {
	var nameSpace string

	flag.StringVar(&nameSpace, "ns", "test-ns",
		"namespace name on which the checking is going to take place")
	flag.Parse()

	log.Printf("Checking Pods for namespace %s\n", nameSpace)
	c := client.NewClient()
	c.C = service.Init()

	c.CheckDeploymentEnv(nameSpace)
}

Here we just take the namespace from the flag and call all the necessary functions covered in this article.

Run the app in the Kubernetes cluster

In order to run the app in the cluster, we have to set up a ClusterRole & ClusterRoleBinding for the pod’s default service account.

apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: pod-namespace-clusterrole
rules:
  - apiGroups: ["apps"]
    resources: ["deployments"]
    verbs: ["list", "delete"]
---
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: pod-namespace-clusterrolebinding
subjects:
  - kind: ServiceAccount
    name: default
    namespace: default
roleRef:
  kind: ClusterRole
  name: pod-namespace-clusterrole
  apiGroup: rbac.authorization.k8s.io

Then you have to build the project, make a docker image out of it, and push it using the docker build, docker tag & docker push commands. Then create the deployment YAML shown below and apply it.

apiVersion: apps/v1
kind: Deployment
metadata:
  creationTimestamp: null
  labels:
    app: client
  name: client
spec:
  replicas: 1
  selector:
    matchLabels:
      app: client
  strategy: {}
  template:
    metadata:
      creationTimestamp: null
      labels:
        app: client
    spec:
      containers:
      - image: <YOUR DOCKER IMAGE>
        name: client-app
        resources: {}
status: {}

Here is my GitHub URL for the project – https://github.com/aniruddha2000/yosemite/

You can find how to run the project in the README of the mentioned GitHub URL above.

What is RBAC in Kubernetes?

RBAC stands for Role Based Access Control. It allows us to define user privilege in the Kubernetes cluster that will restrict users from doing the unwanted operation. We describe access rights such as who is allowed to create, update, and delete resources.

Why do we need it?

  • To make the cluster more secure.
  • To scale our cluster to various development teams and avoid conflict between them.

Objects

In the RBAC API there are 4 main types of objects –

  • Role – used for constraints on namespaced objects.
  • RoleBinding – maps a Role to a user.
  • ClusterRole – used for cluster-wide resource constraints.
  • ClusterRoleBinding – maps a ClusterRole to a user.

Example

Now we are going to create the objects mentioned above and see how these all work.

ClusterRole & ClusterRoleBinding

First, we are going to create a service account

kubectl create serviceaccount bob

Now write the below two YAML files for the ClusterRole & ClusterRoleBinding –

apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: bob
rules:
  - apiGroups:
      - ''
      - apps
    resources:
      - pods
      - pods/status
      - namespaces
      - deployments
    verbs:
      - get
      - list
      - watch
      - create
      - update

---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: bob-binding
subjects:
  - kind: ServiceAccount
    name: bob
    namespace: default
roleRef:
  kind: ClusterRole
  name: bob
  apiGroup: rbac.authorization.k8s.io

Here we first create a service account and then define a cluster role that can get, list, watch, create, and update pods, deployments, and namespaces.

Later we create a cluster role binding that will map the cluster role to the service account.

Role & RoleBinding

Let’s create a namespace first.

apiVersion: v1
kind: Namespace
metadata:
  name: application
  labels:
    name: alice

Then define the below Role and RoleBinding

apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  namespace: application
  name: alice
rules:
- apiGroups: [""]
  resources: ["pods"]
  verbs: ["get", "watch", "list"]

---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: alice-binding
  namespace: application
subjects:
- kind: User
  name: alice
  apiGroup: rbac.authorization.k8s.io
roleRef:
  kind: Role
  name: alice
  apiGroup: rbac.authorization.k8s.io

Here we create a namespace and then define a role that allows get, watch, and list operations on pods in the application namespace.

Later we bind the role to the user alice in that namespace with a RoleBinding.

How I got selected for the LFX Mentorship Program

LFX Mentorship (previously known as Community Bridge) is a platform developed by the Linux Foundation, which promotes and accelerates the adoption, innovation, and sustainability of open-source software.

LFX Mentorship is actively used by the Cloud Native Computing Foundation (CNCF) as a mentorship platform across the CNCF projects.

Program Schedule

2022 — Fall Term — September 1st – Nov 30th

2022 — Summer Term — June 1st – August 31st (My Term)

2022 — Spring Term — March 1st – May 31st

How to Apply

You have to write a cover letter that covers why you are interested in the project, any previous work you have done, what you expect from the project, etc.

Tip: Start contributing early and talk to the maintainers about your interests in the program and start to discuss the issue/feature you are going to work on.

My Project

My project is Cluster API Provider for GCP (CAPG). It is a CNCF project that helps manage Kubernetes clusters on the Google Cloud Platform. Currently, other providers such as Cluster API Provider for AWS (CAPA) and Cluster API Provider for Azure (CAPZ) support taking advantage of GPUs in their clusters, but CAPG doesn’t, so my co-mentee Subhasmita and I will work on adding GPU support to CAPG.

My Mentors

My Co-Mentee

Well, my journey would be a little monotonous if I didn’t have a co-mentee. It makes the work more interesting because when we are both stuck on anything, we hop on a call and discuss things. We also divide the weekly work between us and teach each other what we have learned.

How It All Started

I didn’t plan to do LFX from the beginning. I started my journey with CAPG for GSoC ’22. I applied for the same project and the same feature in GSoC, but that didn’t work out because the project didn’t get selected, and eventually all the applications to the project were rejected as well. So I talked to the maintainer Richard and asked whether I could work on the GPU feature, as I was very interested in it. He told me there was still hope in the LFX Mentorship; he opened an application there, I applied, and then I got selected for the LFX Mentorship 🎉

How It Is Going

I was a little worried about how I would work on a big project like this, with thousands of lines of code, when I had only ever written projects of at most 500 lines. But I am amazed at how the maintainers made my journey easy: they onboarded me with an introduction to the project over a couple of weeks, gave me small tasks to try things out, and encouraged me to ask questions whenever I got stuck.

Next Steps:

I will start the GPU work next week with Subhasmita and keep contributing to the project in the future.

Create a managed cluster using Cluster API Provider for Google Cloud Platform (CAPG)

In the previous blog, I explained how to create and manage Kubernetes with cluster API locally with the help of docker infrastructure.

In this blog, I will explain how to create and manage the k8s with Cluster API in the google cloud.

Note – Throughout the blog I will use Kubernetes version 1.22.9; it is recommended to use the Kubernetes version that matches the OS image created by the image builder. You can check kubernetes.json and use that version.

Step 1 –

  • Create the kind cluster –
kind create cluster --image kindest/node:v1.22.9 --wait 5m

Step 2 –

Follow image builder for GCP steps and build an image.

Step 3 –

  • Export the following env variables – (reference)
export GCP_PROJECT_ID=<YOUR PROJECT ID>
export GOOGLE_APPLICATION_CREDENTIALS=<PATH TO GCP CREDENTIALS>
export GCP_B64ENCODED_CREDENTIALS=$( cat /path/to/gcp-credentials.json | base64 | tr -d '\n' )

export CLUSTER_TOPOLOGY=true
export GCP_REGION="us-east4"
export GCP_PROJECT="<YOU GCP PROJECT NAME>"
export KUBERNETES_VERSION=1.22.9
export IMAGE_ID=projects/$GCP_PROJECT/global/images/<IMAGE ID>
export GCP_CONTROL_PLANE_MACHINE_TYPE=n1-standard-2
export GCP_NODE_MACHINE_TYPE=n1-standard-2
export GCP_NETWORK_NAME=default
export CLUSTER_NAME=test

Step 4 –

Set up the network. In this example we are using the default network, so we will create a router and a NAT so that our workload cluster has internet access.

gcloud compute routers create "${CLUSTER_NAME}-myrouter" --project="${GCP_PROJECT}" --region="${GCP_REGION}" --network="default"

gcloud compute routers nats create "${CLUSTER_NAME}-mynat" --project="${GCP_PROJECT}" --router-region="${GCP_REGION}" --router="${CLUSTER_NAME}-myrouter" --nat-all-subnet-ip-ranges --auto-allocate-nat-external-ips

Step 5 –

  • Initialize the infrastructure
clusterctl init --infrastructure gcp
  • Generate the workload cluster config and apply it
clusterctl generate cluster $CLUSTER_NAME --kubernetes-version v1.22.9 > workload-test.yaml

kubectl apply -f workload-test.yaml
  • View the cluster and its resources
$ clusterctl describe cluster $CLUSTER_NAME
NAME                                                               READY  SEVERITY  REASON                 SINCE  MESSAGE
/test                                                              False  Info      WaitingForKubeadmInit  5s
├─ClusterInfrastructure - GCPCluster/test
└─ControlPlane - KubeadmControlPlane/test-control-plane            False  Info      WaitingForKubeadmInit  5s
  └─Machine/test-control-plane-x57zs                               True                                    31s
    └─MachineInfrastructure - GCPMachine/test-control-plane-7xzw2
  • Check the status of the control plane
$ kubectl get kubeadmcontrolplane
NAME                 CLUSTER   INITIALIZED   API SERVER AVAILABLE   REPLICAS   READY   UPDATED   UNAVAILABLE   AGE    VERSION
test-control-plane   test                                           1                  1         1             2m9s   v1.22.9

Note – The control plane won’t be ready until the next step when I install the CNI (Container Network Interface).

Step 6 –

  • Get the kubeconfig for the workload cluster
$ clusterctl get kubeconfig $CLUSTER_NAME > workload-test.kubeconfig
  • Apply the cni
kubectl --kubeconfig=./workload-test.kubeconfig \
  apply -f https://docs.projectcalico.org/v3.20/manifests/calico.yaml
  • Wait a bit and you should see this when getting the kubeadmcontrolplane
$ kubectl get kubeadmcontrolplane
NAME                 CLUSTER   INITIALIZED   API SERVER AVAILABLE   REPLICAS   READY   UPDATED   UNAVAILABLE   AGE     VERSION
test-control-plane   test      true          true                   1          1       1         0             6m33s   v1.22.9


$ kubectl get nodes --kubeconfig=./workload-test.kubeconfig
NAME                       STATUS   ROLES                  AGE   VERSION
test-control-plane-7xzw2   Ready    control-plane,master   62s   v1.22.9

Step 7 –

  • Edit the MachineDeployment in workload-test.yaml: it has 0 replicas, so set the number of worker nodes you want; in this case we used 2. Then apply workload-test.yaml –
$ kubectl apply -f workload-test.yaml
  • After a few minutes, you should see something like this –
$ clusterctl describe cluster $CLUSTER_NAME
NAME                                                               READY  SEVERITY  REASON  SINCE  MESSAGE
/test                                                              True                     15m
├─ClusterInfrastructure - GCPCluster/test
├─ControlPlane - KubeadmControlPlane/test-control-plane            True                     15m
│ └─Machine/test-control-plane-x57zs                               True                     19m
│   └─MachineInfrastructure - GCPMachine/test-control-plane-7xzw2
└─Workers
  └─MachineDeployment/test-md-0                                    True                     10m
    └─2 Machines...                                                True                     13m    See test-md-0-68bd55744b-qpk67, test-md-0-68bd55744b-tsgf6

$ kubectl get nodes --kubeconfig=./workload-test.kubeconfig
NAME                       STATUS   ROLES                  AGE   VERSION
test-control-plane-7xzw2   Ready    control-plane,master   21m   v1.22.9
test-md-0-b7766            Ready    <none>                 17m   v1.22.9
test-md-0-wsgpj            Ready    <none>                 17m   v1.22.9

Yaaa! Now we have a Kubernetes cluster in GCP with 1 control plane node and 2 worker nodes.

Step 8 –

Delete what you have created –

$ kubectl delete cluster $CLUSTER_NAME

$ gcloud compute routers nats delete "${CLUSTER_NAME}-mynat" --project="${GCP_PROJECT}" \
    --router-region="${GCP_REGION}" --router="${CLUSTER_NAME}-myrouter"

$ gcloud compute routers delete "${CLUSTER_NAME}-myrouter" --project="${GCP_PROJECT}" \
    --region="${GCP_REGION}"

$ kind delete cluster