che-operator/controllers/checlusterbackup/checlusterbackup_controller.go

280 lines
9.5 KiB
Go

//
// Copyright (c) 2021 Red Hat, Inc.
// This program and the accompanying materials are made
// available under the terms of the Eclipse Public License 2.0
// which is available at https://www.eclipse.org/legal/epl-2.0/
//
// SPDX-License-Identifier: EPL-2.0
//
// Contributors:
// Red Hat, Inc. - initial API and implementation
//
package checlusterbackup
import (
"context"
"fmt"
"os"
"time"
chev1 "github.com/eclipse-che/che-operator/api/v1"
"github.com/eclipse-che/che-operator/pkg/util"
"github.com/sirupsen/logrus"
"k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/builder"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/event"
"sigs.k8s.io/controller-runtime/pkg/handler"
"sigs.k8s.io/controller-runtime/pkg/manager"
"sigs.k8s.io/controller-runtime/pkg/predicate"
"sigs.k8s.io/controller-runtime/pkg/source"
)
const (
	// BackupCheEclipseOrg is the "backup.che.eclipse.org" domain string.
	// NOTE(review): its exact use (label / annotation / finalizer key) is not
	// visible in this file — confirm against callers before relying on it.
	BackupCheEclipseOrg = "backup.che.eclipse.org"
	// backupDestDir is the local scratch directory into which backup data is
	// collected before being sent to the backup server; it is removed when the
	// reconcile step that populated it returns (see doReconcile).
	backupDestDir = "/tmp/che-backup-data"
)
// ReconcileCheClusterBackup reconciles a CheClusterBackup object
type ReconcileCheClusterBackup struct {
	// This client, initialized using mgr.Client() above, is a split client
	// that reads objects from the cache and writes to the apiserver
	client client.Client
	// scheme is the runtime scheme used by the manager for object (de)serialization
	scheme *runtime.Scheme
	// the namespace to which to limit the reconciliation. If empty, all namespaces are considered
	namespace string
}
// NewReconciler builds a ReconcileCheClusterBackup that uses the given
// manager's client and scheme. An empty namespace means "watch all namespaces".
func NewReconciler(mgr manager.Manager, namespace string) *ReconcileCheClusterBackup {
	return &ReconcileCheClusterBackup{
		client:    mgr.GetClient(),
		scheme:    mgr.GetScheme(),
		namespace: namespace,
	}
}
// SetupWithManager sets up the controller with the Manager.
//
// Only CREATE events on CheClusterBackup CRs may trigger a backup; updates,
// deletes, and generic events are filtered out by backupCRPredicate.
func (r *ReconcileCheClusterBackup) SetupWithManager(mgr ctrl.Manager) error {
	// Filter events to allow only create event on backup CR to trigger a new backup process
	backupCRPredicate := predicate.Funcs{
		UpdateFunc: func(evt event.UpdateEvent) bool {
			return false
		},
		CreateFunc: func(evt event.CreateEvent) bool {
			return true
		},
		DeleteFunc: func(evt event.DeleteEvent) bool {
			return false
		},
		GenericFunc: func(evt event.GenericEvent) bool {
			return false
		},
	}

	bldr := ctrl.NewControllerManagedBy(mgr).
		Named("checlusterbackup-controller").
		Watches(&source.Kind{Type: &chev1.CheClusterBackup{}}, &handler.EnqueueRequestForObject{}, builder.WithPredicates(backupCRPredicate))

	if r.namespace != "" {
		// Restrict reconciliation to the configured namespace only
		bldr = bldr.WithEventFilter(util.InNamespaceEventFilter(r.namespace))
	}

	// Do NOT also call For(&chev1.CheClusterBackup{}) here: For() would register
	// a second, UNFILTERED watch on the same type, so update/delete/generic
	// events would still enqueue reconciles (and creates would be enqueued
	// twice), defeating the create-only predicate above.
	return bldr.Complete(r)
}
// Reconcile reads that state of the cluster for a CheClusterBackup object and makes changes based on the state read
// and what is in the CheClusterBackup.Spec
// Note: The Controller will requeue the Request to be processed again if the returned error is non-nil or
// Result.Requeue is true, otherwise upon completion it will remove the work from the queue.
func (r *ReconcileCheClusterBackup) Reconcile(ctx context.Context, request ctrl.Request) (ctrl.Result, error) {
	// Fetch the CheClusterBackup instance.
	// Use the request-scoped ctx (previously context.TODO()) so the read
	// honors cancellation and manager shutdown.
	backupCR := &chev1.CheClusterBackup{}
	err := r.client.Get(ctx, request.NamespacedName, backupCR)
	if err != nil {
		if errors.IsNotFound(err) {
			// Request object not found, could have been deleted after reconcile request.
			// Owned objects are automatically garbage collected. For additional cleanup logic use finalizers.
			// Return and don't requeue
			return ctrl.Result{}, nil
		}
		// Error reading the object - requeue the request.
		return ctrl.Result{}, err
	}

	done, err := r.doReconcile(backupCR)
	if err != nil {
		// Log the error, so user can see it in logs
		logrus.Error(err)
		if !done {
			// Reconcile because the job is not done yet.
			// Probably the problem is related to a network error, etc.
			return ctrl.Result{RequeueAfter: 1 * time.Second}, err
		}

		// Update backup CR status with the error
		backupCR.Status.Message = "Error: " + err.Error()
		backupCR.Status.State = chev1.STATE_FAILED
		backupCR.Status.SnapshotId = ""
		if err := r.UpdateCRStatus(backupCR); err != nil {
			// Failed to update status, retry
			return ctrl.Result{}, err
		}

		// Do not reconcile despite the fact that an error happened.
		// The error cannot be handled automatically by the operator, so the user has to deal with it in manual mode.
		// For example, config in the backup CR is invalid, so do not requeue as user has to correct it.
		// After a modification in the backup CR, a new reconcile loop will be trigerred.
		return ctrl.Result{}, nil
	}

	if !done {
		// There was no error, but it is required to proceed after some delay,
		// e.g wait until some resources are flushed and/or ready.
		return ctrl.Result{RequeueAfter: 1 * time.Second}, nil
	}

	// Job is done
	return ctrl.Result{}, nil
}
// doReconcile advances the backup process for the given CR by one step.
// It returns (done, err) with the following meaning for the caller (Reconcile):
//   - done == true,  err == nil: backup finished (or nothing to do).
//   - done == false, err == nil: in progress, requeue after a short delay.
//   - done == false, err != nil: transient failure, requeue with the error.
//   - done == true,  err != nil: permanent failure; caller records the error
//     in the CR status and does not requeue.
func (r *ReconcileCheClusterBackup) doReconcile(backupCR *chev1.CheClusterBackup) (bool, error) {
	// Prevent any further action if backup process finished (succeeded or failed).
	// To restart the backup process one needs to recreate the backup CR.
	if backupCR.Status.State != chev1.STATE_IN_PROGRESS && backupCR.Status.State != "" {
		return true, nil
	}

	// Validate backup CR: at least one way to reach a backup server must be configured.
	if backupCR.Spec.BackupServerConfigRef == "" && !backupCR.Spec.UseInternalBackupServer {
		return true, fmt.Errorf("BackupServerConfigRef is not set, nor UseInternalBackupServer requested")
	}

	// Fetch backup server config, if any.
	// A missing referenced config is a permanent (user-fixable) error, so done == true.
	var backupServerConfigCR *chev1.CheBackupServerConfiguration
	if backupCR.Spec.BackupServerConfigRef != "" {
		backupServerConfigCR = &chev1.CheBackupServerConfiguration{}
		backupServerConfigNamespacedName := types.NamespacedName{Namespace: backupCR.GetNamespace(), Name: backupCR.Spec.BackupServerConfigRef}
		if err := r.client.Get(context.TODO(), backupServerConfigNamespacedName, backupServerConfigCR); err != nil {
			if errors.IsNotFound(err) {
				return true, fmt.Errorf("backup server configuration with name '%s' not found in '%s' namespace", backupCR.Spec.BackupServerConfigRef, backupCR.GetNamespace())
			}
			return false, err
		}
	}

	// Create backup context
	bctx, err := NewBackupContext(r, backupCR, backupServerConfigCR)
	if err != nil {
		// Failed to create backup context.
		// This is usually caused by invalid configuration of current backup server in the backup CR.
		// Do not requeue as user has to correct the configuration manually.
		return true, err
	}

	// Update status with progress on the first reconcile loop
	// (empty state means the CR has just been created).
	if bctx.backupCR.Status.State == "" {
		bctx.backupCR.Status.Message = "Backup is in progress. Start time: " + time.Now().String()
		bctx.backupCR.Status.State = chev1.STATE_IN_PROGRESS
		bctx.backupCR.Status.Phase = bctx.state.GetPhaseMessage()
		if err := r.UpdateCRStatus(backupCR); err != nil {
			return false, err
		}
	}

	// Check if internal backup server is needed
	if bctx.backupCR.Spec.UseInternalBackupServer {
		// Use internal REST backup server
		done, err := ConfigureInternalBackupServer(bctx)
		if err != nil || !done {
			return done, err
		}
	}

	// Update progress
	// If internal backup server is not needed, consider step is done
	// NOTE(review): UpdateBackupStatusPhase results are not checked here or
	// below — presumably a best-effort status refresh; verify it returns no error.
	bctx.state.internalBackupServerSetup = true
	bctx.UpdateBackupStatusPhase()

	// Make sure, that backup server configuration in the CR is valid and cache cluster resources
	done, err := bctx.backupServer.PrepareConfiguration(bctx.r.client, bctx.namespace)
	if err != nil || !done {
		return done, err
	}

	// Check for repository existance and init if needed
	repoExist, done, err := bctx.backupServer.IsRepositoryExist()
	if err != nil || !done {
		return done, err
	}
	if !repoExist {
		done, err := bctx.backupServer.InitRepository()
		if err != nil || !done {
			return done, err
		}
	}

	// Check if credentials provided in the configuration can be used to reach backup server content
	done, err = bctx.backupServer.CheckRepository()
	if err != nil || !done {
		return done, err
	}

	// Update progress
	bctx.state.backupRepositoryReady = true
	bctx.UpdateBackupStatusPhase()

	// Schedule cleanup of the local scratch directory regardless of outcome
	defer os.RemoveAll(backupDestDir)

	// Collect all needed data to backup
	done, err = CollectBackupData(bctx, backupDestDir)
	if err != nil || !done {
		return done, err
	}

	// Update progress
	bctx.state.cheInstallationBackupDataCollected = true
	bctx.UpdateBackupStatusPhase()

	// Upload collected data to backup server
	snapshotStat, done, err := bctx.backupServer.SendSnapshot(backupDestDir)
	if err != nil || !done {
		return done, err
	}

	// Backup is successfully done
	// Update status
	bctx.state.backupSnapshotSent = true
	bctx.backupCR.Status.Phase = bctx.state.GetPhaseMessage()
	bctx.backupCR.Status.Message = "Backup successfully finished at " + time.Now().String()
	bctx.backupCR.Status.State = chev1.STATE_SUCCEEDED
	bctx.backupCR.Status.SnapshotId = snapshotStat.Id
	bctx.backupCR.Status.CheVersion = bctx.cheCR.Status.CheVersion
	if err := bctx.r.UpdateCRStatus(bctx.backupCR); err != nil {
		logrus.Errorf("Failed to update status after successful backup: %v", err)
		return true, err
	}

	logrus.Info(bctx.backupCR.Status.Message)
	return true, nil
}
// UpdateCR writes the given CheClusterBackup CR back to the cluster,
// logging and returning any update failure.
func (r *ReconcileCheClusterBackup) UpdateCR(cr *chev1.CheClusterBackup) error {
	if err := r.client.Update(context.TODO(), cr); err != nil {
		logrus.Errorf("Failed to update %s CR: %s", cr.Name, err.Error())
		return err
	}
	return nil
}
// UpdateCRStatus writes the status subresource of the given CheClusterBackup CR
// back to the cluster, logging and returning any update failure.
func (r *ReconcileCheClusterBackup) UpdateCRStatus(cr *chev1.CheClusterBackup) error {
	if err := r.client.Status().Update(context.TODO(), cr); err != nil {
		logrus.Errorf("Failed to update %s CR status: %s", cr.Name, err.Error())
		return err
	}
	return nil
}