fix: improve restore stability (#996)

* fix: restore reconciler

Signed-off-by: Anatolii Bazko <abazko@redhat.com>
pull/1002/head
Anatolii Bazko 2021-08-10 17:13:58 +03:00 committed by GitHub
parent bbb2b4766c
commit 7ff7399be9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 32 additions and 14 deletions

View File

@ -124,14 +124,25 @@ func cleanPreviousInstallation(rctx *RestoreContext, dataDir string) (bool, erro
}
// Delete Che CR to stop operator from dealing with current installation
err := rctx.r.client.Delete(context.TODO(), rctx.cheCR)
if err == nil {
// Che CR is marked for deletion, but actually still exists.
// Wait for finalizers and actual resource deletion (not found expected).
logrus.Info("Restore: Waiting for old Che CR finalizers to be completed")
return false, nil
} else if !errors.IsNotFound(err) {
actualCheCR, cheCRCount, err := util.FindCheCRinNamespace(rctx.r.client, rctx.namespace)
if cheCRCount == -1 {
// error occurred while retrieving CheCluster CR
return false, err
} else if actualCheCR != nil {
if actualCheCR.GetObjectMeta().GetDeletionTimestamp().IsZero() {
logrus.Infof("Restore: Deleteing CheCluster custom resource in '%s' namespace", rctx.namespace)
err := rctx.r.client.Delete(context.TODO(), actualCheCR)
if err == nil {
// Che CR is marked for deletion, but actually still exists.
// Wait for finalizers and actual resource deletion (not found expected).
logrus.Info("Restore: Waiting for old Che CR finalizers to be completed")
return false, nil
} else if !errors.IsNotFound(err) {
return false, err
}
} else {
return false, nil
}
}
// Define label selector for resources to clean up
@ -316,11 +327,16 @@ func restoreCheCR(rctx *RestoreContext, dataDir string) (bool, error) {
if err := rctx.r.client.Create(context.TODO(), cheCR); err != nil {
if errors.IsAlreadyExists(err) {
return false, rctx.r.client.Delete(context.TODO(), cheCR)
// We should take into account that every step can be executed several times due to async behavior.
// 1. We ensured that CheCluster is removed before restoring.
// 2. If it is already created then it is safe to continue (was created here on a previous reconcile loop)
return true, nil
}
return false, err
}
logrus.Info("Restore: CheCluster custom resource created")
rctx.cheCR = cheCR
return true, nil
}

View File

@ -211,13 +211,12 @@ func (r *ReconcileCheClusterRestore) doReconcile(restoreCR *chev1.CheClusterRest
return done, err
}
rctx.state.cheRestored = true
rctx.UpdateRestoreStatus()
// Clean up backup data after successful restore
if err := os.RemoveAll(backupDataDestDir); err != nil {
return false, err
}
rctx.state.cheRestored = true
}
rctx.restoreCR.Status.Message = "Restore successfully finished"
@ -247,5 +246,6 @@ func (r *ReconcileCheClusterRestore) UpdateCRStatus(cr *chev1.CheClusterRestore)
logrus.Errorf("Failed to update %s CR status: %s", cr.Name, err.Error())
return err
}
logrus.Infof("Status updated with %v: ", cr.Status)
return nil
}

View File

@ -17,6 +17,7 @@ import (
chev1 "github.com/eclipse-che/che-operator/api/v1"
backup "github.com/eclipse-che/che-operator/pkg/backup_servers"
"github.com/eclipse-che/che-operator/pkg/util"
"github.com/sirupsen/logrus"
)
type RestoreContext struct {
@ -158,5 +159,6 @@ func NewRestoreState(restoreCR *chev1.CheClusterRestore) (*RestoreState, error)
}
}
logrus.Debugf("Restore state: %v", rs)
return rs, nil
}

View File

@ -571,7 +571,7 @@ func ReloadCheCluster(client client.Client, cheCluster *orgv1.CheCluster) error
func FindCheCRinNamespace(client client.Client, namespace string) (*orgv1.CheCluster, int, error) {
cheClusters := &orgv1.CheClusterList{}
if err := client.List(context.TODO(), cheClusters); err != nil {
return nil, 0, err
return nil, -1, err
}
if len(cheClusters.Items) != 1 {
@ -582,7 +582,7 @@ func FindCheCRinNamespace(client client.Client, namespace string) (*orgv1.CheClu
namespacedName := types.NamespacedName{Namespace: namespace, Name: cheClusters.Items[0].GetName()}
err := client.Get(context.TODO(), namespacedName, cheCR)
if err != nil {
return nil, 0, err
return nil, -1, err
}
return cheCR, 1, nil
}
@ -609,6 +609,6 @@ func UpdateBackupServerConfigurationStatus(client client.Client, backupServerCon
// ClearMetadata resets selected fields of the given object metadata in place:
// ResourceVersion is set to the empty string, and Finalizers and ManagedFields
// are replaced with empty slices.
// It is required to remove ResourceVersion in order to be able to apply the yaml again.
func ClearMetadata(objectMeta *metav1.ObjectMeta) {
objectMeta.ResourceVersion = ""
// NOTE(review): presumably finalizers are dropped so a re-applied object does not
// carry over finalizers from its previous life — confirm against the restore flow.
objectMeta.Finalizers = []string{}
objectMeta.ManagedFields = []metav1.ManagedFieldsEntry{}
}