fix: improve restore stability (#996)
* fix: restore reconciler
Signed-off-by: Anatolii Bazko <abazko@redhat.com>
pull/1002/head
parent
bbb2b4766c
commit
7ff7399be9
|
|
@ -124,14 +124,25 @@ func cleanPreviousInstallation(rctx *RestoreContext, dataDir string) (bool, erro
|
|||
}
|
||||
|
||||
// Delete Che CR to stop operator from dealing with current installation
|
||||
err := rctx.r.client.Delete(context.TODO(), rctx.cheCR)
|
||||
if err == nil {
|
||||
// Che CR is marked for deletion, but actually still exists.
|
||||
// Wait for finalizers and actual resource deletion (not found expected).
|
||||
logrus.Info("Restore: Waiting for old Che CR finalizers to be completed")
|
||||
return false, nil
|
||||
} else if !errors.IsNotFound(err) {
|
||||
actualCheCR, cheCRCount, err := util.FindCheCRinNamespace(rctx.r.client, rctx.namespace)
|
||||
if cheCRCount == -1 {
|
||||
// error occurred while retrieving CheCluster CR
|
||||
return false, err
|
||||
} else if actualCheCR != nil {
|
||||
if actualCheCR.GetObjectMeta().GetDeletionTimestamp().IsZero() {
|
||||
logrus.Infof("Restore: Deleteing CheCluster custom resource in '%s' namespace", rctx.namespace)
|
||||
err := rctx.r.client.Delete(context.TODO(), actualCheCR)
|
||||
if err == nil {
|
||||
// Che CR is marked for deletion, but actually still exists.
|
||||
// Wait for finalizers and actual resource deletion (not found expected).
|
||||
logrus.Info("Restore: Waiting for old Che CR finalizers to be completed")
|
||||
return false, nil
|
||||
} else if !errors.IsNotFound(err) {
|
||||
return false, err
|
||||
}
|
||||
} else {
|
||||
return false, nil
|
||||
}
|
||||
}
|
||||
|
||||
// Define label selector for resources to clean up
|
||||
|
|
@ -316,11 +327,16 @@ func restoreCheCR(rctx *RestoreContext, dataDir string) (bool, error) {
|
|||
|
||||
if err := rctx.r.client.Create(context.TODO(), cheCR); err != nil {
|
||||
if errors.IsAlreadyExists(err) {
|
||||
return false, rctx.r.client.Delete(context.TODO(), cheCR)
|
||||
// We should take into account that every step can be executed several times due to async behavior.
|
||||
// 1. We ensured that CheCluster is removed before restoring.
|
||||
// 2. If it is already created then it is safe to continue (was created here on a previous reconcile loop)
|
||||
return true, nil
|
||||
}
|
||||
return false, err
|
||||
}
|
||||
|
||||
logrus.Info("Restore: CheCluster custom resource created")
|
||||
|
||||
rctx.cheCR = cheCR
|
||||
return true, nil
|
||||
}
|
||||
|
|
|
|||
|
|
@ -211,13 +211,12 @@ func (r *ReconcileCheClusterRestore) doReconcile(restoreCR *chev1.CheClusterRest
|
|||
return done, err
|
||||
}
|
||||
|
||||
rctx.state.cheRestored = true
|
||||
rctx.UpdateRestoreStatus()
|
||||
|
||||
// Clean up backup data after successful restore
|
||||
if err := os.RemoveAll(backupDataDestDir); err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
rctx.state.cheRestored = true
|
||||
}
|
||||
|
||||
rctx.restoreCR.Status.Message = "Restore successfully finished"
|
||||
|
|
@ -247,5 +246,6 @@ func (r *ReconcileCheClusterRestore) UpdateCRStatus(cr *chev1.CheClusterRestore)
|
|||
logrus.Errorf("Failed to update %s CR status: %s", cr.Name, err.Error())
|
||||
return err
|
||||
}
|
||||
logrus.Infof("Status updated with %v: ", cr.Status)
|
||||
return nil
|
||||
}
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@ import (
|
|||
chev1 "github.com/eclipse-che/che-operator/api/v1"
|
||||
backup "github.com/eclipse-che/che-operator/pkg/backup_servers"
|
||||
"github.com/eclipse-che/che-operator/pkg/util"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
type RestoreContext struct {
|
||||
|
|
@ -158,5 +159,6 @@ func NewRestoreState(restoreCR *chev1.CheClusterRestore) (*RestoreState, error)
|
|||
}
|
||||
}
|
||||
|
||||
logrus.Debugf("Restore state: %v", rs)
|
||||
return rs, nil
|
||||
}
|
||||
|
|
|
|||
|
|
@ -571,7 +571,7 @@ func ReloadCheCluster(client client.Client, cheCluster *orgv1.CheCluster) error
|
|||
func FindCheCRinNamespace(client client.Client, namespace string) (*orgv1.CheCluster, int, error) {
|
||||
cheClusters := &orgv1.CheClusterList{}
|
||||
if err := client.List(context.TODO(), cheClusters); err != nil {
|
||||
return nil, 0, err
|
||||
return nil, -1, err
|
||||
}
|
||||
|
||||
if len(cheClusters.Items) != 1 {
|
||||
|
|
@ -582,7 +582,7 @@ func FindCheCRinNamespace(client client.Client, namespace string) (*orgv1.CheClu
|
|||
namespacedName := types.NamespacedName{Namespace: namespace, Name: cheClusters.Items[0].GetName()}
|
||||
err := client.Get(context.TODO(), namespacedName, cheCR)
|
||||
if err != nil {
|
||||
return nil, 0, err
|
||||
return nil, -1, err
|
||||
}
|
||||
return cheCR, 1, nil
|
||||
}
|
||||
|
|
@ -609,6 +609,6 @@ func UpdateBackupServerConfigurationStatus(client client.Client, backupServerCon
|
|||
// It is required to remove ResourceVersion in order to be able to apply the yaml again.
|
||||
func ClearMetadata(objectMeta *metav1.ObjectMeta) {
|
||||
objectMeta.ResourceVersion = ""
|
||||
|
||||
objectMeta.Finalizers = []string{}
|
||||
objectMeta.ManagedFields = []metav1.ManagedFieldsEntry{}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue