diff --git a/controllers/devworkspace/solver/che_routing.go b/controllers/devworkspace/solver/che_routing.go index e22213619..5a0c16568 100644 --- a/controllers/devworkspace/solver/che_routing.go +++ b/controllers/devworkspace/solver/che_routing.go @@ -507,7 +507,9 @@ func provisionMainWorkspaceRoute(cheCluster *chev2.CheCluster, routing *dwo.DevW } } -// when accessing workspace url, if 5xx error is returned, redirect to the dashboard service +// add5XXErrorHandling adds traefik middlewares to the traefik config such that +// when a connection cannot be established with the workspace service (causing a 5XX error code), traefik +// routes the request to the dashboard service instead. func add5XXErrorHandling(cfg *gateway.TraefikConfig, dwId string) { // revalidate cache to prevent case where redirect to dashboard after trying to restart an idled workspace noCacheHeader := map[string]string{"cache-control": "no-store, max-age=0"} @@ -518,13 +520,16 @@ func add5XXErrorHandling(cfg *gateway.TraefikConfig, dwId string) { cfg.AddErrors(dwId, "500-599", dashboardServiceName, "/") if infrastructure.IsOpenShift() { - // On OpenShift, fire errors middleware after 4 seconds of not being able to connect to service + // If a connection cannot be established with the workspace service within the `DialTimeout`, traefik + // will retry the connection with an exponential backoff cfg.HTTP.ServersTransports = map[string]*gateway.TraefikConfigServersTransport{} + cfg.HTTP.ServersTransports[dwId] = &gateway.TraefikConfigServersTransport{ ForwardingTimeouts: &gateway.TraefikConfigForwardingTimeouts{ - DialTimeout: "4s", + DialTimeout: "2500ms", }, } + cfg.AddRetry(dwId, 2, "500ms") cfg.HTTP.Services[dwId].LoadBalancer.ServersTransport = dwId } } diff --git a/controllers/devworkspace/solver/che_routing_test.go b/controllers/devworkspace/solver/che_routing_test.go index fc4783d4d..9e9d65196 100644 --- a/controllers/devworkspace/solver/che_routing_test.go +++ 
b/controllers/devworkspace/solver/che_routing_test.go @@ -402,7 +402,7 @@ func TestCreateRelocatedObjectsOpenshift(t *testing.T) { workspaceMainConfig := gateway.TraefikConfig{} assert.NoError(t, yaml.Unmarshal([]byte(traefikMainWorkspaceConfig), &workspaceMainConfig)) - assert.Len(t, workspaceMainConfig.HTTP.Middlewares, 5) + assert.Len(t, workspaceMainConfig.HTTP.Middlewares, 6) wsid = "wsid" mwares := []string{ @@ -410,7 +410,8 @@ func TestCreateRelocatedObjectsOpenshift(t *testing.T) { wsid + gateway.StripPrefixMiddlewareSuffix, wsid + gateway.HeaderRewriteMiddlewareSuffix, wsid + gateway.HeadersMiddlewareSuffix, - wsid + gateway.ErrorsMiddlewareSuffix} + wsid + gateway.ErrorsMiddlewareSuffix, + wsid + gateway.RetryMiddlewareSuffix} for _, mware := range mwares { assert.Contains(t, workspaceMainConfig.HTTP.Middlewares, mware) diff --git a/pkg/deploy/gateway/traefik_config.go b/pkg/deploy/gateway/traefik_config.go index d561fa039..b74ddf56a 100644 --- a/pkg/deploy/gateway/traefik_config.go +++ b/pkg/deploy/gateway/traefik_config.go @@ -39,6 +39,7 @@ type TraefikConfigMiddleware struct { ForwardAuth *TraefikConfigForwardAuth `json:"forwardAuth,omitempty"` Errors *TraefikConfigErrors `json:"errors,omitempty"` Headers *TraefikConfigHeaders `json:"headers,omitempty"` + Retry *TraefikConfigRetry `json:"retry,omitempty"` Plugin *TraefikPlugin `json:"plugin,omitempty"` } @@ -75,6 +76,12 @@ type TraefikConfigHeaders struct { CustomResponseHeaders map[string]string `json:"customResponseHeaders,omitempty"` } +// TraefikConfigRetry holds the settings of a Traefik retry middleware. +type TraefikConfigRetry struct { + Attempts int `json:"attempts,omitempty"` + InitialInterval string `json:"initialInterval,omitempty"` +} + type TraefikPlugin struct { HeaderRewrite *TraefikPluginHeaderRewrite `json:"header-rewrite,omitempty"` } diff --git a/pkg/deploy/gateway/traefik_config_util.go b/pkg/deploy/gateway/traefik_config_util.go index ad74c1c92..eef27c416 100644 --- 
a/pkg/deploy/gateway/traefik_config_util.go +++ b/pkg/deploy/gateway/traefik_config_util.go @@ -17,6 +17,7 @@ const ( AuthMiddlewareSuffix = "-auth" ErrorsMiddlewareSuffix = "-errors" HeadersMiddlewareSuffix = "-headers" + RetryMiddlewareSuffix = "-retry" ) func CreateEmptyTraefikConfig() *TraefikConfig { @@ -126,3 +127,17 @@ func (cfg *TraefikConfig) AddResponseHeaders(componentName string, headers map[s }, } } + +// AddRetry attaches a retry middleware, named componentName + RetryMiddlewareSuffix, to the router +// registered under componentName, configured with the given attempts and initialInterval. +// The router must already exist in cfg; the method dereferences it without a nil check. +func (cfg *TraefikConfig) AddRetry(componentName string, attempts int, initialInterval string) { + middlewareName := componentName + RetryMiddlewareSuffix + cfg.HTTP.Routers[componentName].Middlewares = append(cfg.HTTP.Routers[componentName].Middlewares, middlewareName) + cfg.HTTP.Middlewares[middlewareName] = &TraefikConfigMiddleware{ + Retry: &TraefikConfigRetry{ + Attempts: attempts, + InitialInterval: initialInterval, + }, + } +} diff --git a/pkg/deploy/gateway/traefik_config_util_test.go b/pkg/deploy/gateway/traefik_config_util_test.go index 413ed4c20..fc6b39dcb 100644 --- a/pkg/deploy/gateway/traefik_config_util_test.go +++ b/pkg/deploy/gateway/traefik_config_util_test.go @@ -124,6 +124,22 @@ func TestAddResponseHeaders(t *testing.T) { } } +func TestAddRetry(t *testing.T) { + attempts := 3 + initialInterval := "100ms" + + cfg := CreateCommonTraefikConfig(testComponentName, testRule, 1, "http://svc:8080", []string{}) + cfg.AddRetry(testComponentName, attempts, initialInterval) + + assert.Len(t, cfg.HTTP.Routers[testComponentName].Middlewares, 1, *cfg) + assert.Len(t, cfg.HTTP.Middlewares, 1, *cfg) + middlewareName := cfg.HTTP.Routers[testComponentName].Middlewares[0] + if assert.Contains(t, cfg.HTTP.Middlewares, middlewareName, *cfg) && assert.NotNil(t, cfg.HTTP.Middlewares[middlewareName].Retry) { + assert.Equal(t, attempts, cfg.HTTP.Middlewares[middlewareName].Retry.Attempts) + assert.Equal(t, initialInterval, cfg.HTTP.Middlewares[middlewareName].Retry.InitialInterval) + } +} + func TestMiddlewaresPreserveOrder(t *testing.T) { 
t.Run("strip-header", func(t *testing.T) { cfg := CreateCommonTraefikConfig(testComponentName, testRule, 1, "http://svc:8080", []string{})