che #9946: Better handling of unrecoverable events for k8s & openshift infra (processing both 'container' and 'pod' events)

Signed-off-by: Ilya Buziuk <ibuziuk@redhat.com>
6.19.x
Ilya Buziuk 2018-06-22 15:35:55 +02:00 committed by Ilya Buziuk
parent dbf623c810
commit f80c1867fc
10 changed files with 109 additions and 99 deletions

View File

@ -355,7 +355,7 @@ che.infra.kubernetes.ingress_start_timeout_min=5
# If during workspace startup an unrecoverable event defined in the property occurs,
# terminate workspace immediately instead of waiting until timeout
che.infra.kubernetes.workspace_unrecoverable_events=Failed Mount,Failed Scheduling,Failed to pull image
che.infra.kubernetes.workspace_unrecoverable_events=FailedMount,FailedScheduling,MountVolume.SetUp failed,Failed to pull image
che.infra.kubernetes.bootstrapper.binary_url=http://${CHE_HOST}:${CHE_PORT}/agent-binaries/linux_amd64/bootstrapper/bootstrapper
che.infra.kubernetes.bootstrapper.installer_timeout_sec=180

View File

@ -69,9 +69,9 @@ import org.eclipse.che.workspace.infrastructure.kubernetes.environment.Kubernete
import org.eclipse.che.workspace.infrastructure.kubernetes.model.KubernetesMachineImpl;
import org.eclipse.che.workspace.infrastructure.kubernetes.model.KubernetesRuntimeState;
import org.eclipse.che.workspace.infrastructure.kubernetes.namespace.KubernetesNamespace;
import org.eclipse.che.workspace.infrastructure.kubernetes.namespace.event.ContainerEvent;
import org.eclipse.che.workspace.infrastructure.kubernetes.namespace.event.ContainerEventHandler;
import org.eclipse.che.workspace.infrastructure.kubernetes.namespace.event.PodActionHandler;
import org.eclipse.che.workspace.infrastructure.kubernetes.namespace.event.PodEvent;
import org.eclipse.che.workspace.infrastructure.kubernetes.namespace.event.PodEventHandler;
import org.eclipse.che.workspace.infrastructure.kubernetes.namespace.pvc.WorkspaceVolumesStrategy;
import org.eclipse.che.workspace.infrastructure.kubernetes.server.KubernetesServerResolver;
import org.eclipse.che.workspace.infrastructure.kubernetes.util.KubernetesSharedPool;
@ -479,10 +479,10 @@ public class KubernetesInternalRuntime<
// TODO https://github.com/eclipse/che/issues/7653
// namespace.pods().watch(new AbnormalStopHandler());
namespace.pods().watchContainers(new MachineLogsPublisher());
namespace.pods().watchEvents(new MachineLogsPublisher());
if (!unrecoverableEvents.isEmpty()) {
Map<String, Pod> pods = getContext().getEnvironment().getPods();
namespace.pods().watchContainers(new UnrecoverableEventHandler(pods));
namespace.pods().watchEvents(new UnrecoverablePodEventHandler(pods));
}
final KubernetesServerResolver serverResolver =
@ -706,11 +706,11 @@ public class KubernetesInternalRuntime<
}
}
/** Listens container's events and terminates workspace if unrecoverable event occurs. */
public class UnrecoverableEventHandler implements ContainerEventHandler {
/** Listens to pod events and terminates the workspace if an unrecoverable event occurs. */
public class UnrecoverablePodEventHandler implements PodEventHandler {
private Map<String, Pod> workspacePods;
public UnrecoverableEventHandler(Map<String, Pod> workspacePods) {
public UnrecoverablePodEventHandler(Map<String, Pod> workspacePods) {
this.workspacePods = workspacePods;
}
@ -719,7 +719,7 @@ public class KubernetesInternalRuntime<
* and 'lastTimestamp' of the event is *after* the time of handler initialization
*/
@Override
public void handle(ContainerEvent event) {
public void handle(PodEvent event) {
if (isWorkspaceEvent(event) && isUnrecoverable(event)) {
String reason = event.getReason();
String message = event.getMessage();
@ -740,7 +740,7 @@ public class KubernetesInternalRuntime<
}
/** Returns true if event belongs to one of the workspace pods, false otherwise */
private boolean isWorkspaceEvent(ContainerEvent event) {
private boolean isWorkspaceEvent(PodEvent event) {
  // The event belongs to the workspace when its pod name is a key of the workspace pods map.
  return workspacePods.containsKey(event.getPodName());
}
@ -751,7 +751,7 @@ public class KubernetesInternalRuntime<
*
* @param event event to check
*/
private boolean isUnrecoverable(ContainerEvent event) {
private boolean isUnrecoverable(PodEvent event) {
boolean isUnrecoverable = false;
String reason = event.getReason();
String message = event.getMessage();
@ -773,10 +773,10 @@ public class KubernetesInternalRuntime<
}
/** Listens to pod events and publishes them as machine logs. */
public class MachineLogsPublisher implements ContainerEventHandler {
public class MachineLogsPublisher implements PodEventHandler {
@Override
public void handle(ContainerEvent event) {
public void handle(PodEvent event) {
final String podName = event.getPodName();
final String containerName = event.getContainerName();
try {

View File

@ -53,10 +53,10 @@ import okhttp3.Response;
import org.eclipse.che.api.workspace.server.spi.InfrastructureException;
import org.eclipse.che.workspace.infrastructure.kubernetes.KubernetesClientFactory;
import org.eclipse.che.workspace.infrastructure.kubernetes.KubernetesInfrastructureException;
import org.eclipse.che.workspace.infrastructure.kubernetes.namespace.event.ContainerEvent;
import org.eclipse.che.workspace.infrastructure.kubernetes.namespace.event.ContainerEventHandler;
import org.eclipse.che.workspace.infrastructure.kubernetes.namespace.event.PodActionHandler;
import org.eclipse.che.workspace.infrastructure.kubernetes.util.ContainerEvents;
import org.eclipse.che.workspace.infrastructure.kubernetes.namespace.event.PodEvent;
import org.eclipse.che.workspace.infrastructure.kubernetes.namespace.event.PodEventHandler;
import org.eclipse.che.workspace.infrastructure.kubernetes.util.PodEvents;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -89,7 +89,7 @@ public class KubernetesPods {
private final String namespace;
private final KubernetesClientFactory clientFactory;
private final ConcurrentLinkedQueue<PodActionHandler> podActionHandlers;
private final ConcurrentLinkedQueue<ContainerEventHandler> containerEventsHandlers;
private final ConcurrentLinkedQueue<PodEventHandler> containerEventsHandlers;
private final String workspaceId;
private Watch podWatch;
private Watch containerWatch;
@ -350,39 +350,33 @@ public class KubernetesPods {
* @param handler pod events handler
* @throws InfrastructureException if any error occurs while watcher starting
*/
public void watchContainers(ContainerEventHandler handler) throws InfrastructureException {
public void watchEvents(PodEventHandler handler) throws InfrastructureException {
if (containerWatch == null) {
final Watcher<Event> watcher =
new Watcher<Event>() {
@Override
public void eventReceived(Action action, Event event) {
ObjectReference involvedObject = event.getInvolvedObject();
String fieldPath = involvedObject.getFieldPath();
// check that the event is related to a pod
if (POD_OBJECT_KIND.equals(involvedObject.getKind()) && fieldPath != null) {
Matcher containerFieldMatcher = CONTAINER_FIELD_PATH_PATTERN.matcher(fieldPath);
if (containerFieldMatcher.matches()) {
if (POD_OBJECT_KIND.equals(involvedObject.getKind())) {
String podName = involvedObject.getName();
String containerName = containerFieldMatcher.group(CONTAINER_NAME_GROUP);
String podName = involvedObject.getName();
ContainerEvent containerEvent =
new ContainerEvent(
podName,
containerName,
event.getReason(),
event.getMessage(),
event.getMetadata().getCreationTimestamp(),
event.getLastTimestamp());
PodEvent podEvent =
new PodEvent(
podName,
getContainerName(involvedObject.getFieldPath()),
event.getReason(),
event.getMessage(),
event.getMetadata().getCreationTimestamp(),
event.getLastTimestamp());
try {
if (happenedAfterWatcherInitialization(containerEvent)) {
containerEventsHandlers.forEach(h -> h.handle(containerEvent));
}
} catch (ParseException e) {
LOG.error("Failed to parse last timestamp of the event: {}", containerEvent);
try {
if (happenedAfterWatcherInitialization(podEvent)) {
containerEventsHandlers.forEach(h -> h.handle(podEvent));
}
} catch (ParseException e) {
LOG.error("Failed to parse last timestamp of the event: {}", podEvent);
}
}
}
@ -390,15 +384,31 @@ public class KubernetesPods {
@Override
public void onClose(KubernetesClientException ignored) {}
/**
 * Extracts the container name from the `fieldPath` of the object involved in an event.
 *
 * <p>For container-related events `fieldPath` is expected to look like `spec.containers{web}`,
 * where `web` is the container name; the exact accepted shape is whatever
 * CONTAINER_FIELD_PATH_PATTERN matches.
 *
 * @param fieldPath field path of the involved object, may be null
 * @return the container name, or null when the path is null or does not match the pattern
 */
private String getContainerName(String fieldPath) {
  if (fieldPath == null) {
    return null;
  }
  Matcher fieldPathMatcher = CONTAINER_FIELD_PATH_PATTERN.matcher(fieldPath);
  return fieldPathMatcher.matches() ? fieldPathMatcher.group(CONTAINER_NAME_GROUP) : null;
}
/**
* Returns true if 'lastTimestamp' of the event is *after* the time of the watcher
* initialization
*/
private boolean happenedAfterWatcherInitialization(ContainerEvent event)
private boolean happenedAfterWatcherInitialization(PodEvent event)
throws ParseException {
String eventLastTimestamp = event.getLastTimestamp();
Date eventLastTimestampDate =
ContainerEvents.convertEventTimestampToDate(eventLastTimestamp);
PodEvents.convertEventTimestampToDate(eventLastTimestamp);
return eventLastTimestampDate.after(watcherInitializationDate);
}
};

View File

@ -11,22 +11,23 @@
package org.eclipse.che.workspace.infrastructure.kubernetes.namespace.event;
import java.util.Objects;
import org.eclipse.che.commons.annotation.Nullable;
/**
* The event that should be published when container change occurs, e.g. image pulled, container
* started.
* The event that should be published when the pod event occurs, e.g. pulling image, created
* container
*
* @author Sergii Leshchenko
*/
public class ContainerEvent {
public class PodEvent {
private final String podName;
private final String containerName;
@Nullable private final String containerName;
private final String reason;
private final String message;
private final String creationTimestamp;
private final String lastTimestamp;
public ContainerEvent(
public PodEvent(
String podName,
String containerName,
String reason,
@ -41,12 +42,16 @@ public class ContainerEvent {
this.lastTimestamp = lastTimestamp;
}
/** Returns name of pod related to container. */
/** Returns the name of the pod that the event relates to. */
public String getPodName() {
return podName;
}
/** Returns container name which produced event. */
/**
* Returns the name of the container that produced the event, or null if the event relates to the
* pod as a whole rather than to any particular container.
*/
@Nullable
public String getContainerName() {
return containerName;
}
@ -79,7 +84,7 @@ public class ContainerEvent {
if (o == null || getClass() != o.getClass()) {
return false;
}
ContainerEvent that = (ContainerEvent) o;
PodEvent that = (PodEvent) o;
return Objects.equals(podName, that.podName)
&& Objects.equals(containerName, that.containerName)
&& Objects.equals(reason, that.reason)
@ -95,7 +100,7 @@ public class ContainerEvent {
@Override
public String toString() {
return "ContainerEvent{"
return "PodEvent{"
+ "podName='"
+ podName
+ '\''

View File

@ -11,12 +11,12 @@
package org.eclipse.che.workspace.infrastructure.kubernetes.namespace.event;
/**
* Defines the handling mechanism for Kubernetes container events.
* Defines the handling mechanism for Kubernetes events.
*
* @author Sergii Leshchenko
*/
public interface ContainerEventHandler {
public interface PodEventHandler {
/** Handles the pod event. */
void handle(ContainerEvent event);
void handle(PodEvent event);
}

View File

@ -14,30 +14,26 @@ import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import org.eclipse.che.workspace.infrastructure.kubernetes.namespace.event.ContainerEvent;
import org.eclipse.che.workspace.infrastructure.kubernetes.namespace.event.PodEvent;
/**
* Helps to simplify the interaction with the {@link ContainerEvent}.
* Helps to simplify the interaction with the {@link PodEvent}.
*
* @author Ilya Buziuk
*/
public final class ContainerEvents {
public final class PodEvents {
private static DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssX");
private ContainerEvents() {}
private PodEvents() {}
/**
* Converts the time of {@link ContainerEvent} e.g. '2018-05-15T16:17:54Z' to the {@link Date}
* format
* Converts the time of {@link PodEvent} e.g. '2018-05-15T16:17:54Z' to the {@link Date} format
*/
public static Date convertEventTimestampToDate(String timestamp) throws ParseException {
  // SimpleDateFormat keeps mutable state while parsing and is not thread-safe, so a fresh
  // instance is created per call instead of sharing one between concurrently running callers.
  DateFormat eventTimestampFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssX");
  return eventTimestampFormat.parse(timestamp);
}
/**
* Converts the {@link Date} to {@link ContainerEvent} timestamp format e.g.
* '2018-05-15T16:17:54Z'
*/
/** Converts the {@link Date} to {@link PodEvent} timestamp format e.g. '2018-05-15T16:17:54Z' */
public static String convertDateToEventTimestamp(Date date) {
  // SimpleDateFormat is not thread-safe, so a fresh instance is created per call instead of
  // sharing one between concurrently running callers.
  DateFormat eventTimestampFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssX");
  // Pin UTC so the result always has the documented '...Z' form. With the JVM default zone the
  // single-letter 'X' pattern drops the minutes of offsets such as +05:30, which would produce
  // a timestamp denoting a different instant than the given date.
  eventTimestampFormat.setTimeZone(java.util.TimeZone.getTimeZone("UTC"));
  return eventTimestampFormat.format(date);
}

View File

@ -98,7 +98,7 @@ import org.eclipse.che.api.workspace.server.spi.environment.InternalMachineConfi
import org.eclipse.che.api.workspace.shared.dto.event.MachineLogEvent;
import org.eclipse.che.api.workspace.shared.dto.event.MachineStatusEvent;
import org.eclipse.che.workspace.infrastructure.kubernetes.KubernetesInternalRuntime.MachineLogsPublisher;
import org.eclipse.che.workspace.infrastructure.kubernetes.KubernetesInternalRuntime.UnrecoverableEventHandler;
import org.eclipse.che.workspace.infrastructure.kubernetes.KubernetesInternalRuntime.UnrecoverablePodEventHandler;
import org.eclipse.che.workspace.infrastructure.kubernetes.bootstrapper.KubernetesBootstrapper;
import org.eclipse.che.workspace.infrastructure.kubernetes.bootstrapper.KubernetesBootstrapperFactory;
import org.eclipse.che.workspace.infrastructure.kubernetes.cache.KubernetesMachineCache;
@ -114,11 +114,11 @@ import org.eclipse.che.workspace.infrastructure.kubernetes.namespace.KubernetesN
import org.eclipse.che.workspace.infrastructure.kubernetes.namespace.KubernetesPods;
import org.eclipse.che.workspace.infrastructure.kubernetes.namespace.KubernetesSecrets;
import org.eclipse.che.workspace.infrastructure.kubernetes.namespace.KubernetesServices;
import org.eclipse.che.workspace.infrastructure.kubernetes.namespace.event.ContainerEvent;
import org.eclipse.che.workspace.infrastructure.kubernetes.namespace.event.PodEvent;
import org.eclipse.che.workspace.infrastructure.kubernetes.namespace.pvc.WorkspaceVolumesStrategy;
import org.eclipse.che.workspace.infrastructure.kubernetes.server.KubernetesServerResolver;
import org.eclipse.che.workspace.infrastructure.kubernetes.util.ContainerEvents;
import org.eclipse.che.workspace.infrastructure.kubernetes.util.KubernetesSharedPool;
import org.eclipse.che.workspace.infrastructure.kubernetes.util.PodEvents;
import org.eclipse.che.workspace.infrastructure.kubernetes.util.RuntimeEventsPublisher;
import org.mockito.ArgumentCaptor;
import org.mockito.Captor;
@ -293,8 +293,7 @@ public class KubernetesInternalRuntimeTest {
verify(pods).create(any());
verify(ingresses).create(any());
verify(services).create(any());
verify(secrets).create(any());
verify(namespace.pods(), times(2)).watchContainers(any());
verify(namespace.pods(), times(2)).watchEvents(any());
verify(bootstrapper, times(2)).bootstrapAsync();
verify(eventService, times(4)).publish(any());
verifyOrderedEventsChains(
@ -322,7 +321,7 @@ public class KubernetesInternalRuntimeTest {
verify(pods).create(any());
verify(ingresses).create(any());
verify(services).create(any());
verify(namespace.pods(), times(1)).watchContainers(any());
verify(namespace.pods(), times(1)).watchEvents(any());
verify(bootstrapper, times(2)).bootstrapAsync();
verify(eventService, times(4)).publish(any());
verifyOrderedEventsChains(
@ -436,14 +435,14 @@ public class KubernetesInternalRuntimeTest {
@Test
public void testRepublishContainerOutputAsMachineLogEvents() throws Exception {
final MachineLogsPublisher logsPublisher = internalRuntime.new MachineLogsPublisher();
final ContainerEvent out1 =
final PodEvent out1 =
mockContainerEvent(
WORKSPACE_POD_NAME,
"Pulling",
"pulling image",
EVENT_CREATION_TIMESTAMP,
getCurrentTimestampWithOneHourShiftAhead());
final ContainerEvent out2 =
final PodEvent out2 =
mockContainerEvent(
WORKSPACE_POD_NAME,
"Pulled",
@ -465,9 +464,9 @@ public class KubernetesInternalRuntimeTest {
@Test
public void testHandleUnrecoverableEventByReason() throws Exception {
final String unrecoverableEventReason = "Failed Mount";
final UnrecoverableEventHandler unrecoverableEventHandler =
internalRuntime.new UnrecoverableEventHandler(k8sEnv.getPods());
final ContainerEvent unrecoverableEvent =
final UnrecoverablePodEventHandler unrecoverableEventHandler =
internalRuntime.new UnrecoverablePodEventHandler(k8sEnv.getPods());
final PodEvent unrecoverableEvent =
mockContainerEvent(
WORKSPACE_POD_NAME,
unrecoverableEventReason,
@ -483,9 +482,9 @@ public class KubernetesInternalRuntimeTest {
public void testHandleUnrecoverableEventByMessage() throws Exception {
final String unrecoverableEventMessage =
"Failed to pull image eclipse/che-server:nightly-centos";
final UnrecoverableEventHandler unrecoverableEventHandler =
internalRuntime.new UnrecoverableEventHandler(k8sEnv.getPods());
final ContainerEvent unrecoverableEvent =
final UnrecoverablePodEventHandler unrecoverableEventHandler =
internalRuntime.new UnrecoverablePodEventHandler(k8sEnv.getPods());
final PodEvent unrecoverableEvent =
mockContainerEvent(
WORKSPACE_POD_NAME,
"Pulling",
@ -501,9 +500,9 @@ public class KubernetesInternalRuntimeTest {
public void testDoNotHandleUnrecoverableEventFromNonWorkspacePod() throws Exception {
final String unrecoverableEventMessage =
"Failed to pull image eclipse/che-server:nightly-centos";
final UnrecoverableEventHandler unrecoverableEventHandler =
internalRuntime.new UnrecoverableEventHandler(k8sEnv.getPods());
final ContainerEvent unrecoverableEvent =
final UnrecoverablePodEventHandler unrecoverableEventHandler =
internalRuntime.new UnrecoverablePodEventHandler(k8sEnv.getPods());
final PodEvent unrecoverableEvent =
mockContainerEvent(
"NonWorkspacePod",
"Pulling",
@ -518,9 +517,9 @@ public class KubernetesInternalRuntimeTest {
@Test
public void testHandleRegularEvent() throws Exception {
final UnrecoverableEventHandler unrecoverableEventHandler =
internalRuntime.new UnrecoverableEventHandler(k8sEnv.getPods());
final ContainerEvent regularEvent =
final UnrecoverablePodEventHandler unrecoverableEventHandler =
internalRuntime.new UnrecoverablePodEventHandler(k8sEnv.getPods());
final PodEvent regularEvent =
mockContainerEvent(
WORKSPACE_POD_NAME,
"Pulling",
@ -535,7 +534,7 @@ public class KubernetesInternalRuntimeTest {
@Test
public void testDoNotPublishForeignMachineOutput() throws ParseException {
final MachineLogsPublisher logsPublisher = internalRuntime.new MachineLogsPublisher();
final ContainerEvent out1 =
final PodEvent out1 =
mockContainerEvent(
WORKSPACE_POD_NAME,
"Created",
@ -833,13 +832,13 @@ public class KubernetesInternalRuntimeTest {
return metadata;
}
private static ContainerEvent mockContainerEvent(
private static PodEvent mockContainerEvent(
String podName,
String reason,
String message,
String creationTimestamp,
String lastTimestamp) {
final ContainerEvent event = mock(ContainerEvent.class);
final PodEvent event = mock(PodEvent.class);
when(event.getPodName()).thenReturn(podName);
when(event.getContainerName()).thenReturn(CONTAINER_NAME_1);
when(event.getReason()).thenReturn(reason);
@ -849,7 +848,7 @@ public class KubernetesInternalRuntimeTest {
return event;
}
private static MachineLogEvent asMachineLogEvent(ContainerEvent event) {
private static MachineLogEvent asMachineLogEvent(PodEvent event) {
return newDto(MachineLogEvent.class)
.withRuntimeId(DtoConverter.asDto(IDENTITY))
.withText(event.getMessage())
@ -859,7 +858,7 @@ public class KubernetesInternalRuntimeTest {
private String getCurrentTimestampWithOneHourShiftAhead() throws ParseException {
Date currentTimestampWithOneHourShiftAhead = new Date(new Date().getTime() + 3600 * 1000);
return ContainerEvents.convertDateToEventTimestamp(currentTimestampWithOneHourShiftAhead);
return PodEvents.convertDateToEventTimestamp(currentTimestampWithOneHourShiftAhead);
}
private static IntOrString intOrString(int port) {

View File

@ -16,31 +16,31 @@ import org.testng.Assert;
import org.testng.annotations.Test;
/**
* Tests {@link ContainerEvents}.
* Tests {@link PodEvents}.
*
* @author Ilya Buziuk
*/
public class ContainerEventsTest {
public class PodEventsTest {
@Test
public void eventDateShouldBeBeforeCurrentDate() throws ParseException {
String eventTime = "2018-05-15T16:17:54Z";
Date eventDate = ContainerEvents.convertEventTimestampToDate(eventTime);
Date eventDate = PodEvents.convertEventTimestampToDate(eventTime);
Assert.assertTrue(eventDate.before(new Date()));
}
@Test(expectedExceptions = ParseException.class)
public void throwsParseExceptionWhenDateFormatIsInvalid() throws ParseException {
String eventTime = "2018-05-15T16:143435Z";
ContainerEvents.convertEventTimestampToDate(eventTime);
PodEvents.convertEventTimestampToDate(eventTime);
}
@Test
public void getEventTimestampFromDate() throws ParseException {
String timestamp = "2018-05-15T16:17:54Z";
Date date = ContainerEvents.convertEventTimestampToDate(timestamp);
String timestampFromDate = ContainerEvents.convertDateToEventTimestamp(date);
Date dateAfterParsingTimestamp = ContainerEvents.convertEventTimestampToDate(timestampFromDate);
Date date = PodEvents.convertEventTimestampToDate(timestamp);
String timestampFromDate = PodEvents.convertDateToEventTimestamp(date);
Date dateAfterParsingTimestamp = PodEvents.convertEventTimestampToDate(timestampFromDate);
Assert.assertEquals(date, dateAfterParsingTimestamp);
}
}

View File

@ -110,10 +110,10 @@ public class OpenShiftInternalRuntime extends KubernetesInternalRuntime<OpenShif
// TODO https://github.com/eclipse/che/issues/7653
// project.pods().watch(new AbnormalStopHandler());
project.pods().watchContainers(new MachineLogsPublisher());
project.pods().watchEvents(new MachineLogsPublisher());
if (!unrecoverableEvents.isEmpty()) {
Map<String, Pod> pods = getContext().getEnvironment().getPods();
project.pods().watchContainers(new UnrecoverableEventHandler(pods));
project.pods().watchEvents(new UnrecoverablePodEventHandler(pods));
}
doStartMachine(new OpenShiftServerResolver(createdServices, createdRoutes));

View File

@ -235,7 +235,7 @@ public class OpenShiftInternalRuntimeTest {
verify(services).create(any());
verify(secrets).create(any());
verify(project.pods(), times(2)).watchContainers(any());
verify(project.pods(), times(2)).watchEvents(any());
verify(eventService, times(2)).publish(any());
verifyEventsOrder(newEvent(M1_NAME, STARTING), newEvent(M2_NAME, STARTING));
}
@ -254,7 +254,7 @@ public class OpenShiftInternalRuntimeTest {
verify(routes).create(any());
verify(services).create(any());
verify(project.pods(), times(1)).watchContainers(any());
verify(project.pods(), times(1)).watchEvents(any());
verify(eventService, times(2)).publish(any());
verifyEventsOrder(newEvent(M1_NAME, STARTING), newEvent(M2_NAME, STARTING));
}