From 1d9c426e2d73397fe999edef58b0bbf3d2bebffc Mon Sep 17 00:00:00 2001 From: Angel Misevski Date: Wed, 1 Mar 2017 00:56:32 -0500 Subject: [PATCH] Ensure OpenShift pod is terminated before returning from removeContainer (#4230) Prevents issue where it is possible to start workspace while pod is terminating. When this occurs, Che creates a new pod but workspace start fails. This pod must be removed manually before that workspace can be launched again. This is done by a) waiting for pod to terminate before returning from removeContainer, and b) removing created deployment and service when an exception occurs during createContainer. Additionally, increases maximum wait time for retreiving OpenShift image stream metadata Signed-off-by: Angel Misevski --- .../openshift/client/OpenShiftConnector.java | 160 +++++++++--------- 1 file changed, 81 insertions(+), 79 deletions(-) diff --git a/plugins/plugin-docker/che-plugin-openshift-client/src/main/java/org/eclipse/che/plugin/openshift/client/OpenShiftConnector.java b/plugins/plugin-docker/che-plugin-openshift-client/src/main/java/org/eclipse/che/plugin/openshift/client/OpenShiftConnector.java index 9384fbb5d4..5f75107b6c 100644 --- a/plugins/plugin-docker/che-plugin-openshift-client/src/main/java/org/eclipse/che/plugin/openshift/client/OpenShiftConnector.java +++ b/plugins/plugin-docker/che-plugin-openshift-client/src/main/java/org/eclipse/che/plugin/openshift/client/OpenShiftConnector.java @@ -97,8 +97,6 @@ import io.fabric8.kubernetes.api.model.VolumeMount; import io.fabric8.kubernetes.api.model.VolumeMountBuilder; import io.fabric8.kubernetes.api.model.extensions.Deployment; import io.fabric8.kubernetes.api.model.extensions.DeploymentBuilder; -import io.fabric8.kubernetes.api.model.extensions.ReplicaSet; -import io.fabric8.kubernetes.api.model.extensions.ReplicaSetList; import io.fabric8.openshift.api.model.ImageStream; import io.fabric8.openshift.api.model.ImageStreamTag; import io.fabric8.openshift.client.DefaultOpenShiftClient; @@ -126,7 +124,8 @@ public class OpenShiftConnector extends DockerConnector { private static final String OPENSHIFT_SERVICE_TYPE_NODE_PORT = "NodePort"; private static final int OPENSHIFT_WAIT_POD_DELAY = 1000; private static final int OPENSHIFT_WAIT_POD_TIMEOUT = 240; - private static final int OPENSHIFT_IMAGESTREAM_MAX_WAIT = 10; // seconds + private static final int OPENSHIFT_IMAGESTREAM_WAIT_DELAY = 2000; + private static final int OPENSHIFT_IMAGESTREAM_MAX_WAIT_COUNT = 30; private static final String OPENSHIFT_POD_STATUS_RUNNING = "Running"; private static final String OPENSHIFT_DEPLOYMENT_LABEL = "deployment"; private static final String OPENSHIFT_IMAGE_PULL_POLICY_IFNOTPRESENT = "IfNotPresent"; @@ -214,18 +213,29 @@ public class OpenShiftConnector extends DockerConnector { String[] volumes = createContainerParams.getContainerConfig().getHostConfig().getBinds(); Map additionalLabels = createContainerParams.getContainerConfig().getLabels(); - createOpenShiftService(workspaceID, exposedPorts, additionalLabels); - String deploymentName = createOpenShiftDeployment(workspaceID, - dockerPullSpec, - containerName, - exposedPorts, - envVariables, - volumes, - runContainerAsRoot); + String containerID; + try { + createOpenShiftService(workspaceID, exposedPorts, additionalLabels); + String deploymentName = createOpenShiftDeployment(workspaceID, + dockerPullSpec, + containerName, + exposedPorts, + envVariables, + volumes, + runContainerAsRoot); - String containerID = waitAndRetrieveContainerID(deploymentName); - if (containerID == null) { - throw new OpenShiftException("Failed to get the ID of the container running in the OpenShift pod"); + containerID = waitAndRetrieveContainerID(deploymentName); + if (containerID == null) { + throw new OpenShiftException("Failed to get the ID of the container running in the OpenShift pod"); + } + } catch (IOException e) { + // Make sure we clean up deployment and service in case of an error -- otherwise Che can end up + // in an inconsistent state. + LOG.info("Error while creating Pod, removing deployment"); + String deploymentName = CHE_OPENSHIFT_RESOURCES_PREFIX + workspaceID; + cleanUpWorkspaceResources(deploymentName); + openShiftClient.resource(imageStreamTag).delete(); + throw e; } return new ContainerCreated(containerID, null); @@ -309,30 +319,8 @@ public class OpenShiftConnector extends DockerConnector { public void removeContainer(final RemoveContainerParams params) throws IOException { String containerId = params.getContainer(); Pod pod = getChePodByContainerId(containerId); - String deploymentName = pod.getMetadata().getLabels().get(OPENSHIFT_DEPLOYMENT_LABEL); - - Deployment deployment = getDeploymentByName(deploymentName); - ReplicaSet replicaSet = getReplicaSetByLabel(OPENSHIFT_DEPLOYMENT_LABEL, deploymentName); - Service service = getCheServiceBySelector(OPENSHIFT_DEPLOYMENT_LABEL, deploymentName); - - if (service != null) { - LOG.info("Removing OpenShift Service {}", service.getMetadata().getName()); - openShiftClient.resource(service).delete(); - } - - if (deployment != null) { - LOG.info("Removing OpenShift Deployment {}", deployment.getMetadata().getName()); - openShiftClient.resource(deployment).delete(); - } - - if (replicaSet != null) { - LOG.info("Removing Replica Set {}", replicaSet.getMetadata().getName()); - openShiftClient.resource(replicaSet).delete(); - } - - LOG.info("Removing OpenShift Pod {}", pod.getMetadata().getName()); - openShiftClient.resource(pod).delete(); + cleanUpWorkspaceResources(deploymentName); } @Override @@ -494,9 +482,9 @@ public class OpenShiftConnector extends DockerConnector { // Wait for Image metadata to be obtained. ImageStream createdImageStream; - for (int waitCount = 0; waitCount < OPENSHIFT_IMAGESTREAM_MAX_WAIT; waitCount++) { + for (int waitCount = 0; waitCount < OPENSHIFT_IMAGESTREAM_MAX_WAIT_COUNT; waitCount++) { try { - Thread.sleep(1000); + Thread.sleep(OPENSHIFT_IMAGESTREAM_WAIT_DELAY); } catch (InterruptedException e) { Thread.currentThread().interrupt(); } @@ -536,7 +524,7 @@ public class OpenShiftConnector extends DockerConnector { * @see DockerConnector#tag(TagParams) */ @Override - public void tag(final TagParams params) throws ImageNotFoundException, IOException { + public void tag(final TagParams params) throws IOException { // E.g. `docker tag sourceImage targetImage` String paramsSourceImage = params.getImage(); // e.g. eclipse/ubuntu_jdk8 String targetImage = params.getRepository(); // e.g. eclipse-che/ @@ -691,28 +679,6 @@ public class OpenShiftConnector extends DockerConnector { return deployment; } - private ReplicaSet getReplicaSetByLabel(String labelKey, String labelValue) throws IOException { - ReplicaSetList replicaSetList = openShiftClient - .extensions().replicaSets() - .inNamespace(this.openShiftCheProjectName) - .withLabel(labelKey, labelValue) - .list(); - - List items = replicaSetList.getItems(); - - if (items.isEmpty()) { - LOG.warn("No ReplicaSet with label {}={} could be found", labelKey, labelValue); - throw new IOException("No ReplicaSet with label " + labelKey + "=" + labelValue + " could be found"); - } - - if (items.size() > 1) { - LOG.warn("Found more than one ReplicaSet with label {}={}", labelKey, labelValue); - throw new IOException("Found more than one ReplicaSet with label " + labelValue + "=" + labelValue); - } - - return items.get(0); - } - private Pod getChePodByContainerId(String containerId) throws IOException { PodList pods = openShiftClient.pods() .inNamespace(this.openShiftCheProjectName) @@ -864,31 +830,30 @@ public class OpenShiftConnector extends DockerConnector { private ImageStreamTag createImageStreamTag(String sourceImageWithTag, String imageStreamTagName) throws IOException { try { - ImageStreamTag imageStreamTag = openShiftClient.imageStreamTags() - .inNamespace(openShiftCheProjectName) - .createOrReplaceWithNew() - .withNewMetadata() - .withName(imageStreamTagName) - .endMetadata() - .withNewTag() - .withNewFrom() - .withKind("DockerImage") - .withName(sourceImageWithTag) - .endFrom() - .endTag() - .done(); + openShiftClient.imageStreamTags() + .inNamespace(openShiftCheProjectName) + .createOrReplaceWithNew() + .withNewMetadata() + .withName(imageStreamTagName) + .endMetadata() + .withNewTag() + .withNewFrom() + .withKind("DockerImage") + .withName(sourceImageWithTag) + .endFrom() + .endTag() + .done(); // Wait for image metadata to be pulled - for (int waitCount = 0; waitCount < OPENSHIFT_IMAGESTREAM_MAX_WAIT; waitCount++) { - Thread.sleep(1000); - + for (int waitCount = 0; waitCount < OPENSHIFT_IMAGESTREAM_MAX_WAIT_COUNT; waitCount++) { + Thread.sleep(OPENSHIFT_IMAGESTREAM_WAIT_DELAY); ImageStreamTag createdTag = openShiftClient.imageStreamTags() .inNamespace(openShiftCheProjectName) .withName(imageStreamTagName) .get(); if (createdTag != null) { LOG.info(String.format("Created ImageStreamTag %s in namespace %s", - imageStreamTag.getMetadata().getName(), + createdTag.getMetadata().getName(), openShiftCheProjectName)); return createdTag; } @@ -973,6 +938,43 @@ public class OpenShiftConnector extends DockerConnector { return info; } + + private void cleanUpWorkspaceResources(String deploymentName) throws IOException { + + Deployment deployment = getDeploymentByName(deploymentName); + Service service = getCheServiceBySelector(OPENSHIFT_DEPLOYMENT_LABEL, deploymentName); + + if (service != null) { + LOG.info("Removing OpenShift Service {}", service.getMetadata().getName()); + openShiftClient.resource(service).delete(); + } + + if (deployment != null) { + LOG.info("Removing OpenShift Deployment {}", deployment.getMetadata().getName()); + openShiftClient.resource(deployment).delete(); + } + + // Wait for all pods to terminate before returning. + try { + for (int waitCount = 0; waitCount < OPENSHIFT_WAIT_POD_TIMEOUT; waitCount++) { + List pods = openShiftClient.pods() + .inNamespace(openShiftCheProjectName) + .withLabel(OPENSHIFT_DEPLOYMENT_LABEL, deploymentName) + .list() + .getItems(); + if (pods.size() == 0) { + return; + } + Thread.sleep(OPENSHIFT_WAIT_POD_DELAY); + } + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + LOG.info("Thread interrupted while cleaning up workspace"); + } + + throw new OpenShiftException("Timeout while waiting for pods to terminate"); + } + private List getVolumeMountsFrom(String[] volumes, String workspaceID) { List vms = new ArrayList<>(); for (String volume : volumes) {