diff --git a/VERSION b/VERSION
index 080c74d0..d0203937 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-1!10.5.0
+1!10.5.1
diff --git a/pycloudlib/lxd/instance.py b/pycloudlib/lxd/instance.py
index 45aa0ed3..cf2df7b2 100644
--- a/pycloudlib/lxd/instance.py
+++ b/pycloudlib/lxd/instance.py
@@ -254,7 +254,9 @@ def delete(self, wait=True) -> List[Exception]:
 
         self._log.debug("deleting %s", self.name)
         try:
-            subp(["lxc", "delete", self.name, "--force"])
+            instrument_unmount_failure(
+                subp(["lxc", "delete", self.name, "--force"], rcs=[0, 1])
+            )
         except RuntimeError as e:
             if "Instance not found" not in str(e):
                 return [e]
@@ -367,7 +369,7 @@ def _do_restart(self, force=False, **kwargs):
         cmd = ["lxc", "restart", self.name]
         if force:
             cmd.append("--force")
-        subp(cmd)
+        instrument_unmount_failure(subp(cmd, rcs=[0, 1]))
 
     def restore(self, snapshot_name):
         """Restore instance from a specific snapshot.
@@ -561,3 +563,44 @@ def _wait_for_instance_start(self):
             super()._wait_for_instance_start()
         else:
             self.wait_for_state(desired_state="RUNNING", num_retries=200)
+
+
+def instrument_unmount_failure(result):
+    """Temporary instrumentation.
+
+    Introduced to aid debugging ephemeral failures in which lxd fails to
+    unmount during instance deletion. Currently lists the files open on
+    the busy mountpoint; if that is insufficient, further checks of the
+    mountpoint may be required.
+
+    Args:
+        result: outcome of an lxc command run through subp; only its
+            return_code and stderr attributes are read.
+
+    Raises:
+        RuntimeError: when result.return_code is non-zero. The message
+            holds the return code, stderr and the open-files report.
+    """
+    if not result.return_code:
+        return
+
+    # Capture the mountpoint: without a group, findall() returns only the
+    # constant prefix and lsof would be pointed at that text.
+    # NOTE(review): assumes lxd prints the (optionally quoted) path right
+    # after "Failed to unmount" - confirm against a real failure.
+    mount = re.findall(
+        r"Failed unmounting instance: Failed to unmount ['\"]?([^'\":\s]+)",
+        result.stderr,
+    )
+    if mount:
+        # lsof exits 1 when it matches nothing; accept that rc so the lxd
+        # failure is still the error raised below (presumably subp raises
+        # on return codes missing from rcs - confirm).
+        files = subp(["lsof", "+f", "--", mount[0]], rcs=[0, 1])
+    else:
+        files = "failure parsing lxd mount error"
+
+    raise RuntimeError(
+        "Failure (rc=%s): %s, open files: [%s]"
+        % (result.return_code, result.stderr, files)
+    )
diff --git a/pycloudlib/result.py b/pycloudlib/result.py
index 9bbc511c..60e3d299 100644
--- a/pycloudlib/result.py
+++ b/pycloudlib/result.py
@@ -24,6 +24,14 @@ def __bool__(self):
         """Boolean behavior."""
         return self.ok
 
+    def __str__(self):
+        """Return rc, stdout and stderr as one readable string."""
+        # NOTE(review): the stdout slot used to repeat return_code. This
+        # assumes Result subclasses str (its text being stdout) - confirm.
+        return "rc: '{}'\nstdout: '{}'\nstderr: '{}'".format(
+            self.return_code, super().__str__(), self.stderr
+        )
+
     @property
     def failed(self):
         """Return boolean if result was failure."""
diff --git a/tests/unit_tests/lxd/test_instance.py b/tests/unit_tests/lxd/test_instance.py
index 0583e712..7ec6597b 100644
--- a/tests/unit_tests/lxd/test_instance.py
+++ b/tests/unit_tests/lxd/test_instance.py
@@ -648,6 +648,7 @@ class TestRestart:
     @mock.patch("pycloudlib.lxd.instance.subp")
     def test_restart_calls_lxc_cmd_with_force_param(self, m_subp, force):
         """Honor force param on restart."""
+        m_subp.return_value = Result("", "", 0)
         instance = LXDInstance(name="my_vm")
         instance._do_restart(force=force)  # pylint: disable=protected-access
         if force:
@@ -657,8 +658,9 @@ def test_restart_calls_lxc_cmd_with_force_param(self, m_subp, force):
 
     @mock.patch("pycloudlib.lxd.instance.LXDInstance.shutdown")
     @mock.patch("pycloudlib.lxd.instance.subp")
-    def test_restart_does_not_shutdown(self, _m_subp, m_shutdown):
+    def test_restart_does_not_shutdown(self, m_subp, m_shutdown):
         """Don't shutdown (stop) instance on restart."""
+        m_subp.return_value = Result("", "", 0)
         instance = LXDInstance(name="my_vm")
         instance._do_restart()  # pylint: disable=protected-access
         assert not m_shutdown.called
@@ -866,6 +868,7 @@ def test_delete_on_ephemeral_instance_calls_shutdown(self, m_subp, m_shutdown, i
 
         Also verify is delete is actually called if instance is not ephemeral.
         """
+        m_subp.return_value = Result("", "", 0)
         instance = LXDInstance(name="test")
 
         with mock.patch.object(type(instance), "ephemeral", is_ephemeral):