Unverified Commit 68ff6cdd authored by Kroese's avatar Kroese Committed by GitHub
Browse files

feat: Ballooning monitors QEMU process (#1097)

parent 118d9a56
Loading
Loading
Loading
Loading
+34 −4
Original line number Diff line number Diff line
@@ -213,9 +213,24 @@ async def _get_host_qemu_guest_mem_rss(qmp: QMPClient, qemu_pid: int) -> Optiona
# QMP helpers using qemu.qmp
# ==========================================================

async def qmp_wait_connected(sock_path: str, interval: int = 5) -> QMPClient:
    """Create and connect a QMPClient, retrying until successful."""
def _is_process_alive(pid: int) -> bool:
    """Return True if a single process with the given PID currently exists.

    The check sends signal 0 through ``os.kill``, which runs the kernel's
    existence and permission checks without delivering a signal.

    Args:
        pid: Process ID to probe. Non-positive values never identify a single
            process (for ``kill``, 0 addresses the caller's process group and
            negative values address groups or every signalable process), so
            they are reported as not alive without issuing the call.

    Returns:
        True when the process exists, including when it exists but the caller
        lacks permission to signal it; False otherwise.
    """
    if pid <= 0:
        return False
    try:
        os.kill(pid, 0)
    except PermissionError:
        # The PID is in use, it just belongs to a process we may not signal.
        return True
    except OSError:
        # Covers ProcessLookupError (no such process) and any other failure.
        return False
    return True


async def qmp_wait_connected(sock_path: str, interval: int = 5, qemu_pid: int = -1) -> QMPClient:
    """Create and connect a QMPClient, retrying until successful or QEMU exits."""
    while True:
        if qemu_pid > 0 and not _is_process_alive(qemu_pid):
            raise ConnectionError("QEMU process (pid %d) is no longer running" % qemu_pid)
        qmp = QMPClient("balloon-monitor")
        try:
            await qmp.connect(sock_path)
@@ -367,7 +382,7 @@ class BalloonMonitor:
        )

    async def _qmp_connect(self) -> None:
        """Connect to QMP, start the event listener task and record the balloon size.

        The client returned by ``qmp_wait_connected`` is stored on ``self.qmp``,
        the listener task on ``self.event_task``, and the value returned by
        ``qmp_get_actual_balloon`` is passed to ``self._record_balloon``.
        """
        # NOTE(review): the two assignments below look like the pre- and
        # post-change form of the same line (the second also passes
        # ``self.qemu_pid``) — confirm only one is meant to remain.
        self.qmp = await qmp_wait_connected(self.args.qmp_sock, self.args.interval)
        self.qmp = await qmp_wait_connected(self.args.qmp_sock, self.args.interval, self.qemu_pid)
        # Start listening for QMP events in the background.
        self.event_task = asyncio.create_task(self._qmp_event_listener())
        self._record_balloon(await qmp_get_actual_balloon(self.qmp))

@@ -389,6 +404,10 @@ class BalloonMonitor:
            log.debug("QMP event listener stopped: %s", e)

    async def _qmp_reconnect(self) -> None:
        if self.qemu_pid > 0 and not _is_process_alive(self.qemu_pid):
            log.info("QEMU process (pid %d) has exited, shutting down.", self.qemu_pid)
            self._stop.set()
            return
        if self.qmp:
            try:
                await self.qmp.disconnect()
@@ -759,15 +778,20 @@ def main() -> None:
        datefmt="%Y-%m-%d %H:%M:%S",
    )

    qmp_logger = logging.getLogger("qemu.qmp")
    # Suppress noisy qemu.qmp internal messages (e.g. "end-of-file") unless debug is requested
    qmp_logger.setLevel(logging.WARNING)

    if args.debug is not None:
        targets = {t.strip() for t in args.debug.split(",")}
        if "all" in targets:
            logging.getLogger().setLevel(logging.DEBUG)
            qmp_logger.setLevel(logging.DEBUG)
        else:
            if "controller" in targets:
                logging.getLogger(__name__).setLevel(logging.DEBUG)
            if "qmp" in targets:
                logging.getLogger("qemu.qmp").setLevel(logging.DEBUG)
                qmp_logger.setLevel(logging.DEBUG)

    monitor = BalloonMonitor(args)
    asyncio.run(monitor.start())
@@ -778,3 +802,9 @@ if __name__ == "__main__":
    except KeyboardInterrupt:
        log.debug("Monitor stopped by user.")
        sys.exit(0)
    except ConnectionError as e:
        log.info("Monitor exiting: %s", e)
        sys.exit(0)
    except Exception as e:
        log.error("Monitor terminated unexpectedly: %s", e, exc_info=True)
        sys.exit(1)
+2 −2
Original line number Diff line number Diff line
@@ -46,7 +46,7 @@ finish() {
          143 ) display="SIGTERM" ;;
        esac
        echo && error "Forcefully terminating $(app), reason: $display..."
        { kill -9 -- "$pid" || :; } 2>/dev/null
        { disown "$pid" || :; kill -9 -- "$pid" || :; } 2>/dev/null
      fi
    fi
  fi
@@ -54,7 +54,7 @@ finish() {
  mKill "${pids[@]}"
  closeNetwork

  if [ -n "$pid" ] && ! waitPid "$pid" 100; then
  if ! waitPidFile "$QEMU_PID" 10; then
    warn "Timed out while waiting for $(app) to exit!"
  fi

+39 −18
Original line number Diff line number Diff line
@@ -40,23 +40,44 @@ isAlive() {
# Poll until the process with the given PID is no longer alive.
#   $1 - PID to wait for; an empty value returns success immediately.
#   $2 - timeout; the second assignment below defaults it to 10 when omitted.
# Returns 0 once isAlive reports the PID gone, 1 when a timeout check trips.
# NOTE(review): this listing appears to hold both the pre- and post-change
# form of two lines (the `timeout` assignment and the timeout check) —
# confirm which pair is meant to remain.
waitPid() {
  local i=0
  local pid="$1"
  local timeout="$2"
  local timeout="${2:-10}"

  while [ -n "$pid" ] && isAlive "$pid"; do
    # One poll tick is 0.2s; i counts ticks.
    sleep 0.2
    i=$((i + 1))
    [ "$i" -ge "$timeout" ] && return 1
    # Five 0.2s ticks per timeout unit, i.e. this form treats timeout as seconds.
    (( i >= timeout * 5 )) && return 1
  done

  return 0
}

# Wait for the process recorded in a PID file to exit, then remove the file.
#   $1 - path to the PID file
#   $2 - timeout in seconds (default 10); the process is polled every 0.2s
# Returns 0 when the file is missing/empty, holds no PID, or the process is
# gone (or the file vanished) before the timeout; the file is removed in the
# latter case. Returns 1 on timeout, leaving the file in place.
waitPidFile() {
  local i=0
  local pid=""
  local file="$1"
  local timeout="${2:-10}"

  [ ! -s "$file" ] && return 0

  # `read` exits non-zero when the file has no trailing newline even though it
  # has already filled $pid, so judge by the value rather than the exit status.
  read -r pid <"$file" || :
  [ -z "$pid" ] && return 0

  # Stop waiting as soon as the process is gone or its PID file disappears.
  while [ -s "$file" ] && isAlive "$pid"; do
    sleep 0.2
    i=$((i + 1))
    # Five 0.2s ticks per second of timeout.
    (( i >= timeout * 5 )) && return 1
  done

  rm -f -- "$file"
  return 0
}

pKill() {
  local pid="$1"
  local timeout="${2:-10}"

  { kill -15 -- "$pid" || :; } 2>/dev/null

  if ! waitPid "$pid" 50; then
  if ! waitPid "$pid" "$timeout"; then
    warn "Timed out while waiting for PID $pid"
  fi

@@ -64,13 +85,16 @@ pKill() {
}

fWait() {
  local name="$1" i=0
  local i=0
  local name="$1"
  local timeout="${2:-10}"

  [ -z "$name" ] && return 0

  while pgrep -f -l "$name" >/dev/null; do
    sleep 0.2
    i=$((i + 1))
    if [ "$i" -ge 50 ]; then
    if (( i >= timeout * 5 )); then
      warn "Timed out while waiting for process: $name"
      break
    fi
@@ -81,21 +105,25 @@ fWait() {

# Signal every process matched by a pattern and wait for them to exit.
#   $1 - pattern handed to `pkill -f`; an empty value makes this a no-op
#   $2 - timeout forwarded to fWait (default 10)
# Always returns 0; pkill's exit status and stderr output are discarded.
# NOTE(review): the two fWait calls below look like the pre- and post-change
# form of the same line — confirm only one is meant to remain.
fKill() {
  local name="$1"
  local timeout="${2:-10}"

  [ -z "$name" ] && return 0

  # Best effort: no matching process is not an error.
  { pkill -f "$name" || :; } 2>/dev/null
  fWait "$name"
  fWait "$name" "$timeout"

  return 0
}

sKill() {
  local file="$1" pid=""
  local pid=""
  local file="$1"

  [ ! -s "$file" ] && return 0
  ! read -r pid <"$file" && return 0
  [ -z "$pid" ] && return 0

  if [ -n "$pid" ] && isAlive "$pid"; then
  if isAlive "$pid"; then
    { kill -15 -- "$pid" || :; } 2>/dev/null
  fi

@@ -103,24 +131,17 @@ sKill() {
}

mKill() {
  local pid="" files=("$@")
  local timeout=10
  local files=("$@")

  for file in "${files[@]}"; do
    sKill "$file"
  done

  for file in "${files[@]}"; do

    [ ! -s "$file" ] && continue
    ! read -r pid <"$file" && continue
    [ -z "$pid" ] && continue

    if waitPid "$pid" 50; then
      rm -f -- "$file"
    else
    if ! waitPidFile "$file" "$timeout"; then
      warn "Timed out while waiting for PID file: $file"
    fi

  done

  return 0