Skip to content

[Bug]: constant [low] CPU usage when idle #3719

@smemsh

Description

@smemsh

zot version

v2.1.13

Describe the bug

When idle (no network activity, periodic actions disabled, console log tail with debug level is silent), and no users from inception, Zot nonetheless is always using CPU, instead of blocking until there is work to do. It's not clear why it needs to do anything except sleep. This is not a huge total amount: about 1 minutes CPU time accumulates every 3 hours, but these are constant calls with small timeouts. What is it doing? It's keeping my system from sleeping, etc. Shouldn't Zot just block until there's something to do?

Here is a 5 second call count (strace -c) of completely idle Zot process:

% time     seconds  usecs/call     calls    errors syscall
------ ----------- ----------- --------- --------- ------------------
 52.54    0.051176         168       303        99 futex
 17.79    0.017327         173       100           nanosleep
 17.00    0.016562          32       514           epoll_pwait
 11.80    0.011490           8      1369           clock_gettime
  0.32    0.000311           9        32           write
  0.26    0.000255           7        32           read
  0.15    0.000144         144         1         1 restart_syscall
  0.05    0.000053          10         5           sched_yield
  0.05    0.000046           9         5           pread64
  0.04    0.000041           8         5           sched_getaffinity
------ ----------- ----------- --------- --------- ------------------
100.00    0.097405          41      2366       100 total

This is happening no matter when I sample it, about the same counts. It's looping basically doing this repeatedly, here shown as 150ms syscall snapshot:

[pid 455934] 05:56:23 (+     0.000000) futex(0xc00121d158, FUTEX_WAIT_PRIVATE, 0, NULL <unfinished ...>
[pid 455933] 05:56:23 (+     0.000025) futex(0x5ba53290bbf8, FUTEX_WAIT_PRIVATE, 0, NULL <unfinished ...>
[pid 455932] 05:56:23 (+     0.000007) futex(0xc00022a158, FUTEX_WAIT_PRIVATE, 0, NULL <unfinished ...>
[pid 455931] 05:56:23 (+     0.000008) futex(0xc0000e1958, FUTEX_WAIT_PRIVATE, 0, NULL <unfinished ...>
[pid 455930] 05:56:23 (+     0.000013) futex(0xc000108158, FUTEX_WAIT_PRIVATE, 0, NULL <unfinished ...>
[pid 455929] 05:56:23 (+     0.000011) epoll_pwait(5,  <unfinished ...>
[pid 455928] 05:56:23 (+     0.000011) futex(0x5ba53290c4a0, FUTEX_WAIT_PRIVATE, 0, NULL <unfinished ...>
[pid 455927] 05:56:23 (+     0.000011) restart_syscall(<... resuming interrupted read ...> <unfinished ...>
[pid 455919] 05:56:23 (+     0.000007) futex(0x5ba5328dc4f0, FUTEX_WAIT_PRIVATE, 0, NULL <unfinished ...>
[pid 455929] 05:56:23 (+     0.000015) <... epoll_pwait resumed>[], 128, 0, NULL, 0) = 0
[pid 455929] 05:56:23 (+     0.000034) epoll_pwait(5, [], 128, 13, NULL, 0) = 0
[pid 455929] 05:56:23 (+     0.013215) epoll_pwait(5, [], 128, 0, NULL, 0) = 0
[pid 455929] 05:56:23 (+     0.000083) epoll_pwait(5,  <unfinished ...>
[pid 455927] 05:56:23 (+     0.000164) <... restart_syscall resumed>) = -1 ETIMEDOUT (Connection timed out)
[pid 455927] 05:56:23 (+     0.000157) nanosleep({tv_sec=0, tv_nsec=10000000},  <unfinished ...>
[pid 455929] 05:56:23 (+     0.000841) <... epoll_pwait resumed>[], 128, 1, NULL, 0) = 0
[pid 455929] 05:56:23 (+     0.000093) epoll_pwait(5, [], 128, 0, NULL, 0) = 0
[pid 455929] 05:56:23 (+     0.000060) futex(0xc00022a158, FUTEX_WAKE_PRIVATE, 1 <unfinished ...>
[pid 455932] 05:56:23 (+     0.000070) <... futex resumed>) = 0
[pid 455929] 05:56:23 (+     0.000022) <... futex resumed>) = 1
[pid 455932] 05:56:23 (+     0.000035) epoll_pwait(5, [], 128, 0, NULL, 0) = 0
[pid 455929] 05:56:23 (+     0.000042) epoll_pwait(5,  <unfinished ...>
[pid 455932] 05:56:23 (+     0.000024) write(6, "\1\0\0\0\0\0\0\0", 8) = 8
[pid 455929] 05:56:23 (+     0.000057) <... epoll_pwait resumed>[{events=EPOLLIN, data={u32=848332328, u64=100765076063784}}], 128, -1, NULL, 0) = 1
[pid 455932] 05:56:23 (+     0.000021) futex(0xc00022a158, FUTEX_WAIT_PRIVATE, 0, NULL <unfinished ...>
[pid 455929] 05:56:23 (+     0.000017) read(6, "\1\0\0\0\0\0\0\0", 8) = 8
[pid 455929] 05:56:23 (+     0.000047) epoll_pwait(5, [], 128, 0, NULL, 0) = 0
[pid 455929] 05:56:23 (+     0.000049) epoll_pwait(5,  <unfinished ...>
[pid 455927] 05:56:23 (+     0.008771) <... nanosleep resumed>NULL) = 0
[pid 455927] 05:56:23 (+     0.000028) futex(0x5ba5328df950, FUTEX_WAIT_PRIVATE, 0, {tv_sec=0, tv_nsec=39577737} <unfinished ...>
[pid 455929] 05:56:23 (+     0.039476) <... epoll_pwait resumed>[], 128, 48, NULL, 0) = 0
[pid 455929] 05:56:23 (+     0.000271) futex(0xc00022a158, FUTEX_WAKE_PRIVATE, 1 <unfinished ...>
[pid 455927] 05:56:23 (+     0.000073) <... futex resumed>) = -1 ETIMEDOUT (Connection timed out)
[pid 455932] 05:56:23 (+     0.000037) <... futex resumed>) = 0
[pid 455929] 05:56:23 (+     0.000031) <... futex resumed>) = 1
[pid 455927] 05:56:23 (+     0.000040) nanosleep({tv_sec=0, tv_nsec=10000000},  <unfinished ...>
[pid 455932] 05:56:23 (+     0.000065) epoll_pwait(5, [], 128, 0, NULL, 0) = 0
[pid 455929] 05:56:23 (+     0.000593) epoll_pwait(5,  <unfinished ...>
[pid 455932] 05:56:23 (+     0.000039) futex(0xc00022a158, FUTEX_WAIT_PRIVATE, 0, NULL <unfinished ...>
[pid 455927] 05:56:23 (+     0.009615) <... nanosleep resumed>NULL) = 0
[pid 455927] 05:56:23 (+     0.000060) futex(0x5ba5328df950, FUTEX_WAIT_PRIVATE, 0, {tv_sec=0, tv_nsec=39287307} <unfinished ...>
[pid 455929] 05:56:23 (+     0.039562) <... epoll_pwait resumed>[], 128, 49, NULL, 0) = 0
[pid 455927] 05:56:23 (+     0.000028) <... futex resumed>) = -1 ETIMEDOUT (Connection timed out)
[pid 455929] 05:56:23 (+     0.000102) epoll_pwait(5,  <unfinished ...>
[pid 455927] 05:56:23 (+     0.000122) nanosleep({tv_sec=0, tv_nsec=10000000},  <unfinished ...>
[pid 455929] 05:56:23 (+     0.000053) <... epoll_pwait resumed>[], 128, 0, NULL, 0) = 0
[pid 455929] 05:56:23 (+     0.000370) futex(0xc00022a158, FUTEX_WAKE_PRIVATE, 1 <unfinished ...>
[pid 455932] 05:56:23 (+     0.000095) <... futex resumed>) = 0
[pid 455929] 05:56:23 (+     0.000018) <... futex resumed>) = 1
[pid 455932] 05:56:23 (+     0.000023) epoll_pwait(5,  <unfinished ...>
[pid 455929] 05:56:23 (+     0.000047) epoll_pwait(5,  <unfinished ...>
[pid 455932] 05:56:23 (+     0.000143) <... epoll_pwait resumed>[], 128, 0, NULL, 0) = 0
[pid 455932] 05:56:23 (+     0.000117) futex(0xc000108158, FUTEX_WAKE_PRIVATE, 1) = 1
[pid 455930] 05:56:23 (+     0.000054) <... futex resumed>) = 0
[pid 455932] 05:56:23 (+     0.000098) futex(0xc00022a158, FUTEX_WAIT_PRIVATE, 0, NULL <unfinished ...>
[pid 455930] 05:56:23 (+     0.000029) write(6, "\1\0\0\0\0\0\0\0", 8) = 8
[pid 455929] 05:56:23 (+     0.000084) <... epoll_pwait resumed>[{events=EPOLLIN, data={u32=848332328, u64=100765076063784}}], 128, -1, NULL, 0) = 1
[pid 455930] 05:56:23 (+     0.000038) futex(0xc000108158, FUTEX_WAIT_PRIVATE, 0, NULL <unfinished ...>
[pid 455929] 05:56:23 (+     0.000034) read(6, "\1\0\0\0\0\0\0\0", 8) = 8
[pid 455929] 05:56:23 (+     0.000089) epoll_pwait(5, [], 128, 0, NULL, 0) = 0
[pid 455929] 05:56:23 (+     0.000077) epoll_pwait(5,  <unfinished ...>
[pid 455927] 05:56:23 (+     0.008879) <... nanosleep resumed>NULL) = 0
[pid 455927] 05:56:23 (+     0.000069) futex(0x5ba5328df950, FUTEX_WAIT_PRIVATE, 0, {tv_sec=0, tv_nsec=39152382} <detached ...>

the file descriptors for zot in that trace looks like this:

0 -> /dev/pts/0
1 -> /dev/pts/0
2 -> /dev/pts/0
3 -> /sys/fs/cgroup/cpu.max
4 -> /dev/tty
5 -> anon_inode:[eventpoll]
6 -> anon_inode:[eventfd]
7 -> /dev/tty
8 -> anon_inode:inotify
9 -> /var/lib/registry/cache.db
10 -> /var/lib/registry/meta.db
11 -> anon_inode:inotify
12 -> socket:[11758389]=

so this is eventpoll/eventfd, but if zot needs to wake up and check something, not sure why the sleep interval is so tiny (and also why it wouldn't be event-triggered instead of having to keep polling repeatedly)

To reproduce

rcfile looks like this (tried disabling also metrics and search, but no change), it's just a read-through cache:

{
  "extensions": {
    "events": {
      "enable": false
    },
    "lint": {
      "enable": false
    },
    "metrics": {
      "enable": true
    },
    "scrub": {
      "enable": false
    },
    "search": {
      "enable": true
    },
    "sync": {
      "enable": true,
      "registries": [
        {
          "content": [
            {
              "destination": "/ghcr",
              "prefix": "**"
            }
          ],
          "onDemand": true,
          "preserveDigest": true,
          "tlsVerify": false,
          "urls": [
            "https://ghcr.io"
          ]
        },
        {
          "content": [
            {
              "destination": "/docker",
              "prefix": "**"
            }
          ],
          "onDemand": true,
          "preserveDigest": true,
          "tlsVerify": false,
          "urls": [
            "https://docker.io"
          ]
        },
        {
          "content": [
            {
              "destination": "/gcr",
              "prefix": "**"
            }
          ],
          "onDemand": true,
          "preserveDigest": true,
          "tlsVerify": false,
          "urls": [
            "https://gcr.io"
          ]
        },
        {
          "content": [
            {
              "destination": "/registry.k8s",
              "prefix": "**"
            }
          ],
          "onDemand": true,
          "preserveDigest": true,
          "tlsVerify": false,
          "urls": [
            "https://registry.k8s.io"
          ]
        },
        {
          "content": [
            {
              "destination": "/quay",
              "prefix": "**"
            }
          ],
          "onDemand": true,
          "preserveDigest": true,
          "tlsVerify": false,
          "urls": [
            "https://quay.io"
          ]
        }
      ]
    },
    "trust": {
      "enable": false
    },
    "ui": {
      "enable": false
    }
  },
  "http": {
    "address": "0.0.0.0",
    "compat": [
      "docker2s2"
    ],
    "port": 5000
  },
  "log": {
    "level": "debug"
  },
  "storage": {
    "gc": false,
    "rootDirectory": "/var/lib/registry"
  }
}

I'm running this from the ghcr.io/project-zot/zot-linux-amd64 with fingerprint d33d05b2170c

Expected behavior

sleep when idle like other network servers.

Screenshots

No response

Additional context

No response

Metadata

Metadata

Assignees

No one assigned

    Labels

    bugSomething isn't workingrm-externalRoadmap item submitted by non-maintainers

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions