diff --git a/files/netdata/docker-compose.template.yml b/files/netdata/docker-compose.template.yml index 597f160..d4a63f0 100644 --- a/files/netdata/docker-compose.template.yml +++ b/files/netdata/docker-compose.template.yml @@ -1,7 +1,7 @@ services: netdata: - image: netdata/netdata:v2.7.3 + image: netdata/netdata:v2.8.2 container_name: netdata restart: unless-stopped cap_add: diff --git a/files/netdata/go.d/fail2ban.conf b/files/netdata/go.d/fail2ban.conf index c78d740..fadd1bc 100644 --- a/files/netdata/go.d/fail2ban.conf +++ b/files/netdata/go.d/fail2ban.conf @@ -1,3 +1,3 @@ jobs: - name: fail2ban - update_every: 15 # Collect Fail2Ban jails statistics every 15 seconds + update_every: 60 # Collect Fail2Ban jails statistics every N seconds diff --git a/files/netdata/go.d/postgres.conf b/files/netdata/go.d/postgres.conf index 86c50dd..0dfd8b4 100644 --- a/files/netdata/go.d/postgres.conf +++ b/files/netdata/go.d/postgres.conf @@ -1,4 +1,4 @@ -update_every: 15 +update_every: 60 jobs: diff --git a/files/netdata/netdata.conf.j2 b/files/netdata/netdata.template.conf similarity index 91% rename from files/netdata/netdata.conf.j2 rename to files/netdata/netdata.template.conf index fbffa9f..90194c7 100644 --- a/files/netdata/netdata.conf.j2 +++ b/files/netdata/netdata.template.conf @@ -15,11 +15,15 @@ [global] # run as user = netdata # host access prefix = /host + + #| >>> [global].hostname <<< + #| datatype: text, default value: a7ccb73e566e + hostname = {{ host_name }} + # pthread stack size = 8MiB # cpu cores = 2 # libuv worker threads = 16 # profile = standalone - hostname = {{ host_name }} # glibc malloc arena max for plugins = 1 # glibc malloc arena max for netdata = 1 # crash reports = all @@ -30,12 +34,15 @@ # has unstable connection = no [db] + #| >>> [db].update every <<< + #| datatype: duration (seconds), default value: 1s + update every = 10s + # enable replication = yes # replication period = 1d # replication step = 1h # replication threads = 1 # replication prefetch = 10 - # update every = 1s # db = dbengine # memory deduplication (ksm) = auto # cleanup orphan hosts after = 1h @@ -47,7 +54,7 @@ # dbengine extent cache size = off # dbengine enable journal integrity check = no # dbengine use all ram for caches = no - # dbengine out of memory protection = 391.99MiB + # dbengine out of memory protection = 391.49MiB # dbengine use direct io = yes # dbengine journal v2 unmount time = 2m # dbengine pages per extent = 109 @@ -93,7 +100,7 @@ [environment variables] # PATH = /usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/sbin:/usr/sbin:/usr/local/bin:/usr/local/sbin - # PYTHONPATH = + # PYTHONPATH = # TZ = :/etc/localtime [host labels] @@ -107,15 +114,15 @@ [ml] # enabled = auto - # maximum num samples to train = 21600 - # minimum num samples to train = 900 + # training window = 6h + # min training window = 15m + # max training vectors = 1440 + # max samples to smooth = 3 # train every = 3h # number of models per dimension = 18 # delete models older than = 7d # num samples to diff = 1 - # num samples to smooth = 3 # num samples to lag = 5 - # random sampling ratio = 0.20000 # maximum number of k-means iterations = 1000 # dimension anomaly score threshold = 0.99000 # host anomaly rate threshold = 1.00000 @@ -163,7 +170,7 @@ # timeout for first request = 1m # accept a streaming request every = off # respect do not track policy = no - # x-frame-options response header = + # x-frame-options response header = # allow connections from = localhost * # allow connections by dns = heuristic # allow dashboard from = localhost * @@ -181,7 +188,7 @@ # gzip compression level = 3 # ssl skip certificate verification = no # web server threads = 6 - # web server max sockets = 262144 + # web server max sockets = 131072 [registry] # enabled = no @@ -189,9 +196,9 @@ # registry log file = /var/lib/netdata/registry/registry-log.db # registry save db every new entries = 1000000 # registry expire idle persons = 1y - # registry domain = + # registry domain = # registry to announce = https://registry.my-netdata.io - # registry hostname = 7171b7f9fc69 + # registry hostname = rivendell # verify browser cookies support = yes # enable cookies SameSite and Secure = yes # max URL length = 1024 @@ -202,9 +209,29 @@ [pulse] # extended = no - # update every = 1s + # update every = 10s [plugins] + #| >>> [plugins].perf <<< + #| datatype: yes or no, default value: yes + perf = no + + #| >>> [plugins].python.d <<< + #| datatype: yes or no, default value: yes + python.d = no + + #| >>> [plugins].charts.d <<< + #| datatype: yes or no, default value: yes + charts.d = no + + #| >>> [plugins].otel <<< + #| datatype: yes or no, default value: yes + otel = no + + #| >>> [plugins].statsd <<< + #| datatype: yes or no, default value: yes + statsd = no + # idlejitter = yes # netdata pulse = yes # profile = no @@ -213,23 +240,20 @@ # proc = yes # cgroups = yes # timex = yes - # statsd = yes # enable running new plugins = yes # check for new plugins every = 1m # slabinfo = no # freeipmi = no - # python.d = yes - # go.d = yes - # apps = yes - # systemd-journal = yes - # network-viewer = yes - # charts.d = yes # debugfs = yes - # perf = yes # ioping = yes + # network-viewer = yes + # apps = yes + # go.d = yes + # systemd-units = yes + # systemd-journal = yes [statsd] - # update every (flushInterval) = 1s + # update every (flushInterval) = 10s # udp messages to process at once = 10 # create private charts for metrics matching = * # max private charts hard limit = 1000 @@ -247,10 +271,7 @@ # gaps on histograms (deleteHistograms) = no # gaps on timers (deleteTimers) = no # gaps on dictionaries (deleteDictionaries) = no - # statsd server max TCP sockets = 262144 - # listen backlog = 4096 - # default port = 8125 - # bind to = udp:localhost tcp:localhost + # statsd server max TCP sockets = 131072 [plugin:idlejitter] # loop time = 20ms @@ -300,22 +321,32 @@ # /sys/class/drm = yes [plugin:cgroups] - # update every = 1s - # check for new cgroups every = 10s + #| >>> [plugin:cgroups].update every <<< + #| datatype: duration (seconds), default value: 10s + update every = 20s + + #| >>> [plugin:cgroups].check for new cgroups every <<< + #| datatype: duration (seconds), default value: 10s + check for new cgroups every = 20s + # use unified cgroups = auto # max cgroups to allow = 1000 # max cgroups depth to monitor = 0 - # enable by default cgroups matching = !*/init.scope !/system.slice/run-*.scope *user.slice/docker-* !*user.slice* *.scope !/machine.slice/*/.control !/machine.slice/*/payload* !/machine.slice/*/supervisor /machine.slice/*.service */kubepods/pod*/* */kubepods/*/pod*/* */*-kubepods-pod*/* */*-kubepods-*-pod*/* !*kubepods* !*kubelet* !*/vcpu* !*/emulator !*.mount !*.partition !*.service !*.service/udev !*.socket !*.slice !*.swap !*.user !/ !/docker !*/libvirt !/lxc !/lxc/*/* !/lxc.monitor* !/lxc.pivot !/lxc.payload !*lxcfs.service/.control !/machine !/qemu !/system !/systemd !/user * - # enable by default cgroups names matching = * - # search for cgroups in subpaths matching = !*/init.scope !*-qemu !*.libvirt-qemu !/init.scope !/system !/systemd !/user !/lxc/*/* !/lxc.monitor !/lxc.payload/*/* !/lxc.payload.* * + # enable by default cgroups matching = !*/init.scope !/system.slice/run-*.scope *user.slice/docker-* !*user.slice* *.scope !/machine.slice/*/.control !/machine.slice/*/payload* !/machine.slice/*/supervisor /machine.slice/*.service */kubepods/pod*/* */kubepods/*/pod*/* */*-kubepods-pod*/* */*-kubepods-*-pod*/* !*kubepods* !*kubelet* !*/vcpu* !*/emulator !*.mount !*.partition !*.service !*.service/udev !*.socket !*.slice !*.swap !*.user !/ !/docker !*/libvirt !/lxc !/lxc/*/* !/lxc.monitor* !/lxc.pivot !/lxc.payload !*lxcfs.service/.control !/machine !/qemu !/system !/systemd !/user * + # enable by default cgroups names matching = * + # search for cgroups in subpaths matching = !*/init.scope !*-qemu !*.libvirt-qemu !/init.scope !/system !/systemd !/user !/lxc/*/* !/lxc.monitor !/lxc.payload/*/* !/lxc.payload.* * # script to get cgroup names = /usr/libexec/netdata/plugins.d/cgroup-name.sh # script to get cgroup network interfaces = /usr/libexec/netdata/plugins.d/cgroup-network - # run script to rename cgroups matching = !/ !*.mount !*.socket !*.partition /machine.slice/*.service !*.service !*.slice !*.swap !*.user !init.scope !*.scope/vcpu* !*.scope/emulator *.scope *docker* *lxc* *qemu* */kubepods/pod*/* */kubepods/*/pod*/* */*-kubepods-pod*/* */*-kubepods-*-pod*/* !*kubepods* !*kubelet* *.libvirt-qemu * - # cgroups to match as systemd services = !/system.slice/*/*.service /system.slice/*.service + # run script to rename cgroups matching = !/ !*.mount !*.socket !*.partition /machine.slice/*.service !*.service !*.slice !*.swap !*.user !init.scope !*.scope/vcpu* !*.scope/emulator *.scope *docker* *lxc* *qemu* */kubepods/pod*/* */kubepods/*/pod*/* */*-kubepods-pod*/* */*-kubepods-*-pod*/* !*kubepods* !*kubelet* *.libvirt-qemu * + # cgroups to match as systemd services = !/system.slice/*/*.service /system.slice/*.service [plugin:proc:diskspace] + #| >>> [plugin:proc:diskspace].update every <<< + #| reformatted from: 60s + #| datatype: duration (seconds), default value: 10s + update every = 1m + # remove charts of unmounted disks = yes - # update every = 1s # check for new mount points every = 15s # exclude space metrics on paths = /dev /dev/shm /proc/* /sys/* /var/run/user/* /run/lock /run/user/* /snap/* /var/lib/docker/* /var/lib/containers/storage/* /run/credentials/* /run/containerd/* /rpool /rpool/* # exclude space metrics on filesystems = *gvfs *gluster* *s3fs *ipfs *davfs2 *httpfs *sshfs *gdfs *moosefs fusectl autofs cgroup cgroup2 hugetlbfs devtmpfs fuse.lxcfs @@ -326,41 +357,29 @@ [plugin:tc] # script to run to get tc values = /usr/libexec/netdata/plugins.d/tc-qos-helper.sh -[plugin:python.d] - # update every = 1s - # command options = - [plugin:go.d] - # update every = 1s - # command options = + # update every = 10s + # command options = [plugin:apps] - # update every = 1s - # command options = + # update every = 10s + # command options = [plugin:systemd-journal] - # update every = 1s - # command options = + # update every = 10s + # command options = [plugin:network-viewer] - # update every = 1s - # command options = - -[plugin:charts.d] - # update every = 1s - # command options = + # update every = 10s + # command options = [plugin:debugfs] - # update every = 1s - # command options = - -[plugin:perf] - # update every = 1s - # command options = + # update every = 10s + # command options = [plugin:ioping] - # update every = 1s - # command options = + # update every = 10s + # command options = [plugin:proc:/proc/net/dev] # compressed packets for all interfaces = no @@ -580,7 +599,7 @@ # hardware packets counters = auto # hardware errors counters = auto # monitor only active ports = auto - # disable by default interfaces matching = + # disable by default interfaces matching = # refresh ports state every = 30s [plugin:proc:/proc/net/stat/nf_conntrack] @@ -635,7 +654,7 @@ # preferred disk ids = * # exclude disks = loop* ram* # filename to monitor = /host/proc/diskstats - # performance metrics for disks with major 252 = yes + # performance metrics for disks with major 253 = yes [plugin:proc:/proc/mdstat] # faulty devices = yes @@ -685,3 +704,7 @@ [plugin:proc:/sys/class/drm] # directory to monitor = /host/sys/class/drm + +[plugin:systemd-units] + # update every = 10s + # command options = diff --git a/playbook-netdata.yml b/playbook-netdata.yml index 82ee106..de0461d 100644 --- a/playbook-netdata.yml +++ b/playbook-netdata.yml @@ -41,7 +41,7 @@ - name: "Copy netdata config file" ansible.builtin.template: - src: "files/{{ app_name }}/netdata.conf.j2" + src: "files/{{ app_name }}/netdata.template.conf" dest: "{{ config_dir }}/netdata.conf" owner: "{{ app_user }}" group: "{{ app_user }}"