Netdata: upgrade to 2.8.2

Tune config: set the data collection interval ("update every") to 10s instead of 1s
2025-12-13 14:46:15 +03:00
parent 10d67861a0
commit 81f693938e
5 changed files with 87 additions and 64 deletions
--- a/files/netdata/docker-compose.template.yml
+++ b/files/netdata/docker-compose.template.yml
@@ -1,7 +1,7 @@
 services:

  netdata:
-    image: netdata/netdata:v2.7.3
+    image: netdata/netdata:v2.8.2
    container_name: netdata
    restart: unless-stopped
    cap_add:
--- a/files/netdata/go.d/fail2ban.conf
+++ b/files/netdata/go.d/fail2ban.conf
@@ -1,3 +1,3 @@
 jobs:
  - name: fail2ban
-    update_every: 15  # Collect Fail2Ban jails statistics every 15 seconds
+    update_every: 60  # Interval, in seconds, between Fail2Ban jail statistics collections
--- a/files/netdata/go.d/postgres.conf
+++ b/files/netdata/go.d/postgres.conf
@@ -1,4 +1,4 @@
-update_every: 15
+update_every: 60

 jobs:
  
--- a/files/netdata/netdata.template.conf
+++ b/files/netdata/netdata.template.conf
@@ -15,11 +15,15 @@
 [global]
 	# run as user = netdata
 	# host access prefix = /host
+
+	#| >>> [global].hostname <<<
+	#| datatype: text, default value: a7ccb73e566e
+	hostname = {{ host_name }}
+
 	# pthread stack size = 8MiB
 	# cpu cores = 2
 	# libuv worker threads = 16
 	# profile = standalone
-	hostname = {{ host_name }}
 	# glibc malloc arena max for plugins = 1
 	# glibc malloc arena max for netdata = 1
 	# crash reports = all
@@ -30,12 +34,15 @@
 	# has unstable connection = no

 [db]
+	#| >>> [db].update every <<<
+	#| datatype: duration (seconds), default value: 1s
+	update every = 10s
+
 	# enable replication = yes
 	# replication period = 1d
 	# replication step = 1h
 	# replication threads = 1
 	# replication prefetch = 10
-	# update every = 1s
 	# db = dbengine
 	# memory deduplication (ksm) = auto
 	# cleanup orphan hosts after = 1h
@@ -47,7 +54,7 @@
 	# dbengine extent cache size = off
 	# dbengine enable journal integrity check = no
 	# dbengine use all ram for caches = no
-	# dbengine out of memory protection = 391.99MiB
+	# dbengine out of memory protection = 391.49MiB
 	# dbengine use direct io = yes
 	# dbengine journal v2 unmount time = 2m
 	# dbengine pages per extent = 109
@@ -93,7 +100,7 @@

 [environment variables]
 	# PATH = /usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/sbin:/usr/sbin:/usr/local/bin:/usr/local/sbin
-	# PYTHONPATH = 
+	# PYTHONPATH =
 	# TZ = :/etc/localtime

 [host labels]
@@ -107,15 +114,15 @@

 [ml]
 	# enabled = auto
-	# maximum num samples to train = 21600
-	# minimum num samples to train = 900
+	# training window = 6h
+	# min training window = 15m
+	# max training vectors = 1440
+	# max samples to smooth = 3
 	# train every = 3h
 	# number of models per dimension = 18
 	# delete models older than = 7d
 	# num samples to diff = 1
-	# num samples to smooth = 3
 	# num samples to lag = 5
-	# random sampling ratio = 0.20000
 	# maximum number of k-means iterations = 1000
 	# dimension anomaly score threshold = 0.99000
 	# host anomaly rate threshold = 1.00000
@@ -163,7 +170,7 @@
 	# timeout for first request = 1m
 	# accept a streaming request every = off
 	# respect do not track policy = no
-	# x-frame-options response header = 
+	# x-frame-options response header =
 	# allow connections from = localhost *
 	# allow connections by dns = heuristic
 	# allow dashboard from = localhost *
@@ -181,7 +188,7 @@
 	# gzip compression level = 3
 	# ssl skip certificate verification = no
 	# web server threads = 6
-	# web server max sockets = 262144
+	# web server max sockets = 131072

 [registry]
 	# enabled = no
@@ -189,9 +196,9 @@
 	# registry log file = /var/lib/netdata/registry/registry-log.db
 	# registry save db every new entries = 1000000
 	# registry expire idle persons = 1y
-	# registry domain = 
+	# registry domain =
 	# registry to announce = https://registry.my-netdata.io
-	# registry hostname = 7171b7f9fc69
+	# registry hostname = rivendell
 	# verify browser cookies support = yes
 	# enable cookies SameSite and Secure = yes
 	# max URL length = 1024
@@ -202,9 +209,29 @@

 [pulse]
 	# extended = no
-	# update every = 1s
+	# update every = 10s

 [plugins]
+	#| >>> [plugins].perf <<<
+	#| datatype: yes or no, default value: yes
+	perf = no
+
+	#| >>> [plugins].python.d <<<
+	#| datatype: yes or no, default value: yes
+	python.d = no
+
+	#| >>> [plugins].charts.d <<<
+	#| datatype: yes or no, default value: yes
+	charts.d = no
+
+	#| >>> [plugins].otel <<<
+	#| datatype: yes or no, default value: yes
+	otel = no
+
+	#| >>> [plugins].statsd <<<
+	#| datatype: yes or no, default value: yes
+	statsd = no
+
 	# idlejitter = yes
 	# netdata pulse = yes
 	# profile = no
@@ -213,23 +240,20 @@
 	# proc = yes
 	# cgroups = yes
 	# timex = yes
-	# statsd = yes
 	# enable running new plugins = yes
 	# check for new plugins every = 1m
 	# slabinfo = no
 	# freeipmi = no
-	# python.d = yes
-	# go.d = yes
-	# apps = yes
-	# systemd-journal = yes
-	# network-viewer = yes
-	# charts.d = yes
 	# debugfs = yes
-	# perf = yes
 	# ioping = yes
+	# network-viewer = yes
+	# apps = yes
+	# go.d = yes
+	# systemd-units = yes
+	# systemd-journal = yes

 [statsd]
-	# update every (flushInterval) = 1s
+	# update every (flushInterval) = 10s
 	# udp messages to process at once = 10
 	# create private charts for metrics matching = *
 	# max private charts hard limit = 1000
@@ -247,10 +271,7 @@
 	# gaps on histograms (deleteHistograms) = no
 	# gaps on timers (deleteTimers) = no
 	# gaps on dictionaries (deleteDictionaries) = no
-	# statsd server max TCP sockets = 262144
-	# listen backlog = 4096
-	# default port = 8125
-	# bind to = udp:localhost tcp:localhost
+	# statsd server max TCP sockets = 131072

 [plugin:idlejitter]
 	# loop time = 20ms
@@ -300,22 +321,32 @@
 	# /sys/class/drm = yes

 [plugin:cgroups]
-	# update every = 1s
-	# check for new cgroups every = 10s
+	#| >>> [plugin:cgroups].update every <<<
+	#| datatype: duration (seconds), default value: 10s
+	update every = 20s
+
+	#| >>> [plugin:cgroups].check for new cgroups every <<<
+	#| datatype: duration (seconds), default value: 10s
+	check for new cgroups every = 20s
+
 	# use unified cgroups = auto
 	# max cgroups to allow = 1000
 	# max cgroups depth to monitor = 0
-	# enable by default cgroups matching =  !*/init.scope  !/system.slice/run-*.scope  *user.slice/docker-* !*user.slice* *.scope  !/machine.slice/*/.control  !/machine.slice/*/payload*  !/machine.slice/*/supervisor  /machine.slice/*.service  */kubepods/pod*/*  */kubepods/*/pod*/*  */*-kubepods-pod*/*  */*-kubepods-*-pod*/*  !*kubepods* !*kubelet*  !*/vcpu*  !*/emulator  !*.mount  !*.partition  !*.service  !*.service/udev  !*.socket  !*.slice  !*.swap  !*.user  !/  !/docker  !*/libvirt  !/lxc  !/lxc/*/*  !/lxc.monitor*  !/lxc.pivot  !/lxc.payload  !*lxcfs.service/.control !/machine  !/qemu  !/system  !/systemd  !/user  * 
-	# enable by default cgroups names matching =  * 
-	# search for cgroups in subpaths matching =  !*/init.scope  !*-qemu  !*.libvirt-qemu  !/init.scope  !/system  !/systemd  !/user  !/lxc/*/*  !/lxc.monitor  !/lxc.payload/*/*  !/lxc.payload.*  * 
+	# enable by default cgroups matching =  !*/init.scope  !/system.slice/run-*.scope  *user.slice/docker-* !*user.slice* *.scope  !/machine.slice/*/.control  !/machine.slice/*/payload*  !/machine.slice/*/supervisor  /machine.slice/*.service  */kubepods/pod*/*  */kubepods/*/pod*/*  */*-kubepods-pod*/*  */*-kubepods-*-pod*/*  !*kubepods* !*kubelet*  !*/vcpu*  !*/emulator  !*.mount  !*.partition  !*.service  !*.service/udev  !*.socket  !*.slice  !*.swap  !*.user  !/  !/docker  !*/libvirt  !/lxc  !/lxc/*/*  !/lxc.monitor*  !/lxc.pivot  !/lxc.payload  !*lxcfs.service/.control !/machine  !/qemu  !/system  !/systemd  !/user  *
+	# enable by default cgroups names matching =  *
+	# search for cgroups in subpaths matching =  !*/init.scope  !*-qemu  !*.libvirt-qemu  !/init.scope  !/system  !/systemd  !/user  !/lxc/*/*  !/lxc.monitor  !/lxc.payload/*/*  !/lxc.payload.*  *
 	# script to get cgroup names = /usr/libexec/netdata/plugins.d/cgroup-name.sh
 	# script to get cgroup network interfaces = /usr/libexec/netdata/plugins.d/cgroup-network
-	# run script to rename cgroups matching =  !/  !*.mount  !*.socket  !*.partition  /machine.slice/*.service  !*.service  !*.slice  !*.swap  !*.user  !init.scope  !*.scope/vcpu*  !*.scope/emulator  *.scope  *docker*  *lxc*  *qemu*  */kubepods/pod*/*  */kubepods/*/pod*/*  */*-kubepods-pod*/*  */*-kubepods-*-pod*/*  !*kubepods* !*kubelet*  *.libvirt-qemu  * 
-	# cgroups to match as systemd services =  !/system.slice/*/*.service  /system.slice/*.service 
+	# run script to rename cgroups matching =  !/  !*.mount  !*.socket  !*.partition  /machine.slice/*.service  !*.service  !*.slice  !*.swap  !*.user  !init.scope  !*.scope/vcpu*  !*.scope/emulator  *.scope  *docker*  *lxc*  *qemu*  */kubepods/pod*/*  */kubepods/*/pod*/*  */*-kubepods-pod*/*  */*-kubepods-*-pod*/*  !*kubepods* !*kubelet*  *.libvirt-qemu  *
+	# cgroups to match as systemd services =  !/system.slice/*/*.service  /system.slice/*.service

 [plugin:proc:diskspace]
+	#| >>> [plugin:proc:diskspace].update every <<<
+	#| reformatted from: 60s
+	#| datatype: duration (seconds), default value: 10s
+	update every = 1m
+
 	# remove charts of unmounted disks = yes
-	# update every = 1s
 	# check for new mount points every = 15s
 	# exclude space metrics on paths = /dev /dev/shm /proc/* /sys/* /var/run/user/* /run/lock /run/user/* /snap/* /var/lib/docker/* /var/lib/containers/storage/* /run/credentials/* /run/containerd/*  /rpool /rpool/*
 	# exclude space metrics on filesystems = *gvfs *gluster* *s3fs *ipfs *davfs2 *httpfs *sshfs *gdfs *moosefs fusectl autofs cgroup cgroup2 hugetlbfs devtmpfs fuse.lxcfs
@@ -326,41 +357,29 @@
 [plugin:tc]
 	# script to run to get tc values = /usr/libexec/netdata/plugins.d/tc-qos-helper.sh

-[plugin:python.d]
-	# update every = 1s
-	# command options = 
-
 [plugin:go.d]
-	# update every = 1s
-	# command options = 
+	# update every = 10s
+	# command options =

 [plugin:apps]
-	# update every = 1s
-	# command options = 
+	# update every = 10s
+	# command options =

 [plugin:systemd-journal]
-	# update every = 1s
-	# command options = 
+	# update every = 10s
+	# command options =

 [plugin:network-viewer]
-	# update every = 1s
-	# command options = 
-
-[plugin:charts.d]
-	# update every = 1s
-	# command options = 
+	# update every = 10s
+	# command options =

 [plugin:debugfs]
-	# update every = 1s
-	# command options = 
-
-[plugin:perf]
-	# update every = 1s
-	# command options = 
+	# update every = 10s
+	# command options =

 [plugin:ioping]
-	# update every = 1s
-	# command options = 
+	# update every = 10s
+	# command options =

 [plugin:proc:/proc/net/dev]
 	# compressed packets for all interfaces = no
@@ -580,7 +599,7 @@
 	# hardware packets counters = auto
 	# hardware errors counters = auto
 	# monitor only active ports = auto
-	# disable by default interfaces matching = 
+	# disable by default interfaces matching =
 	# refresh ports state every = 30s

 [plugin:proc:/proc/net/stat/nf_conntrack]
@@ -635,7 +654,7 @@
 	# preferred disk ids = *
 	# exclude disks = loop* ram*
 	# filename to monitor = /host/proc/diskstats
-	# performance metrics for disks with major 252 = yes
+	# performance metrics for disks with major 253 = yes

 [plugin:proc:/proc/mdstat]
 	# faulty devices = yes
@@ -685,3 +704,7 @@

 [plugin:proc:/sys/class/drm]
 	# directory to monitor = /host/sys/class/drm
+
+[plugin:systemd-units]
+	# update every = 10s
+	# command options =
--- a/playbook-netdata.yml
+++ b/playbook-netdata.yml
@@ -41,7 +41,7 @@

    - name: "Copy netdata config file"
      ansible.builtin.template:
-        src: "files/{{ app_name }}/netdata.conf.j2"
+        src: "files/{{ app_name }}/netdata.template.conf"
        dest: "{{ config_dir }}/netdata.conf"
        owner: "{{ app_user }}"
        group: "{{ app_user }}"