diff options
author | Stonewall Jackson <stonewall@sacredheartsc.com> | 2023-02-06 20:31:20 -0500 |
---|---|---|
committer | Stonewall Jackson <stonewall@sacredheartsc.com> | 2023-02-06 20:36:16 -0500 |
commit | 969fc7c21dc7fa85dcc516aedf8e816ee8bc8bd6 (patch) | |
tree | 23ac2860839bfe9b6045841a9834f50a36f88a82 | |
parent | d2e954c37b1b2111ae1ca7f489ae170180491522 (diff) | |
download | selfhosted-969fc7c21dc7fa85dcc516aedf8e816ee8bc8bd6.tar.gz selfhosted-969fc7c21dc7fa85dcc516aedf8e816ee8bc8bd6.zip |
set max_retries to UINT32_MAX for asterisk registrations
After a brief internet outage, I noticed that asterisk had given up
trying to reconnect to my upstream SIP server (looks like the default
value of max_retries is 10).
Although the asterisk "registration" object was disconnected, the
"endpoint" object still reported being up, so the
check_asterisk_endpoints nagios plugin did not alert me to the problem.
This commit sets max_retries to UINT32_MAX for asterisk registrations by
default. It also adds a new nagios plugin, check_asterisk_registrations.
Unfortunately, the ARI does not expose registrations via the REST API,
so I had to write a hacky bash script to parse the asterisk CLI output.
5 files changed, 66 insertions, 9 deletions
diff --git a/roles/asterisk/templates/etc/asterisk/pjsip_wizard.conf.j2 b/roles/asterisk/templates/etc/asterisk/pjsip_wizard.conf.j2
index 67a6574..4a6d800 100644
--- a/roles/asterisk/templates/etc/asterisk/pjsip_wizard.conf.j2
+++ b/roles/asterisk/templates/etc/asterisk/pjsip_wizard.conf.j2
@@ -3,15 +3,17 @@
 ;;;;;;;;;;;
 
 [trunk-defaults](!)
-type                     = wizard
-sends_auth               = yes
-sends_registrations      = yes
-endpoint/rtp_symmetric   = yes
-endpoint/rewrite_contact = yes
-endpoint/send_rpid       = yes
-endpoint/from_domain     = {{ asterisk_from_domain }}
-endpoint/allow           = !all,ulaw
-aor/qualify_frequency    = 30
+type                                  = wizard
+sends_auth                            = yes
+sends_registrations                   = yes
+endpoint/rtp_symmetric                = yes
+endpoint/rewrite_contact              = yes
+endpoint/send_rpid                    = yes
+endpoint/from_domain                  = {{ asterisk_from_domain }}
+endpoint/allow                        = !all,ulaw
+registration/max_retries              = 4294967295
+registration/auth_rejection_permanent = no
+aor/qualify_frequency                 = 30
 
 {% for trunk in asterisk_sip_trunks %}
 [{{ trunk.name }}](trunk-defaults)
diff --git a/roles/nagios_client/files/usr/lib64/nagios/plugins/check_asterisk_registrations b/roles/nagios_client/files/usr/lib64/nagios/plugins/check_asterisk_registrations
new file mode 100644
index 0000000..132e4e3
--- /dev/null
+++ b/roles/nagios_client/files/usr/lib64/nagios/plugins/check_asterisk_registrations
@@ -0,0 +1,40 @@
+#!/bin/bash
+
+set -Eeu -o pipefail
+shopt -s lastpipe  # run the while loop in this shell so ok/error survive it
+
+trap 'exit 3' ERR  # any unexpected failure exits 3 (Nagios UNKNOWN)
+
+ok=()
+error=()
+
+sudo asterisk -rx 'pjsip show registrations' \
+  | sed '1,4d' \
+  | head -n2 \
+  | while read -r uri auth status
+do
+  msg="${auth} is ${status,,}"
+  if [ "$status" = Registered ]; then
+    ok+=("$msg")
+  elif [ -n "$status" ]; then  # ignore blank/short lines left after the header
+    error+=("$msg")
+  fi
+done
+
+if (( ${#error[@]} )); then
+  echo 'trunk is not registered!'
+  RC=2
+else
+  echo 'all trunks registered'
+  RC=0
+fi
+
+if (( ${#error[@]} )); then
+  printf 'CRIT: %s\n' "${error[@]}"
+fi
+
+if (( ${#ok[@]} )); then
+  printf 'OK: %s\n' "${ok[@]}"
+fi
+
+exit $RC
diff --git a/roles/nagios_client/vars/main.yml b/roles/nagios_client/vars/main.yml
index 29fca6d..bbd274d 100644
--- a/roles/nagios_client/vars/main.yml
+++ b/roles/nagios_client/vars/main.yml
@@ -9,3 +9,4 @@ nagios_sudo_whitelist:
   - /usr/bin/dnf needs-restarting --reboothint
   - /usr/bin/dnf needs-restarting --services
   - /usr/bin/systemctl status -- *
+  - /usr/sbin/asterisk -rx pjsip show registrations
diff --git a/roles/nagios_server/templates/etc/nagios/objects/commands.cfg.j2 b/roles/nagios_server/templates/etc/nagios/objects/commands.cfg.j2
index e44d6ab..06503c5 100644
--- a/roles/nagios_server/templates/etc/nagios/objects/commands.cfg.j2
+++ b/roles/nagios_server/templates/etc/nagios/objects/commands.cfg.j2
@@ -167,6 +167,13 @@ define command {
   command_line    $USER1$/check_asterisk_endpoints -H '$_HOSTFQDN$' -P '$ARG1$' -u '$ARG2$' -p '$ARG3$' $ARG4$
 }
 
+define command {
+  command_name    check_asterisk_registrations
+  command_line    $USER1$/check_by_ssh \
+                    {{ nagios_check_by_ssh_args }} \
+                    --command='check_asterisk_registrations'
+}
+
 
 ######################################
 # Service Checks: Certificate Validity
diff --git a/roles/nagios_server/templates/etc/nagios/objects/services.cfg.j2 b/roles/nagios_server/templates/etc/nagios/objects/services.cfg.j2
index 68b4fe4..032fbba 100644
--- a/roles/nagios_server/templates/etc/nagios/objects/services.cfg.j2
+++ b/roles/nagios_server/templates/etc/nagios/objects/services.cfg.j2
@@ -228,6 +228,13 @@ define service {
 # Asterisk Checks
 #################
 
+define service {
+  service_description   registrations
+  hostgroups            asterisk_servers
+  use                   generic-service
+  check_command         check_asterisk_registrations
+}
+
 {% for host in groups.asterisk_servers %}
 # endpoints - {{ host }}
 define service {