From 969fc7c21dc7fa85dcc516aedf8e816ee8bc8bd6 Mon Sep 17 00:00:00 2001
From: Stonewall Jackson
Date: Mon, 6 Feb 2023 20:31:20 -0500
Subject: set max_retries to UINT32_MAX for asterisk registrations

After a brief internet outage, I noticed that asterisk had given up trying to
reconnect to my upstream SIP server (looks like the default value of
max_retries is 10). Although the asterisk "registration" object was
disconnected, the "endpoint" object still reported being up, so the
check_asterisk_endpoints nagios plugin did not alert me to the problem.

This commit sets max_retries to UINT32_MAX for asterisk registrations by
default. It also adds a new nagios plugin, check_asterisk_registrations.
Unfortunately, the ARI does not expose registrations via the REST API, so I
had to write a hacky bash script to parse the asterisk CLI output.
---
 .../templates/etc/asterisk/pjsip_wizard.conf.j2    | 20 ++++++-----
 .../nagios/plugins/check_asterisk_registrations    | 40 ++++++++++++++++++++++
 roles/nagios_client/vars/main.yml                  |  1 +
 .../templates/etc/nagios/objects/commands.cfg.j2   |  7 ++++
 .../templates/etc/nagios/objects/services.cfg.j2   |  7 ++++
 5 files changed, 66 insertions(+), 9 deletions(-)
 create mode 100644 roles/nagios_client/files/usr/lib64/nagios/plugins/check_asterisk_registrations

diff --git a/roles/asterisk/templates/etc/asterisk/pjsip_wizard.conf.j2 b/roles/asterisk/templates/etc/asterisk/pjsip_wizard.conf.j2
index 67a6574..4a6d800 100644
--- a/roles/asterisk/templates/etc/asterisk/pjsip_wizard.conf.j2
+++ b/roles/asterisk/templates/etc/asterisk/pjsip_wizard.conf.j2
@@ -3,15 +3,17 @@
 ;;;;;;;;;;;
 
 [trunk-defaults](!)
-type                     = wizard
-sends_auth               = yes
-sends_registrations      = yes
-endpoint/rtp_symmetric   = yes
-endpoint/rewrite_contact = yes
-endpoint/send_rpid       = yes
-endpoint/from_domain     = {{ asterisk_from_domain }}
-endpoint/allow           = !all,ulaw
-aor/qualify_frequency    = 30
+type                                  = wizard
+sends_auth                            = yes
+sends_registrations                   = yes
+endpoint/rtp_symmetric                = yes
+endpoint/rewrite_contact              = yes
+endpoint/send_rpid                    = yes
+endpoint/from_domain                  = {{ asterisk_from_domain }}
+endpoint/allow                        = !all,ulaw
+registration/max_retries              = 4294967295
+registration/auth_rejection_permanent = no
+aor/qualify_frequency                 = 30
 
 {% for trunk in asterisk_sip_trunks %}
 [{{ trunk.name }}](trunk-defaults)
diff --git a/roles/nagios_client/files/usr/lib64/nagios/plugins/check_asterisk_registrations b/roles/nagios_client/files/usr/lib64/nagios/plugins/check_asterisk_registrations
new file mode 100644
index 0000000..132e4e3
--- /dev/null
+++ b/roles/nagios_client/files/usr/lib64/nagios/plugins/check_asterisk_registrations
@@ -0,0 +1,57 @@
+#!/bin/bash
+#
+# Nagios plugin: inspects the rows printed by `pjsip show registrations`.
+# Exits 0 when every parsed row has status "Registered" and 2 when any row
+# does not; any failing command exits 3 via the ERR trap. These correspond
+# to OK / CRITICAL / UNKNOWN in the Nagios plugin return-code convention.
+
+set -Eeu -o pipefail
+# lastpipe runs the final `while` of the pipeline in this shell rather than
+# a subshell, so the arrays filled inside the loop are visible afterwards.
+shopt -s lastpipe
+
+trap 'exit 3' ERR
+
+ok=()
+error=()
+
+# `sed '1,4d'` drops the first four output lines (presumably the table
+# header -- confirm against your Asterisk version).
+# NOTE(review): `head -n2` limits the check to the next two lines, so any
+# further registrations are never inspected -- confirm this is intended.
+sudo asterisk -rx 'pjsip show registrations' \
+  | sed '1,4d' \
+  | head -n2 \
+  | while read -r uri auth status _
+do
+  # `_` absorbs anything after the status word (possibly an expiry column on
+  # some Asterisk versions -- TODO confirm) so the comparison sees one word.
+  # Rows without a status field (e.g. blank lines) are not registrations.
+  [ -n "$status" ] || continue
+  msg="${auth} is ${status,,}"
+  if [ "$status" = Registered ]; then
+    ok+=("$msg")
+  else
+    # Append to `error`, the array tested below; a differently named array
+    # here would make the CRITICAL branch unreachable.
+    error+=("$msg")
+  fi
+done
+
+if (( ${#error[@]} )); then
+  echo 'trunk is not registered!'
+  RC=2
+else
+  echo 'all trunks registered'
+  RC=0
+fi
+
+if (( ${#error[@]} )); then
+  printf 'CRIT: %s\n' "${error[@]}"
+fi
+
+if (( ${#ok[@]} )); then
+  printf 'OK: %s\n' "${ok[@]}"
+fi
+
+exit $RC
diff --git a/roles/nagios_client/vars/main.yml b/roles/nagios_client/vars/main.yml
index 29fca6d..bbd274d 100644
--- a/roles/nagios_client/vars/main.yml
+++ b/roles/nagios_client/vars/main.yml
@@ -9,3 +9,4 @@ nagios_sudo_whitelist:
   - /usr/bin/dnf needs-restarting --reboothint
   - /usr/bin/dnf needs-restarting --services
   - /usr/bin/systemctl status -- *
+  - /usr/sbin/asterisk -rx pjsip show registrations
diff --git a/roles/nagios_server/templates/etc/nagios/objects/commands.cfg.j2 b/roles/nagios_server/templates/etc/nagios/objects/commands.cfg.j2
index e44d6ab..06503c5 100644
--- a/roles/nagios_server/templates/etc/nagios/objects/commands.cfg.j2
+++ b/roles/nagios_server/templates/etc/nagios/objects/commands.cfg.j2
@@ -167,6 +167,13 @@ define command {
     command_line $USER1$/check_asterisk_endpoints -H '$_HOSTFQDN$' -P '$ARG1$' -u '$ARG2$' -p '$ARG3$' $ARG4$
 }
 
+define command {
+    command_name check_asterisk_registrations
+    command_line $USER1$/check_by_ssh \
+                 {{ nagios_check_by_ssh_args }} \
+                 --command='check_asterisk_registrations'
+}
+
 
 ######################################
 # Service Checks: Certificate Validity
diff --git a/roles/nagios_server/templates/etc/nagios/objects/services.cfg.j2 b/roles/nagios_server/templates/etc/nagios/objects/services.cfg.j2
index 68b4fe4..032fbba 100644
--- a/roles/nagios_server/templates/etc/nagios/objects/services.cfg.j2
+++ b/roles/nagios_server/templates/etc/nagios/objects/services.cfg.j2
@@ -228,6 +228,13 @@ define service {
 # Asterisk Checks
 #################
 
+define service {
+    service_description registrations
+    hostgroups          asterisk_servers
+    use                 generic-service
+    check_command       check_asterisk_registrations
+}
+
 {% for host in groups.asterisk_servers %}
 # endpoints - {{ host }}
 define service {
-- 
cgit