aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorStonewall Jackson <stonewall@sacredheartsc.com>2023-02-06 20:31:20 -0500
committerStonewall Jackson <stonewall@sacredheartsc.com>2023-02-06 20:36:16 -0500
commit969fc7c21dc7fa85dcc516aedf8e816ee8bc8bd6 (patch)
tree23ac2860839bfe9b6045841a9834f50a36f88a82
parentd2e954c37b1b2111ae1ca7f489ae170180491522 (diff)
downloadselfhosted-969fc7c21dc7fa85dcc516aedf8e816ee8bc8bd6.tar.gz
selfhosted-969fc7c21dc7fa85dcc516aedf8e816ee8bc8bd6.zip
set max_retries to UINT32_MAX for asterisk registrations
After a brief internet outage, I noticed that asterisk had given up trying to reconnect to my upstream SIP server (looks like the default value of max_retries is 10). Although the asterisk "registration" object was disconnected, the "endpoint" object still reported being up, so the check_asterisk_endpoints nagios plugin did not alert me to the problem. This commit sets max_retries to UINT32_MAX for asterisk registrations by default. It also adds a new nagios plugin, check_asterisk_registrations. Unfortunately, the ARI does not expose registrations via the REST API, so I had to write a hacky bash script to parse the asterisk CLI output.
-rw-r--r--roles/asterisk/templates/etc/asterisk/pjsip_wizard.conf.j220
-rw-r--r--roles/nagios_client/files/usr/lib64/nagios/plugins/check_asterisk_registrations40
-rw-r--r--roles/nagios_client/vars/main.yml1
-rw-r--r--roles/nagios_server/templates/etc/nagios/objects/commands.cfg.j27
-rw-r--r--roles/nagios_server/templates/etc/nagios/objects/services.cfg.j27
5 files changed, 66 insertions, 9 deletions
diff --git a/roles/asterisk/templates/etc/asterisk/pjsip_wizard.conf.j2 b/roles/asterisk/templates/etc/asterisk/pjsip_wizard.conf.j2
index 67a6574..4a6d800 100644
--- a/roles/asterisk/templates/etc/asterisk/pjsip_wizard.conf.j2
+++ b/roles/asterisk/templates/etc/asterisk/pjsip_wizard.conf.j2
@@ -3,15 +3,17 @@
;;;;;;;;;;;
[trunk-defaults](!)
-type = wizard
-sends_auth = yes
-sends_registrations = yes
-endpoint/rtp_symmetric = yes
-endpoint/rewrite_contact = yes
-endpoint/send_rpid = yes
-endpoint/from_domain = {{ asterisk_from_domain }}
-endpoint/allow = !all,ulaw
-aor/qualify_frequency = 30
+type = wizard
+sends_auth = yes
+sends_registrations = yes
+endpoint/rtp_symmetric = yes
+endpoint/rewrite_contact = yes
+endpoint/send_rpid = yes
+endpoint/from_domain = {{ asterisk_from_domain }}
+endpoint/allow = !all,ulaw
+registration/max_retries = 4294967295
+registration/auth_rejection_permanent = no
+aor/qualify_frequency = 30
{% for trunk in asterisk_sip_trunks %}
[{{ trunk.name }}](trunk-defaults)
diff --git a/roles/nagios_client/files/usr/lib64/nagios/plugins/check_asterisk_registrations b/roles/nagios_client/files/usr/lib64/nagios/plugins/check_asterisk_registrations
new file mode 100644
index 0000000..132e4e3
--- /dev/null
+++ b/roles/nagios_client/files/usr/lib64/nagios/plugins/check_asterisk_registrations
@@ -0,0 +1,40 @@
+#!/bin/bash
+# Nagios plugin: parses `asterisk -rx 'pjsip show registrations'` and reports
+# each registration. Exits 0 if all are Registered, 2 if any is not, 3 on error.
+
+set -Eeu -o pipefail
+shopt -s lastpipe  # run the final `while` in this shell so ok/error persist
+
+trap 'exit 3' ERR
+
+ok=()
+error=()
+
+sudo asterisk -rx 'pjsip show registrations' \
+  | sed '1,4d' \
+  | while read -r uri auth status _
+do
+  # Skip blank lines and the "Objects found: N" footer; `_` drops extra columns.
+  case "$uri" in ''|Objects) continue ;; esac
+  msg="${auth} is ${status,,}"
+  if [ "$status" = Registered ]; then
+    ok+=("$msg")
+  else
+    error+=("$msg")  # must be the array tested below (was `err`: never CRIT)
+  fi
+done
+
+if (( ${#error[@]} )); then
+  echo 'trunk is not registered!'
+  printf 'CRIT: %s\n' "${error[@]}"
+  RC=2
+else
+  echo 'all trunks registered'
+  RC=0
+fi
+
+if (( ${#ok[@]} )); then
+  printf 'OK: %s\n' "${ok[@]}"
+fi
+
+exit $RC
diff --git a/roles/nagios_client/vars/main.yml b/roles/nagios_client/vars/main.yml
index 29fca6d..bbd274d 100644
--- a/roles/nagios_client/vars/main.yml
+++ b/roles/nagios_client/vars/main.yml
@@ -9,3 +9,4 @@ nagios_sudo_whitelist:
- /usr/bin/dnf needs-restarting --reboothint
- /usr/bin/dnf needs-restarting --services
- /usr/bin/systemctl status -- *
+ - /usr/sbin/asterisk -rx pjsip show registrations
diff --git a/roles/nagios_server/templates/etc/nagios/objects/commands.cfg.j2 b/roles/nagios_server/templates/etc/nagios/objects/commands.cfg.j2
index e44d6ab..06503c5 100644
--- a/roles/nagios_server/templates/etc/nagios/objects/commands.cfg.j2
+++ b/roles/nagios_server/templates/etc/nagios/objects/commands.cfg.j2
@@ -167,6 +167,13 @@ define command {
command_line $USER1$/check_asterisk_endpoints -H '$_HOSTFQDN$' -P '$ARG1$' -u '$ARG2$' -p '$ARG3$' $ARG4$
}
+define command {
+ command_name check_asterisk_registrations
+ command_line $USER1$/check_by_ssh \
+ {{ nagios_check_by_ssh_args }} \
+ --command='check_asterisk_registrations'
+}
+
######################################
# Service Checks: Certificate Validity
diff --git a/roles/nagios_server/templates/etc/nagios/objects/services.cfg.j2 b/roles/nagios_server/templates/etc/nagios/objects/services.cfg.j2
index 68b4fe4..032fbba 100644
--- a/roles/nagios_server/templates/etc/nagios/objects/services.cfg.j2
+++ b/roles/nagios_server/templates/etc/nagios/objects/services.cfg.j2
@@ -228,6 +228,13 @@ define service {
# Asterisk Checks
#################
+define service {
+ service_description registrations
+ hostgroups asterisk_servers
+ use generic-service
+ check_command check_asterisk_registrations
+}
+
{% for host in groups.asterisk_servers %}
# endpoints - {{ host }}
define service {