aboutsummaryrefslogtreecommitdiffstats
path: root/roles
diff options
context:
space:
mode:
authorStonewall Jackson <stonewall@sacredheartsc.com>2023-02-14 20:22:16 -0500
committerStonewall Jackson <stonewall@sacredheartsc.com>2023-02-14 20:22:16 -0500
commit363f9bfadf52fed2e83ea22f506758c26163ac5b (patch)
tree43b05e188a738ea6d65766d0c471d55676d5d1b0 /roles
parent7041d5bf05a843566950475d7e2652308535c94c (diff)
downloadselfhosted-363f9bfadf52fed2e83ea22f506758c26163ac5b.tar.gz
selfhosted-363f9bfadf52fed2e83ea22f506758c26163ac5b.zip
add docs for invidious, nagios
Diffstat (limited to 'roles')
-rw-r--r--roles/invidious/README.md3
-rw-r--r--roles/nagios_client/README.md32
-rw-r--r--roles/nagios_server/README.md206
-rw-r--r--roles/nagios_server/defaults/main.yml14
-rw-r--r--roles/nagios_server/vars/main.yml1
5 files changed, 244 insertions, 12 deletions
diff --git a/roles/invidious/README.md b/roles/invidious/README.md
index fd3ee2e..a311c29 100644
--- a/roles/invidious/README.md
+++ b/roles/invidious/README.md
@@ -11,6 +11,9 @@ periodically.
This role configures the application only; it does not configure a reverse
proxy.
+Invidious uses PostgreSQL, but since it does not use `libpq`, it does not
+support GSSAPI authentication. Hence why the `invidious_db_password` variable
+is required.
Variables
---------
diff --git a/roles/nagios_client/README.md b/roles/nagios_client/README.md
new file mode 100644
index 0000000..fffa1bb
--- /dev/null
+++ b/roles/nagios_client/README.md
@@ -0,0 +1,32 @@
+Nagios Client
+=============
+
+Description
+-----------
+
+The `nagios_client` role creates the `nagios` user and installs Nagios plugins.
+
+
+Variables
+---------
+
+This role **accepts** the following variables:
+
+Variable | Default | Description
+---------------------------------|-----------|------------
+`nagios_ssh_pubkey` | &nbsp; | SSH public key for `nagios` user
+
+
+Usage
+-----
+
+Example playbook:
+
+````yaml
+- hosts: all
+ roles:
+ - role: nagios_client
+ vars:
+ nagios_ssh_pubkey: |
+ ssh-ed25519 AAAA...
+````
diff --git a/roles/nagios_server/README.md b/roles/nagios_server/README.md
new file mode 100644
index 0000000..ffdbde6
--- /dev/null
+++ b/roles/nagios_server/README.md
@@ -0,0 +1,206 @@
+Nagios Server
+=============
+
+Description
+-----------
+
+The `nagios_server` role installs and configures the [Nagios](https://www.nagios.org/)
+monitoring system. The check configuration is generated from the Ansible inventory
+using various `nagios_`-prefixed host variables.
+
+This role does not install a webserver; it configures the Nagios application only.
+
+The config file templates are tightly coupled to the group names in the [example inventory](../../inventory-example/).
+If you change the group names, then you'll also need to modify the [object templates](templates/etc/nagios/objects/).
+
+### Why Nagios?
+
+I admit, Nagios is long in the tooth, and the interface leaves much to be
+desired. In my view, it has three major advantages:
+
+ - You can just `dnf install nagios`, and you're ready to go. No supporting
+ infrastructure required.
+
+ - The configuration syntax is extremely simple, making it easy to
+ automatically generate the config files.
+
+ - Extending it with your own plugins is trivial: you just write a script that
+ returns `0`, `1`, or `2`.
+
+You _can_ use Nagios for metrics gathering, but its not well-suited to the task.
+In this project, its used purely for health checks.
+
+I would have preferred to use [Icinga](https://icinga.com/) for its slick
+interface, but they sadly put RPM packages behind a paywall recently.
+
+
+Variables
+---------
+
+This role **accepts** the following variables:
+
+Variable | Default | Description
+--------------------------------------|---------------------------|------------
+`nagios_admin_email` | `root@{{ email_domain }}` | Administrator's email address
+`nagios_admin_pager` | `root@{{ email_domain }}` | Administrator's "pager" (not really used)
+`nagios_access_group` | `role-nagios-access` | FreeIPA group of users allowed to access web interface (will be created)
+`nagios_email` | `root@{{ email_domain }}` | Default contact email for alerts
+`nagios_reboot_window` | `03:00-05:00` | Daily [Time Period](https://assets.nagios.com/downloads/nagioscore/docs/nagioscore/3/en/objectdefinitions.html#timeperiod) for host reboots
+`nagios_ssh_privkey` | &nbsp; | SSH private key for `nagios` user
+`nagios_excluded_groups` | `[]` | List of Ansible group names to exclude from checks
+`nagios_ssh_control_persist` | `20m` | Timeout of persistent SSH connection
+`nagios_snmp_max_size` | 10000 | Maximum size of SNMP responses (bytes)
+`nagios_manubulon_version` | `master` | Git version of [Manubulon](https://github.com/SteScho/manubulon-snmp) to install
+`nagios_check_dns` | `[]` | DNS checks to perform (see [format](#nagios_check_dns) below)
+`nagios_connectivity_check_host` | `8.8.8.8` | Host to use for upstream connectivity check
+`nagios_connectivity_check_count` | 20 | Number of ICMP packets to use for connectivity check
+`nagios_connectivity_check_rtt_warn` | 50.0 | Round-trip-time warning threshold for connectivity check (ms)
+`nagios_connectivity_check_rtt_crit` | 100.0 | Round-trip-time critical threshold for connectivity check (ms)
+`nagios_connectivity_check_loss_warn` | `5%` | Packet loss warning threshold for connectivity check
+`nagios_connectivity_check_loss_crit` | `20%` | Packet loss critical threshold for connectivity check
+
+
+### nagios\_check\_dns
+
+The `nagios_check_dns` variable lists DNS checks to perform. It should contain
+a list of dictionaries of the following format:
+
+Variable | Default | Description
+---------|---------|------------
+name | &nbsp; | FQDN to query
+qtype | `A` | Query type
+server | &nbsp; | Upstream DNS server to query
+expect | &nbsp; | Expected response
+
+
+### Exports
+
+This role **exports** the following variables:
+
+Variable | Description
+-----------------------|------------
+`nagios_html_dir` | Nagios webroot path
+`nagios_apache_config` | Apache config block for Nagios CGI application
+
+
+Host Variables
+--------------
+
+In addition to variables for the `nagios_server` role itself, you can set various
+`nagios_`-prefixed hostvars to influence the check behavior for each host.
+Defaults for these host-specific variables are set in [group\_vars/all/nagios.yml](../../inventory-example/group_vars/all/nagios.yml)
+in the example inventory.
+
+Variable | Description
+----------------------------------|------------
+`nagios_snmp_user` | SNMPv3 username
+`nagios_snmp_community` | SNMP community string
+`nagios_snmp_auth_proto` | SNMPv3 authentication protocol
+`nagios_snmp_priv_proto` | SNMPv3 encryption protocol
+`nagios_snmp_auth_pass` | SNMPv3 authentication password
+`nagios_snmp_priv_pass` | SNMPv3 encryption password
+`nagios_ping_count` | ICMP packet count for `hostalive` check
+`nagios_ping_rtt_warn` | Round-trip time warning threshold for `hostalive` check
+`nagios_ping_rtt_crit` | Round-trip time critical threshold for `hostalive` check
+`nagios_ping_loss_warn` | Packet loss warning threshold for `hostalive` check
+`nagios_ping_loss_crit` | Packet loss critical threshold for `hostalive` check
+`nagios_temp_warn` | Temperature warning threshold (C)
+`nagios_temp_crit` | Temperature critical threshold (C)
+`nagios_power_draw_warn` | Power draw warning threshold (%)
+`nagios_power_draw_crit` | Power draw critical threshold (%)
+`nagios_load_1m_warn` | 1m load average (warn)
+`nagios_load_5m_warn` | 5m load average (warn)
+`nagios_load_15m_warn` | 15m load average (warn)
+`nagios_load_1m_crit` | 1m load average (crit)
+`nagios_load_5m_crit` | 5m load average (crit)
+`nagios_load_15m_crit` | 15m load average (crit)
+`nagios_mem_warn` | Memory usage warning threshold (%)
+`nagios_mem_crit` | Memory usage critical threshold (%)
+`nagios_swap_warn` | Swap usage warning threshold (%)
+`nagios_swap_crit` | Swap usage critical threshold (%)
+`nagios_interface_bandwidth_warn` | Interface bandwith warning threshold (Mbps)
+`nagios_interface_bandwidth_crit` | Interface bandwith critical threshold (Mbps)
+`nagios_interface_discard_warn` | Interface discards warning threshold (per second)
+`nagios_interface_discard_crit` | Interface discards critical threshold (per second)
+`nagios_interface_error_warn` | Interface errors warning threshold (per second)
+`nagios_interface_error_crit` | Interface errors critical threshold (per second)
+`nagios_interfaces` | Per-interface threshold overrides (see [format](#nagios_interfaces) below)
+`nagios_disk_warn` | Disk usage warning threshold (%)
+`nagios_disk_crit` | Disk usage critical threshold (%)
+`nagios_disks` | Per-filesystem threshold overrides (see [format](#nagios_disks) below)
+`nagios_certificate_warn` | Certificate validity days remaining (warning)
+`nagios_certificate_crit` | Certificate validity days remaining (critical)
+`nagios_smtp_warn` | SMTP response time warning threshold (seconds)
+`nagios_smtp_crit` | SMTP response time critical threshold (seconds)
+`nagios_mailq_warn` | Mail queue warning size
+`nagios_mailq_crit` | Mail queue critical size
+`nagios_imap_warn` | IMAP response time warning threshold (seconds)
+`nagios_imap_crit` | IMAP response time warning threshold (seconds)
+`nagios_http_warn` | HTTP response time warning threshold (seconds)
+`nagios_http_crit` | HTTP response time warning threshold (seconds)
+
+
+### nagios\_interfaces
+
+The `nagios_interfaces` variable is used to specify check thresholds for each
+network interface independently. It should contain a list of dictionaries of
+the following format:
+
+Variable | Default | Description
+-----------------|-----------------------------------------|-----------
+`name` | &nbsp; | Interface name
+`regex` | &nbsp; | Regular expression matching one or more interfaces
+`description` | `interface name` | Nagios check name
+`down_ok` | no | Don't alert when interface is down
+`bandwidth_warn` | `{{ nagios_interface_bandwidth_warn }}` | Bandwidth warning threshold (Mbps)
+`bandwidth_crit` | `{{ nagios_interface_bandwidth_crit }}` | Bandwidth critical threshold (Mbps)
+`discard_warn` | `{{ nagios_interface_discard_warn }}` | Discard warning threshold (per second)
+`discard_crit` | `{{ nagios_interface_discard_crit }}` | Discard critical threshold (per second)
+`error_warn` | `{{ nagios_interface_error_warn }}` | Error warning threshold (per second)
+`error_crit` | `{{ nagios_interface_error_crit }}` | Error critical threshold (per second)
+
+The `nagios_interfaces` variable can also contain a simple list of interface
+names, in which case the default check thresholds will be used.
+
+
+### nagios\_disks
+
+The `nagios_disks` variable is used to specify check thresholds for each
+filesystem independently. It should contain a list of dictionaries of
+the following format:
+
+Variable | Default | Description
+--------------|--------------------------|------------
+`path` | &nbsp; | Path of the disk's mountpoint
+`regex` | &nbsp; | Regular expression matching one or more mountpoints
+`description` | `mount path` | Nagios check name
+`exclude` | no | Treat mountpoint as exclusion pattern
+`terse` | no | Use shorter check output
+`warn` | `{{ nagios_disk_warn }}` | Disk usage warning threshold (%)
+`crit` | `{{ nagios_disk_crit }}` | Disk usage critical threshold (%)
+
+The `nagios_disks` variable can also contain a simple list of mountpoints, in
+which case the default check thresholds will be used.
+
+
+Usage
+-----
+
+Example playbook:
+
+````yaml
+- hosts: nagios_servers
+ roles:
+ - role: nagios_server
+ vars:
+ nagios_check_dns:
+ - name: example.com
+ qtype: A
+ server: 8.8.8.8
+ expect: 1.2.3.4
+
+ - role: apache_vhost
+ vars:
+ apache_document_root: '{{ nagios_html_dir }}'
+ apache_config: '{{ nagios_apache_config }}'
+````
diff --git a/roles/nagios_server/defaults/main.yml b/roles/nagios_server/defaults/main.yml
index c963b93..daedfe9 100644
--- a/roles/nagios_server/defaults/main.yml
+++ b/roles/nagios_server/defaults/main.yml
@@ -11,11 +11,8 @@ nagios_ssh_control_persist: 20m
nagios_snmp_max_size: 10000
-nagios_check_dns:
- - name: example.com
- qtype: A
- server: 8.8.8.8
- expect: 1.2.3.4
+nagios_check_dns: []
+nagios_excluded_groups: []
nagios_connectivity_check_host: 8.8.8.8
nagios_connectivity_check_count: 20
@@ -24,11 +21,4 @@ nagios_connectivity_check_rtt_crit: 100.0
nagios_connectivity_check_loss_warn: 5%
nagios_connectivity_check_loss_crit: 20%
-nagios_manubulon_repo: https://github.com/SteScho/manubulon-snmp
nagios_manubulon_version: master
-
-# key: name, value: url
-nagios_thirdparty_plugins: {}
-
-# key: name, value: url
-nagios_thirdparty_mibs: {}
diff --git a/roles/nagios_server/vars/main.yml b/roles/nagios_server/vars/main.yml
index aa8effa..90036e8 100644
--- a/roles/nagios_server/vars/main.yml
+++ b/roles/nagios_server/vars/main.yml
@@ -23,6 +23,7 @@ nagios_plugin_dir: /usr/lib64/nagios/plugins
nagios_mib_dir: /usr/share/snmp/mibs
+nagios_manubulon_repo: https://github.com/SteScho/manubulon-snmp
nagios_manubulon_install_dir: /usr/local/share/manubulon
nagios_thirdparty_plugins: