aboutsummaryrefslogtreecommitdiffstats
path: root/roles/tika
diff options
context:
space:
mode:
Diffstat (limited to 'roles/tika')
-rw-r--r-- roles/tika/defaults/main.yml | 3
-rw-r--r-- roles/tika/handlers/main.yml | 4
-rw-r--r-- roles/tika/tasks/main.yml | 69
-rw-r--r-- roles/tika/templates/etc/sysconfig/tika.j2 | 3
-rw-r--r-- roles/tika/templates/etc/systemd/system/tika.service.j2 | 53
-rw-r--r-- roles/tika/templates/etc/tika/config.xml.j2 | 15
-rw-r--r-- roles/tika/templates/etc/tika/log4j2.xml.j2 | 18
-rw-r--r-- roles/tika/vars/main.yml | 4
8 files changed, 169 insertions, 0 deletions
diff --git a/roles/tika/defaults/main.yml b/roles/tika/defaults/main.yml
new file mode 100644
index 0000000..9ca8d89
--- /dev/null
+++ b/roles/tika/defaults/main.yml
@@ -0,0 +1,3 @@
+tika_version: '2.6.0'  # release number interpolated into tika_url
+tika_port: 9998  # rendered as TIKA_PORT in /etc/sysconfig/tika
+tika_heap_size: '2g'  # -Xmx of the forked JVM and JVM_HEAP_SIZE
diff --git a/roles/tika/handlers/main.yml b/roles/tika/handlers/main.yml
new file mode 100644
index 0000000..56f3127
--- /dev/null
+++ b/roles/tika/handlers/main.yml
@@ -0,0 +1,4 @@
+- name: restart tika  # notified by the jar, environment and config tasks
+  systemd:
+    name: tika
+    state: restarted
diff --git a/roles/tika/tasks/main.yml b/roles/tika/tasks/main.yml
new file mode 100644
index 0000000..d2d59dc
--- /dev/null
+++ b/roles/tika/tasks/main.yml
@@ -0,0 +1,69 @@
+- name: install java  # runtime for the unit's "java -jar" ExecStart
+  dnf:
+    name: java-17-openjdk-headless
+    state: present
+
+- name: create tika installation directory  # jar lives here; unit's WorkingDirectory
+  file:
+    path: '{{ tika_install_dir }}'
+    state: directory
+
+- name: download jar file
+  get_url:
+    url: '{{ tika_url }}'
+    dest: '{{ tika_install_dir }}/tika-server.jar'
+    mode: '0444'  # quoted so the YAML parser cannot re-type the octal mode
+  notify: restart tika
+
+- name: add local user  # the account named by User= in the unit
+  user:
+    name: tika
+    system: true
+    home: '{{ tika_data_dir }}'
+    shell: /sbin/nologin
+    create_home: false  # the data-directory task creates the home instead
+
+- name: create data directory  # doubles as the tika user's home
+  file:
+    path: '{{ tika_data_dir }}'
+    state: directory
+    owner: tika
+    group: tika
+    mode: '0770'  # quoted so the YAML parser cannot re-type the octal mode
+
+- name: create systemd unit
+  template:
+    src: etc/systemd/system/tika.service.j2
+    dest: /etc/systemd/system/tika.service
+  register: tika_unit  # lets a later task reload systemd only on change
+
+- name: reload systemd units  # runs only when the unit file was rewritten
+  systemd:
+    daemon_reload: true
+  when: tika_unit.changed
+
+- name: create environment file  # loaded by the unit via EnvironmentFile=
+  template:
+    src: etc/sysconfig/tika.j2
+    dest: /etc/sysconfig/tika
+  notify: restart tika
+
+- name: create configuration directory  # target of the config templates
+  file:
+    path: '{{ tika_conf_dir }}'
+    state: directory
+
+- name: generate config files
+  template:
+    src: '{{ tika_conf_dir[1:] }}/{{ item }}.j2'  # [1:] drops the leading "/"
+    dest: '{{ tika_conf_dir }}/{{ item }}'
+  loop:
+    - log4j2.xml
+    - config.xml
+  notify: restart tika
+
+- name: start tika
+  systemd:
+    name: tika
+    enabled: true  # unit is WantedBy=multi-user.target
+    state: started
diff --git a/roles/tika/templates/etc/sysconfig/tika.j2 b/roles/tika/templates/etc/sysconfig/tika.j2
new file mode 100644
index 0000000..1e2ac54
--- /dev/null
+++ b/roles/tika/templates/etc/sysconfig/tika.j2
@@ -0,0 +1,3 @@
+TIKA_PORT={{ tika_port }}
+TIKA_OPTS="-Dlog4j.configurationFile={{ tika_conf_dir }}/log4j2.xml"
+JVM_HEAP_SIZE={{ tika_heap_size }}
diff --git a/roles/tika/templates/etc/systemd/system/tika.service.j2 b/roles/tika/templates/etc/systemd/system/tika.service.j2
new file mode 100644
index 0000000..f888fcb
--- /dev/null
+++ b/roles/tika/templates/etc/systemd/system/tika.service.j2
@@ -0,0 +1,53 @@
+[Unit]
+Description=Apache Tika
+Before=dovecot.service
+
+[Service]
+Type=simple
+User=tika
+Restart=on-failure
+
+# Paths that stay writable under ProtectSystem=strict (set once, below)
+ReadWritePaths={{ tika_data_dir }} /var/log/tika
+
+# Sandbox the JVM: no new privileges, private /tmp and /dev, read-only system
+NoNewPrivileges=yes
+PrivateTmp=yes
+PrivateDevices=yes
+DevicePolicy=closed
+ProtectSystem=strict
+ProtectHome=yes
+ProtectControlGroups=yes
+ProtectKernelModules=yes
+ProtectKernelTunables=yes
+RestrictAddressFamilies=AF_UNIX AF_INET AF_INET6
+RestrictNamespaces=yes
+RestrictRealtime=yes
+RestrictSUIDSGID=yes
+LockPersonality=yes
+
+WorkingDirectory={{ tika_install_dir }}
+LogsDirectory=tika
+# Defaults only: values from the EnvironmentFile below take precedence
+Environment=TIKA_DATA_HOME={{ tika_data_dir }}
+Environment=JVM_ARGS=
+Environment=TIKA_OPTS=
+Environment=JVM_GC_ARGS="-XX:+UseG1GC -XX:+PerfDisableSharedMem -XX:+ParallelRefProcEnabled -XX:MaxGCPauseMillis=250 -XX:+UseLargePages -XX:+AlwaysPreTouch"
+Environment=TIKA_HOST=localhost
+Environment=TIKA_PORT=9998
+Environment=TIKA_LOGS_DIR=/var/log/tika
+Environment=TIKA_CONFIG_FILE={{ tika_conf_dir }}/config.xml
+EnvironmentFile=/etc/sysconfig/tika
+
+ExecStart=java -server \
+    $JVM_ARGS \
+    $JVM_GC_ARGS \
+    -Dlog4j2.formatMsgNoLookups=true \
+    $TIKA_OPTS \
+    -jar tika-server.jar \
+    -c ${TIKA_CONFIG_FILE} \
+    -h ${TIKA_HOST} \
+    -p ${TIKA_PORT}
+
+[Install]
+WantedBy=multi-user.target
diff --git a/roles/tika/templates/etc/tika/config.xml.j2 b/roles/tika/templates/etc/tika/config.xml.j2
new file mode 100644
index 0000000..0e8df31
--- /dev/null
+++ b/roles/tika/templates/etc/tika/config.xml.j2
@@ -0,0 +1,15 @@
+<properties>
+  <server>
+    <params>
+      <returnStackTrace>false</returnStackTrace> <!-- do not send Java stack traces to clients -->
+      <forkedJvmArgs>
+        <arg>-Xmx{{ tika_heap_size }}</arg>
+        <arg>-Dlog4j.configurationFile={{ tika_conf_dir }}/log4j2.xml</arg>
+      </forkedJvmArgs> <!-- flags for the forked JVM; same log4j property as TIKA_OPTS -->
+      <endpoints>
+        <endpoint>tika</endpoint>
+        <endpoint>status</endpoint>
+      </endpoints> <!-- endpoints listed for the server -->
+    </params>
+  </server>
+</properties>
diff --git a/roles/tika/templates/etc/tika/log4j2.xml.j2 b/roles/tika/templates/etc/tika/log4j2.xml.j2
new file mode 100644
index 0000000..ae66fbb
--- /dev/null
+++ b/roles/tika/templates/etc/tika/log4j2.xml.j2
@@ -0,0 +1,18 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<Configuration>
+ <Appenders>
+ <Console name="STDOUT" target="SYSTEM_OUT"> <!-- the only appender: writes to standard output -->
+ <PatternLayout> <!-- the pattern sits inside a Jinja raw block so its braces and percent signs pass through untouched -->
+ <Pattern>
+ {% raw %}%maxLen{%-5p %c %m%notEmpty{ =>%ex{short}}}{10240}%n{% endraw %}
+
+ </Pattern>
+ </PatternLayout>
+ </Console>
+ </Appenders>
+ <Loggers>
+ <Root level="INFO"> <!-- root logger at INFO, routed to the STDOUT appender -->
+ <AppenderRef ref="STDOUT"/>
+ </Root>
+ </Loggers>
+</Configuration>
diff --git a/roles/tika/vars/main.yml b/roles/tika/vars/main.yml
new file mode 100644
index 0000000..e730904
--- /dev/null
+++ b/roles/tika/vars/main.yml
@@ -0,0 +1,4 @@
+tika_url: 'https://dlcdn.apache.org/tika/{{ tika_version }}/tika-server-standard-{{ tika_version }}.jar'
+tika_install_dir: '/usr/local/share/tika'  # jar location and unit WorkingDirectory
+tika_data_dir: '/var/lib/tika'  # tika user's home; writable from the unit
+tika_conf_dir: '/etc/tika'  # holds config.xml and log4j2.xml