Pavel Loginov 4 years ago
parent
commit
469f1f3727
  1. 2
      app/overview.py
  2. 28
      app/templates/ovw.html
  3. 79
      app/tools/checker_master.py
  4. 106
      app/tools/checker_worker.py
  5. 2
      app/users.py
  6. 9
      config_other/logrotate/checker
  7. 2
      config_other/syslog/checker.conf
  8. 19
      config_other/systemd/checker_haproxy.service

2
app/overview.py

@ -72,7 +72,7 @@ try:
cmd = "ps ax |grep metrics_master |grep -v grep |wc -l"
metrics_master, stderr = funct.subprocess_execute(cmd)
cmd = "ps ax |grep checker_mas |grep -v grep |wc -l"
cmd = "systemctl status checker_haproxy |grep Act |awk '{print $2}'"
checker_master, stderr = funct.subprocess_execute(cmd)
cmd = "ps ax |grep -e 'keep_alive.py' |grep -v grep |wc -l"
keep_alive, stderr = funct.subprocess_execute(cmd)

28
app/templates/ovw.html

@ -101,17 +101,27 @@
{% endif %}
</td>
<td class="third-collumn-wi">
{% if checker_master|int() >= 1 %}
{% if checker_master == 'active' %}
<span title="running {{ checker_master }} master processes"><span class="serverUp server-status"></span></span>
{% else %}
<span class="serverDown server-status"></span>
{% endif %}
{% if role <= 1 %}
<a href="/app/viewlogs.py?viewlogs={{checker_log_id}}&rows=10&grep=&hour=00&minut=00&hour1=24&minut1=00" title="View checker master's logs" class="logs_link">
{% if role <= 1 %}
<a href="/app/viewlogs.py?viewlogs={{checker_log_id}}&rows=10&grep=&hour=00&minut=00&hour1=24&minut1=00" title="View checker master's logs" class="logs_link">
<span>Checker master</span>
</a>
{% else %}
<span>Checker master</span>
</a>
{% endif %}
{% else %}
<span>Checker master</span>
{% if checker_master == 'inactive' or checker_master == 'failed' %}
<span title="Checker is stopped"><span class="serverDown server-status"></span></span>
<a href="/app/users.py#services" title="Start Checker - HAProxy-WI service" class="logs_link">
Checker master
</a>
{% else %}
<span title="Backends checker is not installed"><span class="serverNone server-status"></span></span>
<a href="https://haproxy-wi.org/services.py?service=checker#installation" title="Backends checker installation" target="_blank" class="logs_link">
Checker master
</a>
{% endif %}
{% endif %}
</td>
<td class="third-collumn-wi">
@ -182,7 +192,7 @@
</a>
{% else %}
{% if smon == 'inactive' or smon== 'failed' %}
<span title="SMON is stoped"><span class="serverDown server-status"></span></span>
<span title="SMON is stopped"><span class="serverDown server-status"></span></span>
<a href="/app/users.py#services" title="Start SMON - HAProxy-WI service" class="logs_link">
SMON
</a>

79
app/tools/checker_master.py

@ -1,79 +0,0 @@
#!/usr/bin/env python3
import subprocess
import time
import argparse
import os, sys
sys.path.append(os.path.join(sys.path[0], os.path.dirname(os.getcwd())))
sys.path.append(os.path.join(sys.path[0], os.getcwd()))
import funct
import sql
import signal
class GracefulKiller:
    """Remember that a termination signal has been received.

    Creating an instance installs ``exit_gracefully`` as the handler for
    SIGINT and SIGTERM. The handler does nothing but set ``kill_now`` on the
    instance, leaving it to the caller's loop to notice the flag and stop.
    """

    # Class-level default; the handler overrides it on the instance.
    kill_now = False

    def __init__(self):
        for handled_signal in (signal.SIGINT, signal.SIGTERM):
            signal.signal(handled_signal, self.exit_gracefully)

    def exit_gracefully(self, signum, frame):
        """Signal handler: flag that a shutdown was requested."""
        self.kill_now = True
def main():
    """Reconcile the running workers with the servers from ``sql.select_alert()``.

    The first column of every row returned by ``sql.select_alert()`` is taken
    as a server. Workers reported by ``get_worker()`` that are not in that
    list are killed, and listed servers without a worker get one started.
    """
    alert_rows = sql.select_alert()
    running = get_worker()
    wanted = [row[0] for row in alert_rows]

    # Work out both differences before touching any process.
    orphaned = list(set(running) - set(wanted))
    missing = list(set(wanted) - set(running))

    for worker_serv in orphaned:
        kill_worker(worker_serv)

    for worker_serv in missing:
        start_worker(worker_serv)
def start_worker(serv):
    """Start a checker worker for ``serv`` in the background and log it.

    The port handed to the worker is read from the ``haproxy_sock_port``
    setting. The worker script is addressed by a relative path, so it is
    resolved against the current working directory.
    """
    sock_port = sql.get_setting('haproxy_sock_port')
    # The trailing "&" makes the shell detach the worker immediately.
    os.system("tools/checker_worker.py %s --port %s &" % (serv, sock_port))
    funct.logging("localhost", " Master started new worker for: " + serv, alerting=1)
def kill_worker(serv):
    """Kill the worker process(es) whose command line matches ``serv``.

    Matching processes are found with ``ps``/``grep`` and their PIDs are fed
    to ``kill``. The kill is logged, followed by any stderr output the
    command produced.
    """
    kill_cmd = "ps ax |grep 'tools/checker_worker.py %s'|grep -v grep |awk '{print $1}' |xargs kill" % serv
    _, stderr = funct.subprocess_execute(kill_cmd)
    funct.logging("localhost", " Master killed worker for: " + serv, alerting=1)
    if not stderr:
        return
    funct.logging("localhost", stderr, alerting=1)
def kill_all_workers():
    """Kill every process whose command line mentions the worker script.

    The action is logged, followed by any stderr output the command produced.
    """
    kill_cmd = "ps ax |grep 'tools/checker_worker.py' |grep -v grep |awk '{print $1}' |xargs kill"
    _, stderr = funct.subprocess_execute(kill_cmd)
    funct.logging("localhost", " Master killing all workers", alerting=1)
    if not stderr:
        return
    funct.logging("localhost", stderr, alerting=1)
def get_worker():
    """Return the servers that currently have a worker process.

    Takes the 7th whitespace-separated field of each ``ps ax`` line that
    mentions the worker script and returns the output exactly as
    ``funct.subprocess_execute`` delivers it. Any stderr output is logged.
    """
    list_cmd = "ps ax |grep 'tools/checker_worker.py' |grep -v grep |awk '{print $7}'"
    workers, stderr = funct.subprocess_execute(list_cmd)
    if stderr:
        funct.logging("localhost", stderr, alerting=1)
    return workers
if __name__ == "__main__":
    # Script entry point: reconcile workers roughly every 20 seconds until
    # SIGINT/SIGTERM is received, then kill all workers and log the shutdown.
    funct.logging("localhost", " Checker master started", alerting=1)
    killer = GracefulKiller()

    while True:
        main()
        # Wait ~20 s between passes, but in short slices. Since Python 3.5
        # (PEP 475) time.sleep() resumes after a signal handler returns, so a
        # single long sleep would delay the shutdown below by up to 20 s,
        # long enough for a service manager with a short stop timeout to
        # SIGKILL the master before the workers are cleaned up.
        for _ in range(40):
            if killer.kill_now:
                break
            time.sleep(0.5)
        if killer.kill_now:
            break

    kill_all_workers()
    funct.logging("localhost", " Checker master shutdown", alerting=1)

106
app/tools/checker_worker.py

@ -1,106 +0,0 @@
#!/usr/bin/env python3
import subprocess
from subprocess import check_output, CalledProcessError
import time
import argparse
import os
import sys
sys.path.append(os.path.join(sys.path[0], os.path.dirname(os.getcwd())))
sys.path.append(os.path.join(sys.path[0], os.getcwd()))
import funct
import signal
class GracefulKiller:
    """Latch that flips once SIGINT or SIGTERM has been delivered.

    The constructor registers ``exit_gracefully`` for both signals. The
    handler only sets ``kill_now``, so the polling loop decides when to stop.
    """

    # Default state; set to True on the instance by the signal handler.
    kill_now = False

    def __init__(self):
        handler = self.exit_gracefully
        signal.signal(signal.SIGINT, handler)
        signal.signal(signal.SIGTERM, handler)

    def exit_gracefully(self, signum, frame):
        """Signal handler: record the shutdown request."""
        self.kill_now = True
# Poll one HAProxy instance with "show stat" and alert on state changes.
#
# serv: address of the HAProxy host, passed to nc and used in alert texts.
# port: port passed to nc (converted to str so it can be concatenated).
# Alerts are sent with funct.telegram_send_mess() and funct.logging().
#
# NOTE(review): the indentation of this block is not visible here, so the
# comments below describe individual statements and deliberately avoid
# claims about which branch each statement belongs to.
def main(serv, port):
port = str(port)
# Alerts are only raised where firstrun == False is checked, i.e. once a
# baseline exists.
firstrun = True
# Per-line states of the current poll and of the previous poll.
currentstat = []
oldstat = []
readstats = ""
killer = GracefulKiller()
# Last recorded service state; compared against "error" / "Ok" below.
old_stat_service = ""
while True:
try:
# Pipe "show stat" into nc through the shell. check_output returns bytes
# and raises CalledProcessError on a non-zero exit status.
readstats = subprocess.check_output(["echo show stat | nc "+serv+" "+port], shell=True)
except CalledProcessError as e:
if firstrun == False:
cur_stat_service = "error"
# Alert only when the service state differs from the recorded one.
if old_stat_service != cur_stat_service:
alert = "Can't connect to HAProxy service at " + serv
funct.telegram_send_mess(str(alert), ip=serv)
funct.logging("localhost", " "+alert, alerting=1)
firstrun = False
old_stat_service = cur_stat_service
time.sleep(60)
continue
except OSError as e:
# Print the error and terminate the worker.
print(e)
sys.exit()
else:
cur_stat_service = "Ok"
if firstrun == False:
# Report recovery when the recorded state was not "Ok".
if old_stat_service != cur_stat_service:
alert = "Now UP HAProxy service at " + serv
funct.telegram_send_mess(str(alert), ip=serv)
funct.logging("localhost", " "+alert, alerting=1)
firstrun = True
time.sleep(5)
old_stat_service = cur_stat_service
# One entry per line of the "show stat" output.
vips = readstats.splitlines()
for i in range(0,len(vips)):
# Classify each line by substring, checked in the order UP, DOWN, MAINT.
if "UP" in str(vips[i]):
currentstat.append("UP")
elif "DOWN" in str(vips[i]):
currentstat.append("DOWN")
elif "MAINT" in str(vips[i]):
currentstat.append("MAINT")
else:
currentstat.append("none")
if firstrun == False:
# Alert when a line's state differs from the same position in the previous
# poll, skipping "none" states and lines containing FRONTEND or BACKEND.
# NOTE(review): oldstat is indexed by position; looks like this can raise
# IndexError when the output has more lines than the previous poll - confirm.
if (currentstat[i] != oldstat[i] and currentstat[i]!="none") and ("FRONTEND" not in str(vips[i]) and "BACKEND" not in str(vips[i])):
servername = str(vips[i])
# First two comma-separated fields are used as backend and server names.
servername = servername.split(",")
realserver = servername[0]
server = servername[1]
# str() of a bytes line starts with b' - presumably [2:] strips that prefix;
# confirm.
alert = "Backend: "+realserver[2:]+", server: "+server+" has changed status to "+ currentstat[i] + " on " + serv + " HAProxy"
funct.telegram_send_mess(str(alert), ip=serv)
funct.logging("localhost", " "+alert, alerting=1)
# The states just collected become the baseline for the next poll.
firstrun = False
oldstat = currentstat
currentstat = []
# NOTE(review): time.sleep() resumes after the signal handler returns
# (PEP 475), so kill_now is presumably only noticed once this sleep has
# finished - confirm whether a shorter stop latency is needed.
time.sleep(60)
if killer.kill_now:
break
funct.logging("localhost", " Worker shutdown for: "+serv, alerting=1)
if __name__ == "__main__":
    # Script entry point: parse the target address and port, then poll until
    # interrupted.
    parser = argparse.ArgumentParser(
        description='Check HAProxy servers state.',
        prog='check_haproxy.py',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument(
        'IP',
        help='Start check HAProxy server state at this ip',
        nargs='?',
        type=str,
    )
    parser.add_argument(
        '--port',
        help='Start check HAProxy server state at this port',
        nargs='?',
        default=1999,
        type=int,
    )
    args = parser.parse_args()

    # Without a target address there is nothing to check: show usage and quit.
    if args.IP is None:
        parser.print_help()
        sys.exit()

    try:
        main(args.IP, args.port)
    except KeyboardInterrupt:
        # Ctrl-C ends the worker without a traceback.
        pass

2
app/users.py

@ -18,7 +18,7 @@ try:
ldap_enable = sql.get_setting('ldap_enable')
grafana, stderr = funct.subprocess_execute("service grafana-server status |grep Active |awk '{print $1}'")
services = []
services_name = {"checker_haproxy":"Master checker service",
services_name = {"checker_haproxy":"Master backends checker service",
"keep_alive":"Auto start service",
"metrics_haproxy":"Master metrics service",
"prometheus":"Prometheus service",

9
config_other/logrotate/checker

@ -1,9 +0,0 @@
/var/www/haproxy-wi/log/checker-error.log {
daily
rotate 10
missingok
notifempty
create 0644 apache apache
dateext
sharedscripts
}

2
config_other/syslog/checker.conf

@ -1,2 +0,0 @@
if $programname startswith 'checker' then /var/www/haproxy-wi/log/checker-error.log
& stop

19
config_other/systemd/checker_haproxy.service

@ -1,19 +0,0 @@
[Unit]
Description=Haproxy backends state checker
After=syslog.target network.target
[Service]
Type=simple
WorkingDirectory=/var/www/haproxy-wi/app/
ExecStart=/var/www/haproxy-wi/app/tools/checker_master.py
StandardOutput=syslog
StandardError=syslog
SyslogIdentifier=checker
RestartSec=2s
Restart=on-failure
TimeoutStopSec=1s
[Install]
WantedBy=multi-user.target
Loading…
Cancel
Save