mirror of https://github.com/Aidaho12/haproxy-wi
parent
ad67ff10dc
commit
469f1f3727
|
@ -72,7 +72,7 @@ try:
|
|||
|
||||
cmd = "ps ax |grep metrics_master |grep -v grep |wc -l"
|
||||
metrics_master, stderr = funct.subprocess_execute(cmd)
|
||||
cmd = "ps ax |grep checker_mas |grep -v grep |wc -l"
|
||||
cmd = "systemctl status checker_haproxy |grep Act |awk '{print $2}'"
|
||||
checker_master, stderr = funct.subprocess_execute(cmd)
|
||||
cmd = "ps ax |grep -e 'keep_alive.py' |grep -v grep |wc -l"
|
||||
keep_alive, stderr = funct.subprocess_execute(cmd)
|
||||
|
|
|
@ -101,17 +101,27 @@
|
|||
{% endif %}
|
||||
</td>
|
||||
<td class="third-collumn-wi">
|
||||
{% if checker_master|int() >= 1 %}
|
||||
{% if checker_master == 'active' %}
|
||||
<span title="running {{ checker_master }} master processes"><span class="serverUp server-status"></span></span>
|
||||
{% else %}
|
||||
<span class="serverDown server-status"></span>
|
||||
{% endif %}
|
||||
{% if role <= 1 %}
|
||||
<a href="/app/viewlogs.py?viewlogs={{checker_log_id}}&rows=10&grep=&hour=00&minut=00&hour1=24&minut1=00" title="View checker master's logs" class="logs_link">
|
||||
{% if role <= 1 %}
|
||||
<a href="/app/viewlogs.py?viewlogs={{checker_log_id}}&rows=10&grep=&hour=00&minut=00&hour1=24&minut1=00" title="View checker master's logs" class="logs_link">
|
||||
<span>Checker master</span>
|
||||
</a>
|
||||
{% else %}
|
||||
<span>Checker master</span>
|
||||
</a>
|
||||
{% endif %}
|
||||
{% else %}
|
||||
<span>Checker master</span>
|
||||
{% if checker_master == 'inactive' or checker_master == 'failed' %}
|
||||
<span title="Checker is stopped"><span class="serverDown server-status"></span></span>
|
||||
<a href="/app/users.py#services" title="Start Checker - HAProxy-WI service" class="logs_link">
|
||||
Checker master
|
||||
</a>
|
||||
{% else %}
|
||||
<span title="Backends checker is not installed"><span class="serverNone server-status"></span></span>
|
||||
<a href="https://haproxy-wi.org/services.py?service=checker#installation" title="Backends checker installation" target="_blank" class="logs_link">
|
||||
Checker master
|
||||
</a>
|
||||
{% endif %}
|
||||
{% endif %}
|
||||
</td>
|
||||
<td class="third-collumn-wi">
|
||||
|
@ -182,7 +192,7 @@
|
|||
</a>
|
||||
{% else %}
|
||||
{% if smon == 'inactive' or smon== 'failed' %}
|
||||
<span title="SMON is stoped"><span class="serverDown server-status"></span></span>
|
||||
<span title="SMON is stopped"><span class="serverDown server-status"></span></span>
|
||||
<a href="/app/users.py#services" title="Start SMON - HAProxy-WI service" class="logs_link">
|
||||
SMON
|
||||
</a>
|
||||
|
|
|
@ -1,79 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
import subprocess
|
||||
import time
|
||||
import argparse
|
||||
import os, sys
|
||||
sys.path.append(os.path.join(sys.path[0], os.path.dirname(os.getcwd())))
|
||||
sys.path.append(os.path.join(sys.path[0], os.getcwd()))
|
||||
import funct
|
||||
import sql
|
||||
import signal
|
||||
|
||||
class GracefulKiller:
    """Turn SIGINT/SIGTERM into a flag that the main loop can poll.

    Creating an instance installs ``exit_gracefully`` as the handler for both
    signals; once either arrives, ``kill_now`` becomes True on that instance.
    """

    # Default for every instance until a signal has been received.
    kill_now = False

    def __init__(self):
        for signum in (signal.SIGINT, signal.SIGTERM):
            signal.signal(signum, self.exit_gracefully)

    def exit_gracefully(self, signum, frame):
        """Signal handler: only record that a shutdown was requested."""
        self.kill_now = True
|
||||
|
||||
def main():
    """Reconcile running checker workers with the servers that need one.

    Workers whose server is no longer returned by ``sql.select_alert()`` are
    killed; servers without a running worker get a new one started.
    """
    alert_rows = sql.select_alert()
    running = get_worker()
    # The first column of each row is compared against the values reported by
    # get_worker(), so it is used as the worker's server identifier.
    wanted = [row[0] for row in alert_rows]

    stale = list(set(running) - set(wanted))
    missing = list(set(wanted) - set(running))

    for address in stale:
        kill_worker(address)

    for address in missing:
        start_worker(address)
|
||||
|
||||
def start_worker(serv):
    """Launch a background checker worker for ``serv`` and log the event.

    The port passed to the worker is read from the ``haproxy_sock_port``
    setting.
    """
    sock_port = sql.get_setting('haproxy_sock_port')
    # The trailing '&' makes the shell start the worker in the background so
    # os.system() returns immediately.
    launch = "tools/checker_worker.py %s --port %s &" % (serv, sock_port)
    os.system(launch)
    funct.logging("localhost", " Master started new worker for: "+serv, alerting=1)
|
||||
|
||||
def kill_worker(serv):
    """Send the default ``kill`` signal to the checker worker serving ``serv``.

    The worker is located by searching the ``ps ax`` output for its command
    line. The match is a fixed string (``grep -F``) that ends with a space:
    workers are started as ``tools/checker_worker.py <serv> --port <port>``,
    so the address is always followed by a space. Without the anchor, asking
    to stop ``10.0.0.1`` would also kill the workers for ``10.0.0.10``,
    ``10.0.0.11`` and so on, and the dots would act as regex wildcards.

    Anything the pipeline writes to stderr is logged.
    """
    cmd = "ps ax |grep -F 'tools/checker_worker.py %s '|grep -v grep |awk '{print $1}' |xargs kill" % serv
    output, stderr = funct.subprocess_execute(cmd)
    funct.logging("localhost", " Master killed worker for: "+serv, alerting=1)
    if stderr:
        funct.logging("localhost", stderr, alerting=1)
|
||||
|
||||
def kill_all_workers():
    """Kill every process whose command line contains ``tools/checker_worker.py``.

    Anything the pipeline writes to stderr is logged after the shutdown
    message.
    """
    _, errors = funct.subprocess_execute(
        "ps ax |grep 'tools/checker_worker.py' |grep -v grep |awk '{print $1}' |xargs kill"
    )
    funct.logging("localhost", " Master killing all workers", alerting=1)
    if errors:
        funct.logging("localhost", errors, alerting=1)
|
||||
|
||||
def get_worker():
    """Return the identifiers of the checker workers that are currently running.

    The value is whatever ``funct.subprocess_execute`` returns as output for a
    pipeline that prints the seventh whitespace-separated field of each
    matching ``ps ax`` line (presumably the server address given to the
    worker; this depends on the exact ``ps`` output layout).
    """
    listing, errors = funct.subprocess_execute(
        "ps ax |grep 'tools/checker_worker.py' |grep -v grep |awk '{print $7}'"
    )
    if errors:
        funct.logging("localhost", errors, alerting=1)
    return listing
|
||||
|
||||
if __name__ == "__main__":
    # Master loop: reconcile the workers roughly every 20 seconds until
    # SIGINT/SIGTERM is received, then stop all workers and log the shutdown.
    funct.logging("localhost", " Checker master started", alerting=1)
    killer = GracefulKiller()

    while True:
        main()

        # Since Python 3.5 time.sleep() is resumed after a signal handler
        # returns (PEP 475), so a single 20-second sleep would postpone the
        # shutdown, and the worker cleanup below, by up to 20 seconds.
        # Sleep in short slices and re-check the flag between them instead.
        for _ in range(40):
            if killer.kill_now:
                break
            time.sleep(0.5)

        if killer.kill_now:
            break

    kill_all_workers()
    funct.logging("localhost", " Checker master shutdown", alerting=1)
|
|
@ -1,106 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
import subprocess
|
||||
from subprocess import check_output, CalledProcessError
|
||||
import time
|
||||
import argparse
|
||||
import os
|
||||
import sys
|
||||
sys.path.append(os.path.join(sys.path[0], os.path.dirname(os.getcwd())))
|
||||
sys.path.append(os.path.join(sys.path[0], os.getcwd()))
|
||||
import funct
|
||||
import signal
|
||||
|
||||
class GracefulKiller:
    """Turn SIGINT/SIGTERM into a flag that the polling loop can check.

    Creating an instance installs ``exit_gracefully`` as the handler for both
    signals; once either arrives, ``kill_now`` becomes True on that instance.
    """

    # Default for every instance until a signal has been received.
    kill_now = False

    def __init__(self):
        for signum in (signal.SIGINT, signal.SIGTERM):
            signal.signal(signum, self.exit_gracefully)

    def exit_gracefully(self, signum, frame):
        """Signal handler: only record that a shutdown was requested."""
        self.kill_now = True
|
||||
|
||||
def _sleep_unless_killed(killer, seconds):
    """Sleep up to ``seconds`` in one-second slices; return True once a stop was requested."""
    # time.sleep() is resumed after a signal handler returns (PEP 475), so a
    # single long sleep would delay the reaction to SIGTERM by its full length.
    for _ in range(seconds):
        if killer.kill_now:
            return True
        time.sleep(1)
    return killer.kill_now


def main(serv, port):
    """Poll the HAProxy stats socket at ``serv``:``port`` and alert on changes.

    Every cycle runs ``echo show stat | nc <serv> <port>``. Alerts are sent via
    ``funct.telegram_send_mess`` and ``funct.logging`` when:

    * the service becomes unreachable or reachable again, or
    * a stats row that is neither FRONTEND nor BACKEND changes between
      UP / DOWN / MAINT compared with the previous poll.

    The first successful poll, and the first poll after a recovery, only
    record a baseline. The loop ends once SIGINT/SIGTERM has been received.

    Args:
        serv: address of the HAProxy host, interpolated into the shell command.
        port: stats socket port; converted with ``str()``.
    """
    port = str(port)
    firstrun = True
    oldstat = []
    killer = GracefulKiller()
    old_stat_service = ""

    while True:
        try:
            # NOTE(review): serv and port are concatenated into a shell command;
            # they must come from a trusted source.
            readstats = subprocess.check_output(["echo show stat | nc "+serv+" "+port], shell=True)
        except CalledProcessError:
            # A failure before any baseline exists is not reported.
            if not firstrun:
                cur_stat_service = "error"
                if old_stat_service != cur_stat_service:
                    alert = "Can't connect to HAProxy service at " + serv
                    funct.telegram_send_mess(str(alert), ip=serv)
                    funct.logging("localhost", " "+alert, alerting=1)
                old_stat_service = cur_stat_service
            # The stop flag must be honoured on this path too, otherwise a
            # worker for an unreachable HAProxy can never be stopped by SIGTERM.
            if _sleep_unless_killed(killer, 60):
                break
            continue
        except OSError as e:
            print(e)
            sys.exit()

        cur_stat_service = "Ok"
        if not firstrun and old_stat_service != cur_stat_service:
            alert = "Now UP HAProxy service at " + serv
            funct.telegram_send_mess(str(alert), ip=serv)
            funct.logging("localhost", " "+alert, alerting=1)
            # Take a fresh baseline after a recovery instead of comparing
            # against the statuses seen before the outage.
            firstrun = True
            time.sleep(5)
        old_stat_service = cur_stat_service

        currentstat = []
        for i, raw_line in enumerate(readstats.splitlines()):
            line = str(raw_line)
            if "UP" in line:
                status = "UP"
            elif "DOWN" in line:
                status = "DOWN"
            elif "MAINT" in line:
                status = "MAINT"
            else:
                status = "none"
            currentstat.append(status)

            # Rows beyond the previous poll's length have nothing to be
            # compared with; skipping them avoids an IndexError when the
            # stats output grows between polls.
            if firstrun or i >= len(oldstat):
                continue

            changed = status != oldstat[i] and status != "none"
            if changed and "FRONTEND" not in line and "BACKEND" not in line:
                fields = line.split(",")
                realserver = fields[0]
                server = fields[1]
                # line is the str() of a bytes object, so fields[0] begins
                # with the two characters b' which the slice removes.
                alert = "Backend: "+realserver[2:]+", server: "+server+" has changed status to "+ status + " on " + serv + " HAProxy"
                funct.telegram_send_mess(str(alert), ip=serv)
                funct.logging("localhost", " "+alert, alerting=1)

        firstrun = False
        oldstat = currentstat

        if _sleep_unless_killed(killer, 60):
            break

    funct.logging("localhost", " Worker shutdown for: "+serv, alerting=1)
|
||||
|
||||
if __name__ == "__main__":
    # Command-line entry point: one positional IP plus an optional --port.
    cli = argparse.ArgumentParser(description='Check HAProxy servers state.', prog='check_haproxy.py', formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    cli.add_argument('IP', help='Start check HAProxy server state at this ip', nargs='?', type=str)
    cli.add_argument('--port', help='Start check HAProxy server state at this port', nargs='?', default=1999, type=int)
    options = cli.parse_args()

    # Without an IP there is nothing to monitor: show the usage and leave.
    if options.IP is None:
        cli.print_help()
        sys.exit()

    try:
        main(options.IP, options.port)
    except KeyboardInterrupt:
        pass
|
|
@ -18,7 +18,7 @@ try:
|
|||
ldap_enable = sql.get_setting('ldap_enable')
|
||||
grafana, stderr = funct.subprocess_execute("service grafana-server status |grep Active |awk '{print $1}'")
|
||||
services = []
|
||||
services_name = {"checker_haproxy":"Master checker service",
|
||||
services_name = {"checker_haproxy":"Master backends checker service",
|
||||
"keep_alive":"Auto start service",
|
||||
"metrics_haproxy":"Master metrics service",
|
||||
"prometheus":"Prometheus service",
|
||||
|
|
|
@ -1,9 +0,0 @@
|
|||
/var/www/haproxy-wi/log/checker-error.log {
|
||||
daily
|
||||
rotate 10
|
||||
missingok
|
||||
notifempty
|
||||
create 0644 apache apache
|
||||
dateext
|
||||
sharedscripts
|
||||
}
|
|
@ -1,2 +0,0 @@
|
|||
if $programname startswith 'checker' then /var/www/haproxy-wi/log/checker-error.log
|
||||
& stop
|
|
@ -1,19 +0,0 @@
|
|||
[Unit]
|
||||
Description=Haproxy backends state checker
|
||||
After=syslog.target network.target
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
WorkingDirectory=/var/www/haproxy-wi/app/
|
||||
ExecStart=/var/www/haproxy-wi/app/tools/checker_master.py
|
||||
|
||||
StandardOutput=syslog
|
||||
StandardError=syslog
|
||||
SyslogIdentifier=checker
|
||||
|
||||
RestartSec=2s
|
||||
Restart=on-failure
|
||||
TimeoutStopSec=1s
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
Loading…
Reference in New Issue