对接redis优化

pull/46/head
starsliao 2022-11-22 18:09:32 +08:00
parent bc8defae7f
commit 148ebdca88
13 changed files with 295 additions and 47 deletions

View File

@ -8,7 +8,7 @@ if consul_kv.get_kv_dict(skey_path) == {}:
consul_kv.put_kv(skey_path,{'sk':''.join(str(uuid.uuid4()).split('-'))})
from views import login, blackbox, consul, jobs, nodes, selfnode, selfrds, selfredis, avd, exp, jms, edit_cloud, ldap, rds, redis
from views.prom import cloud_mysql_metrics
from views.prom import cloud_metrics
from units.cloud import huaweicloud,alicloud,tencent_cloud
from units.avd import avd_list
from units.jms import sync_jms
@ -29,7 +29,7 @@ app.register_blueprint(avd.blueprint)
app.register_blueprint(exp.blueprint)
app.register_blueprint(jms.blueprint)
app.register_blueprint(edit_cloud.blueprint)
app.register_blueprint(cloud_mysql_metrics.blueprint)
app.register_blueprint(cloud_metrics.blueprint)
app.register_blueprint(ldap.blueprint)
app.register_blueprint(rds.blueprint)
app.register_blueprint(redis.blueprint)

View File

@ -27,11 +27,11 @@ def w2consul(vendor,account,region,ecs_dict):
#对consul中关机的ecs做标记。
if v['status'] in ['SHUTOFF','Stopped','STOPPED']:
off = off + 1
tags = ['shutoff', region]
tags = ['OFF', v['ostype'], region]
stat = 'off'
else:
on = on + 1
tags = [v['ostype'],region]
tags = ['ON', v['ostype'], region]
stat = 'on'
custom_ecs = consul_kv.get_value(f'ConsulManager/assets/sync_ecs_custom/{iid}')
port = custom_ecs.get('port')

View File

@ -27,11 +27,11 @@ def w2consul(vendor,account,region,rds_dict):
#对consul中关机的rds做标记。
if v['status'] in ['SHUTDOWN']:
off = off + 1
tags = ['shutoff',v['itype'],v['ver'], region]
tags = ['OFF',v['itype'],v['ver'], region]
stat = 'off'
else:
on = on + 1
tags = [v['itype'],v['ver'],region]
tags = ['ON',v['itype'],v['ver'],region]
stat = 'on'
custom_rds = consul_kv.get_value(f'ConsulManager/assets/sync_rds_custom/{iid}')
port = custom_rds.get('port')

View File

@ -27,11 +27,11 @@ def w2consul(vendor,account,region,redis_dict):
#对consul中关机的redis做标记。
if v['status'] in ['SHUTDOWN']:
off = off + 1
tags = ['shutoff',v['itype'],v['ver'], region]
tags = ['OFF', v['itype'], v['ver'], region]
stat = 'off'
else:
on = on + 1
tags = [v['itype'],v['ver'],region]
tags = ['ON', v['itype'], v['ver'], region]
stat = 'on'
custom_redis = consul_kv.get_value(f'ConsulManager/assets/sync_redis_custom/{iid}')
port = custom_redis.get('port')
@ -39,7 +39,7 @@ def w2consul(vendor,account,region,redis_dict):
if port == None:
port = v['port']
if ip == None:
ip = v['ip']
ip = v['domain']
instance = f'{ip}:{port}'
data = {
'id': iid,
@ -58,7 +58,8 @@ def w2consul(vendor,account,region,redis_dict):
'vendor': vendors.get(vendor,'未找到'),
'mem': v['mem'],
'ver': v['ver'],
'domain':v['domain'],
'ip':v['ip'],
'exp':v['exp'],
'stat': stat
},
"check": {

View File

@ -1,5 +1,74 @@
from config import consul_token,consul_url
def redis_config(region_list,cm_exporter,services_list,exporter):
    """Generate Prometheus scrape-job YAML for Redis monitoring.

    Two jobs can be produced:

    * ``redis_exporter`` - discovers Redis instances from Consul (only
      services tagged ``ON``) and scrapes them through a redis_exporter
      via its ``/scrape`` endpoint.
    * ``ConsulManager-REDIS`` - scrapes this application's
      ``/api/cloud_redis_metrics/...`` endpoint for each selected region.
      It is only emitted when ``region_list`` is non-empty.

    Args:
        region_list: Entries containing a ``/redis`` path segment; the
            segment is removed and the remainder becomes the path under
            ``/api/cloud_redis_metrics/``.
            (presumably ``vendor/account/redis/region`` - verify against caller)
        cm_exporter: ``host:port`` of ConsulManager; required when
            ``region_list`` is non-empty.
        services_list: Consul service names to discover; required.
        exporter: ``host:port`` of the redis_exporter; required.

    Returns:
        ``{'code': 20000, 'configs': <text>}`` where ``<text>`` is either the
        generated YAML or a hint telling the user which input is missing.
    """
    # Validate the inputs first so that a hint can be returned without
    # touching the Consul settings at all.
    if not services_list:
        return {'code': 20000,'configs': '请选择需要Prometheus从Consul自动发现的Redis组' }
    if exporter == '':
        return {'code': 20000,'configs': '您已经选择了需要Prometheus从Consul自动发现Redis组\n请输入Redis_Exporter的地址和端口例如10.0.0.26:9121' }
    if region_list and cm_exporter == '':
        return {'code': 20000,'configs': '您已经选择了需要从云监控采集基础指标(CPU、内存、磁盘、IO)的Redis组\n请输入ConsulManager地址和端口例如10.0.0.26:1026' }

    # consul_url looks like "scheme://host:port/..."; element 2 is "host:port".
    consul_server = consul_url.split("/")[2]

    # Regex backslashes are doubled because this is a regular (non-raw)
    # f-string; the rendered YAML still contains a single "\d".
    configs = f"""
  - job_name: redis_exporter
    scrape_interval: 15s
    scrape_timeout: 10s
    metrics_path: /scrape
    consul_sd_configs:
      - server: '{consul_server}'
        token: '{consul_token}'
        refresh_interval: 30s
        services: {services_list}
        tags: ['ON']
    relabel_configs:
      - source_labels: [__meta_consul_service_address,__meta_consul_service_port]
        regex: ([^:]+)(?::\\d+)?;(\\d+)
        target_label: __param_target
        replacement: $1:$2
      - source_labels: [__param_target]
        target_label: instance
      - target_label: __address__
        replacement: {exporter}
      - source_labels: ['__meta_consul_service_metadata_vendor']
        target_label: vendor
      - source_labels: ['__meta_consul_service_metadata_region']
        target_label: region
      - source_labels: ['__meta_consul_service_metadata_group']
        target_label: group
      - source_labels: ['__meta_consul_service_metadata_account']
        target_label: account
      - source_labels: ['__meta_consul_service_metadata_name']
        target_label: name
      - source_labels: ['__meta_consul_service_metadata_iid']
        target_label: iid
      - source_labels: ['__meta_consul_service_metadata_mem']
        target_label: mem
      - source_labels: ['__meta_consul_service_metadata_itype']
        target_label: itype
      - source_labels: ['__meta_consul_service_metadata_ver']
        target_label: ver
"""
    if not region_list:
        return {'code': 20000,'configs': configs }

    # The join separator carries the same indentation as the first
    # "- {region_str}" item so every target lines up in the YAML list.
    region_str = '\n      - '.join(i.replace('/redis','') for i in region_list)
    exporter_config = f"""
  - job_name: 'ConsulManager-REDIS'
    scrape_interval: 30s
    scrape_timeout: 15s
    static_configs:
    - targets:
      - {region_str}
    relabel_configs:
      - source_labels: [__address__]
        target_label: __metrics_path__
        regex: (.*)
        replacement: /api/cloud_redis_metrics/${{1}}
      - target_label: __address__
        replacement: {cm_exporter}
"""
    return {'code': 20000,'configs': exporter_config + configs }
def rds_config(region_list,cm_exporter,services_list,exporter):
region_str = '\n - '.join([i.replace('/rds','') for i in region_list])
consul_server = consul_url.split("/")[2]
@ -28,6 +97,7 @@ def rds_config(region_list,cm_exporter,services_list,exporter):
token: '{consul_token}'
refresh_interval: 30s
services: {services_list}
tags: ['ON']
relabel_configs:
- source_labels: [__meta_consul_service_address,__meta_consul_service_port]
regex: ([^:]+)(?::\d+)?;(\d+)
@ -87,7 +157,7 @@ def ecs_config(services_list,ostype_list):
token: '{consul_token}'
refresh_interval: 30s
services: {services_list}
tags: ['{ostype}']
tags: ['{ostype}','ON']
relabel_configs:
- source_labels: ['__meta_consul_service']
target_label: cservice
@ -232,6 +302,123 @@ groups:
"""
return {"code": 20000, "rules": rules}
def get_redisrules():
    """Return the built-in Prometheus alerting rules for Redis.

    The rules are a single group named ``REDIS-Alert`` built on
    redis_exporter metrics (``redis_up``, ``redis_connected_slaves`` ...).
    The text is returned verbatim for the user to copy into a Prometheus
    rule file, so its YAML nesting must be valid.

    Returns:
        ``{"code": 20000, "rules": <YAML text>}``.
    """
    # "\\n" is intentional: the YAML double-quoted scalar must contain the
    # two characters "\n" so the alert description renders a line break.
    rules = """
groups:
- name: REDIS-Alert
  rules:
  - alert: RedisDown
    expr: redis_up == 0
    for: 0m
    labels:
      severity: critical
    annotations:
      summary: Redis down (instance {{ $labels.instance }})
      description: "Redis instance is down\\n VALUE = {{ $value }}\\n LABELS = {{ $labels }}"
  - alert: RedisMissingMaster
    expr: (count(redis_instance_info{role="master"}) or vector(0)) < 1
    for: 0m
    labels:
      severity: critical
    annotations:
      summary: Redis missing master (instance {{ $labels.instance }})
      description: "Redis cluster has no node marked as master.\\n VALUE = {{ $value }}\\n LABELS = {{ $labels }}"
  - alert: RedisTooManyMasters
    expr: count(redis_instance_info{role="master"}) > 1
    for: 0m
    labels:
      severity: critical
    annotations:
      summary: Redis too many masters (instance {{ $labels.instance }})
      description: "Redis cluster has too many nodes marked as master.\\n VALUE = {{ $value }}\\n LABELS = {{ $labels }}"
  - alert: RedisDisconnectedSlaves
    expr: count without (instance, job) (redis_connected_slaves) - sum without (instance, job) (redis_connected_slaves) - 1 > 1
    for: 0m
    labels:
      severity: critical
    annotations:
      summary: Redis disconnected slaves (instance {{ $labels.instance }})
      description: "Redis not replicating for all slaves. Consider reviewing the redis replication status.\\n VALUE = {{ $value }}\\n LABELS = {{ $labels }}"
  - alert: RedisReplicationBroken
    expr: delta(redis_connected_slaves[1m]) < 0
    for: 0m
    labels:
      severity: critical
    annotations:
      summary: Redis replication broken (instance {{ $labels.instance }})
      description: "Redis instance lost a slave\\n VALUE = {{ $value }}\\n LABELS = {{ $labels }}"
  - alert: RedisClusterFlapping
    expr: changes(redis_connected_slaves[1m]) > 1
    for: 2m
    labels:
      severity: critical
    annotations:
      summary: Redis cluster flapping (instance {{ $labels.instance }})
      description: "Changes have been detected in Redis replica connection. This can occur when replica nodes lose connection to the master and reconnect (a.k.a flapping).\\n VALUE = {{ $value }}\\n LABELS = {{ $labels }}"
  - alert: RedisMissingBackup
    expr: time() - redis_rdb_last_save_timestamp_seconds > 60 * 60 * 24
    for: 0m
    labels:
      severity: critical
    annotations:
      summary: Redis missing backup (instance {{ $labels.instance }})
      description: "Redis has not been backuped for 24 hours\\n VALUE = {{ $value }}\\n LABELS = {{ $labels }}"
  # The exporter must be started with --include-system-metrics flag or REDIS_EXPORTER_INCL_SYSTEM_METRICS=true environment variable.
  - alert: RedisOutOfSystemMemory
    expr: redis_memory_used_bytes / redis_total_system_memory_bytes * 100 > 90
    for: 2m
    labels:
      severity: warning
    annotations:
      summary: Redis out of system memory (instance {{ $labels.instance }})
      description: "Redis is running out of system memory (> 90%)\\n VALUE = {{ $value }}\\n LABELS = {{ $labels }}"
  - alert: RedisOutOfConfiguredMaxmemory
    expr: redis_memory_used_bytes / redis_memory_max_bytes * 100 > 90
    for: 2m
    labels:
      severity: warning
    annotations:
      summary: Redis out of configured maxmemory (instance {{ $labels.instance }})
      description: "Redis is running out of configured maxmemory (> 90%)\\n VALUE = {{ $value }}\\n LABELS = {{ $labels }}"
  - alert: RedisTooManyConnections
    expr: redis_connected_clients > 100
    for: 2m
    labels:
      severity: warning
    annotations:
      summary: Redis too many connections (instance {{ $labels.instance }})
      description: "Redis instance has too many connections\\n VALUE = {{ $value }}\\n LABELS = {{ $labels }}"
  - alert: RedisNotEnoughConnections
    expr: redis_connected_clients < 5
    for: 2m
    labels:
      severity: warning
    annotations:
      summary: Redis not enough connections (instance {{ $labels.instance }})
      description: "Redis instance should have more connections (> 5)\\n VALUE = {{ $value }}\\n LABELS = {{ $labels }}"
  - alert: RedisRejectedConnections
    expr: increase(redis_rejected_connections_total[1m]) > 0
    for: 0m
    labels:
      severity: critical
    annotations:
      summary: Redis rejected connections (instance {{ $labels.instance }})
      description: "Some connections to Redis has been rejected\\n VALUE = {{ $value }}\\n LABELS = {{ $labels }}"
"""
    return {"code": 20000, "rules": rules}
def get_rules():
rules = """
groups:

View File

@ -36,7 +36,7 @@ def exporter(vendor,account,region):
value = i['datapoints'][-1]['max']
ts = i['datapoints'][-1]['timestamp']
except:
value = 0
value = -1
ts = now
metric = i['metric_name']
prom_metric_name = metric_name_dict[metric][0].split()[2]

View File

@ -0,0 +1,54 @@
from huaweicloudsdkcore.auth.credentials import BasicCredentials
from huaweicloudsdkces.v1.region.ces_region import CesRegion
from huaweicloudsdkcore.exceptions import exceptions
from huaweicloudsdkces.v1 import *
from datetime import datetime
from units import consul_kv
def exporter(vendor,account,region):
    """Fetch Huawei Cloud Eye metrics for Redis instances as Prometheus text lines.

    For every instance ID registered in Consul for this vendor/account/region,
    the latest ``max`` datapoint of each metric in the ``SYS.DCS`` namespace
    is requested in a single batch call and rendered as
    ``<metric>{iid="<instance id>"} <value> <timestamp>``.

    Args:
        vendor: Cloud vendor key used for the credential and service lookup.
        account: Account name used for the credential and service lookup.
        region: Region identifier passed to ``CesRegion.value_of``.

    Returns:
        A list of exposition lines: for each metric its ``# HELP`` line, its
        ``# TYPE`` line, then one sample per instance. An empty list is
        returned when the SDK raises ``ClientRequestException``, so callers
        can always ``'\\n'.join(...)`` the result.
    """
    ak,sk = consul_kv.get_aksk(vendor,account)
    credentials = BasicCredentials(ak, sk)
    client = CesClient.new_builder() \
        .with_credentials(credentials) \
        .with_region(CesRegion.value_of(region)) \
        .build()
    # Cloud metric name -> [HELP line, TYPE line]; sample lines are appended
    # to each list below so the final output is grouped per metric.
    metric_name_dict = {"cpu_usage":["# HELP redis_cpu_util CPU使用率","# TYPE redis_cpu_util gauge"],
                        "memory_usage":["# HELP redis_mem_util 内存使用率","# TYPE redis_mem_util gauge"],
                        "keyspace_hits_perc":["# HELP redis_hits_util 缓存命中率","# TYPE redis_hits_util gauge"],
                        "total_connections_received":["# HELP redis_newconn_count 每分钟新建的连接数","# TYPE redis_newconn_count gauge"],
                        "rx_controlled":["# HELP redis_rx_controlled 每分钟被流控的次数","# TYPE redis_rx_controlled gauge"],
                        "is_slow_log_exist":["# HELP redis_slow_log 慢日志情况","# TYPE redis_slow_log gauge"]
                        }
    now = int(datetime.now().timestamp()*1000)  # epoch milliseconds
    redis_list = consul_kv.get_services_list_by_region(f'{vendor}_{account}_redis',region)
    try:
        # One MetricInfo per (metric, instance) pair, sent as a single batch.
        metric_body_list = [
            MetricInfo(namespace="SYS.DCS",metric_name=metric_name,
                       dimensions=[MetricsDimension(name="dcs_instance_id",value=iid)])
            for metric_name in metric_name_dict
            for iid in redis_list
        ]
        request = BatchListMetricDataRequest()
        # Query window: the last 120000 ms (2 minutes) up to now.
        request.body = BatchListMetricDataRequestBody(to=now,_from=now-120000,filter="max",period="1",metrics=metric_body_list)
        response = client.batch_list_metric_data(request).to_dict()
        for item in response['metrics']:
            iid = item['dimensions'][0]['value']
            try:
                latest = item['datapoints'][-1]
                value = latest['max']
                ts = latest['timestamp']
            except (IndexError, KeyError, TypeError):
                # No usable datapoint in the window: emit -1 as a sentinel
                # stamped with the request time.
                value = -1
                ts = now
            metric = item['metric_name']
            # Third token of "# HELP <name> <text>" is the Prometheus metric name.
            prom_metric_name = metric_name_dict[metric][0].split()[2]
            metric_name_dict[metric].append(f'{prom_metric_name}{{iid="{iid}"}} {float(value)} {ts}')
        prom_metric_list = []
        for lines in metric_name_dict.values():
            prom_metric_list.extend(lines)
        return prom_metric_list
    except exceptions.ClientRequestException as e:
        print(e.status_code,flush=True)
        print(e.request_id,flush=True)
        print(e.error_code,flush=True)
        print(e.error_msg,flush=True)
        # Return an empty result instead of None so the HTTP layer can still
        # build a (blank) response rather than crash on join().
        return []

View File

@ -0,0 +1,30 @@
from flask import Blueprint,Response
from flask_restful import reqparse, Resource, Api
from config import vendors,regions
from units import token_auth,consul_kv
from units.prom import mysql_huawei,mysql_ali,mysql_tencent,redis_huawei
import json
# Flask blueprint holding the cloud-metrics exporter endpoints.
blueprint = Blueprint('cloud_metrics',__name__)
# flask_restful wrapper used below to attach Resource classes to the blueprint.
api = Api(blueprint)
class RdsExporter(Resource):
    """Expose cloud-vendor MySQL (RDS) metrics in Prometheus text format."""

    def get(self,vendor,account,region):
        """Return the metrics of one vendor/account/region as plain text.

        Args:
            vendor: One of ``huaweicloud``, ``alicloud``, ``tencent_cloud``.
            account: Account name forwarded to the vendor exporter.
            region: Region identifier forwarded to the vendor exporter.

        Returns:
            A ``text/plain`` response with one exposition line per row, or a
            404 ``text/plain`` response when the vendor is not supported.
        """
        exporters = {
            'huaweicloud': mysql_huawei,
            'alicloud': mysql_ali,
            'tencent_cloud': mysql_tencent,
        }
        module = exporters.get(vendor)
        if module is None:
            # Previously an unknown vendor left prom_metric_list unbound and
            # surfaced as an unhandled UnboundLocalError.
            return Response(f'unsupported vendor: {vendor}'.encode('utf-8'),status=404,mimetype="text/plain")
        # Tolerate an exporter that returns None on failure.
        prom_metric_list = module.exporter(vendor,account,region) or []
        return Response('\n'.join(prom_metric_list).encode('utf-8'),mimetype="text/plain")
class RedisExporter(Resource):
    """Expose cloud-vendor Redis metrics in Prometheus text format."""

    def get(self,vendor,account,region):
        """Return the metrics of one vendor/account/region as plain text.

        Args:
            vendor: Cloud vendor key; only ``huaweicloud`` is implemented.
            account: Account name forwarded to the vendor exporter.
            region: Region identifier forwarded to the vendor exporter.

        Returns:
            A ``text/plain`` response with one exposition line per row, or a
            404 ``text/plain`` response when the vendor is not supported.
        """
        # TODO: add 'alicloud' and 'tencent_cloud' once Redis exporters exist.
        exporters = {
            'huaweicloud': redis_huawei,
        }
        module = exporters.get(vendor)
        if module is None:
            # Previously any other vendor left prom_metric_list unbound and
            # surfaced as an unhandled UnboundLocalError.
            return Response(f'unsupported vendor: {vendor}'.encode('utf-8'),status=404,mimetype="text/plain")
        # Tolerate an exporter that returns None on failure.
        prom_metric_list = module.exporter(vendor,account,region) or []
        return Response('\n'.join(prom_metric_list).encode('utf-8'),mimetype="text/plain")
# Route registration: the three path segments are passed to each
# Resource.get() as vendor, account and region.
api.add_resource(RdsExporter, '/api/cloud_mysql_metrics/<vendor>/<account>/<region>')
api.add_resource(RedisExporter, '/api/cloud_redis_metrics/<vendor>/<account>/<region>')

View File

@ -1,19 +0,0 @@
from flask import Blueprint,Response
from flask_restful import reqparse, Resource, Api
from config import vendors,regions
from units import token_auth,consul_kv
from units.prom import mysql_huawei,mysql_ali,mysql_tencent
import json
# Flask blueprint holding the cloud MySQL metrics exporter endpoint.
blueprint = Blueprint('cloud_mysql_metrics',__name__)
# flask_restful wrapper used below to attach the Resource class to the blueprint.
api = Api(blueprint)
class Exporter(Resource):
    """Expose cloud-vendor MySQL (RDS) metrics in Prometheus text format."""

    def get(self,vendor,account,region):
        """Return the metrics of one vendor/account/region as plain text.

        Args:
            vendor: One of ``huaweicloud``, ``alicloud``, ``tencent_cloud``.
            account: Account name forwarded to the vendor exporter.
            region: Region identifier forwarded to the vendor exporter.

        Returns:
            A ``text/plain`` response with one exposition line per row, or a
            404 ``text/plain`` response when the vendor is not supported.
        """
        exporters = {
            'huaweicloud': mysql_huawei,
            'alicloud': mysql_ali,
            'tencent_cloud': mysql_tencent,
        }
        module = exporters.get(vendor)
        if module is None:
            # Previously an unknown vendor left prom_metric_list unbound and
            # surfaced as an unhandled UnboundLocalError.
            return Response(f'unsupported vendor: {vendor}'.encode('utf-8'),status=404,mimetype="text/plain")
        # Tolerate an exporter that returns None on failure.
        prom_metric_list = module.exporter(vendor,account,region) or []
        return Response('\n'.join(prom_metric_list).encode('utf-8'),mimetype="text/plain")
# Route registration: the three path segments are passed to Exporter.get()
# as vendor, account and region.
api.add_resource(Exporter, '/api/cloud_mysql_metrics/<vendor>/<account>/<region>')

View File

@ -46,7 +46,7 @@ Object.keys(filters).forEach(key => {
})
Vue.config.productionTip = false
Vue.prototype.VER = 'v0.10.2'
Vue.prototype.VER = 'v0.11.0-alpha'
new Vue({
el: '#app',

View File

@ -4,16 +4,11 @@
<el-row :gutter="20">
<el-col :span="12" :offset="6">
<el-card shadow="always" style="text-align: center">
Grafana 看板详情
<el-link href="https://grafana.com/grafana/dashboards/17320" target="_blank" type="primary">https://grafana.com/grafana/dashboards/17320</el-link><br><br>
Grafana 看板ID<strong>17320</strong>
</el-card>
</el-col>
</el-row>
<br>
<div class="block">
<el-image style="width: 100%; height: 100%" src="/mysql1.png" />
<el-image style="width: 100%; height: 100%" src="/mysql2.png" />
</div>
</div>
</template>

View File

@ -47,21 +47,21 @@
>
<el-table-column type="index" align="center" />
<el-table-column prop="group" label="分组" sortable align="center" width="150" show-overflow-tooltip />
<el-table-column prop="name" label="名称" sortable align="center" width="220" show-overflow-tooltip />
<el-table-column prop="instance" label="实例" sortable align="center" width="180">
<el-table-column prop="name" label="名称" sortable align="center" width="180" show-overflow-tooltip />
<el-table-column prop="instance" label="实例" sortable align="center">
<template slot-scope="{row}">
<span style="font-weight:bold">{{ row.instance }} </span>
<el-tooltip style="diaplay:inline" effect="dark" placement="top">
<div slot="content"> 域名{{ row.domain }}</div>
<div slot="content"> IP{{ row.ip }}</div>
<i class="el-icon-info" />
</el-tooltip>
</template>
</el-table-column>
<el-table-column prop="ver" label="版本" sortable align="center" width="80" />
<el-table-column prop="mem" label="内存" sortable align="center" width="90" />
<el-table-column prop="exp" label="到期日" sortable align="center" width="100" />
<el-table-column prop="itype" label="类型" sortable align="center" show-overflow-tooltip />
<el-table-column prop="iid" label="实例ID" sortable align="center" show-overflow-tooltip />
<el-table-column prop="exp" label="到期日" sortable align="center" width="90" />
<el-table-column prop="itype" label="类型" sortable align="center" width="120" show-overflow-tooltip />
<el-table-column prop="iid" label="实例ID" sortable align="center" width="150" show-overflow-tooltip />
<el-table-column label="操作" align="center" width="120" class-name="small-padding fixed-width">
<template slot-scope="{row}">
<el-button type="primary" size="mini" @click="handleUpdate(row.iid)">

View File

@ -1,10 +1,10 @@
<template>
<div class="app-container">
<el-select v-model="services" multiple placeholder="选择需要自动发现的MySQL组" filterable collapse-tags clearable style="width: 260px" class="filter-item">
<el-select v-model="services" multiple placeholder="选择需要自动发现的REDIS组" filterable collapse-tags clearable style="width: 280px" class="filter-item">
<el-option v-for="item in services_list" :key="item" :label="item" :value="item" />
</el-select>
<el-input v-model="exporter" placeholder="Mysqld_Exporter IP端口" clearable style="width: 200px;" class="filter-item" />&nbsp;&nbsp;
<el-select v-model="jobredis" multiple placeholder="选择需要采集指标的MySQL组" filterable collapse-tags clearable style="width: 260px" class="filter-item">
<el-input v-model="exporter" placeholder="Redis_Exporter IP端口" clearable style="width: 200px;" class="filter-item" />&nbsp;&nbsp;
<el-select v-model="jobredis" multiple placeholder="选择需要采集指标的REDIS组" filterable collapse-tags clearable style="width: 340px" class="filter-item">
<el-option v-for="item in jobredis_list" :key="item" :label="item" :value="item" />
</el-select>
<el-input v-model="cm_exporter" placeholder="ConsulManager IP端口" clearable style="width: 190px;" class="filter-item" />&nbsp;&nbsp;
@ -32,7 +32,7 @@ export default {
jobredis_list: [],
exporter: '',
cm_exporter: '',
configs: '该功能用于生成Prometheus的两个JOB配置生成后请复制到Prometheus配置中\n\n1. 选择需要同步的账号Prometheus即可自动发现该账号下的所有DRS实例。\n\n2. 由于Mysqld_Exporter无法监控到云数据库的CPU、内存、磁盘的使用情况所以ConsulManager开发了Exporter功能配置到Prometheus即可直接从云厂商采集到这些指标\n 选择需要采集指标的REDIS账号区域即可生成Prometheus的JOB配置。'
configs: '该功能用于生成Prometheus的两个JOB配置生成后请复制到Prometheus配置中\n\n1. 选择需要同步的账号Prometheus即可自动发现该账号下的所有DRS实例。\n\n2. 由于Redis_Exporter无法监控到云数据库的CPU、内存、磁盘的使用情况所以ConsulManager开发了Exporter功能配置到Prometheus即可直接从云厂商采集到这些指标\n 选择需要采集指标的REDIS账号区域即可生成Prometheus的JOB配置。'
}
},
created() {