diff --git a/flask-consul/manager.py b/flask-consul/manager.py index c4b5c24..67440a7 100755 --- a/flask-consul/manager.py +++ b/flask-consul/manager.py @@ -8,7 +8,7 @@ if consul_kv.get_kv_dict(skey_path) == {}: consul_kv.put_kv(skey_path,{'sk':''.join(str(uuid.uuid4()).split('-'))}) from views import login, blackbox, consul, jobs, nodes, selfnode, selfrds, selfredis, avd, exp, jms, edit_cloud, ldap, rds, redis -from views.prom import cloud_mysql_metrics +from views.prom import cloud_metrics from units.cloud import huaweicloud,alicloud,tencent_cloud from units.avd import avd_list from units.jms import sync_jms @@ -29,7 +29,7 @@ app.register_blueprint(avd.blueprint) app.register_blueprint(exp.blueprint) app.register_blueprint(jms.blueprint) app.register_blueprint(edit_cloud.blueprint) -app.register_blueprint(cloud_mysql_metrics.blueprint) +app.register_blueprint(cloud_metrics.blueprint) app.register_blueprint(ldap.blueprint) app.register_blueprint(rds.blueprint) app.register_blueprint(redis.blueprint) diff --git a/flask-consul/units/cloud/sync_ecs.py b/flask-consul/units/cloud/sync_ecs.py index fecc1fc..fa154a5 100644 --- a/flask-consul/units/cloud/sync_ecs.py +++ b/flask-consul/units/cloud/sync_ecs.py @@ -27,11 +27,11 @@ def w2consul(vendor,account,region,ecs_dict): #对consul中关机的ecs做标记。 if v['status'] in ['SHUTOFF','Stopped','STOPPED']: off = off + 1 - tags = ['shutoff', region] + tags = ['OFF', v['ostype'], region] stat = 'off' else: on = on + 1 - tags = [v['ostype'],region] + tags = ['ON', v['ostype'], region] stat = 'on' custom_ecs = consul_kv.get_value(f'ConsulManager/assets/sync_ecs_custom/{iid}') port = custom_ecs.get('port') diff --git a/flask-consul/units/cloud/sync_rds.py b/flask-consul/units/cloud/sync_rds.py index 6b93f93..488e1bf 100644 --- a/flask-consul/units/cloud/sync_rds.py +++ b/flask-consul/units/cloud/sync_rds.py @@ -27,11 +27,11 @@ def w2consul(vendor,account,region,rds_dict): #对consul中关机的rds做标记。 if v['status'] in ['SHUTDOWN']: off = off + 1 - tags = ['shutoff',v['itype'],v['ver'], region] + tags = ['OFF',v['itype'],v['ver'], region] stat = 'off' else: on = on + 1 - tags = [v['itype'],v['ver'],region] + tags = ['ON',v['itype'],v['ver'],region] stat = 'on' custom_rds = consul_kv.get_value(f'ConsulManager/assets/sync_rds_custom/{iid}') port = custom_rds.get('port') diff --git a/flask-consul/units/cloud/sync_redis.py b/flask-consul/units/cloud/sync_redis.py index 10f6839..2067d39 100644 --- a/flask-consul/units/cloud/sync_redis.py +++ b/flask-consul/units/cloud/sync_redis.py @@ -27,11 +27,11 @@ def w2consul(vendor,account,region,redis_dict): #对consul中关机的redis做标记。 if v['status'] in ['SHUTDOWN']: off = off + 1 - tags = ['shutoff',v['itype'],v['ver'], region] + tags = ['OFF', v['itype'], v['ver'], region] stat = 'off' else: on = on + 1 - tags = [v['itype'],v['ver'],region] + tags = ['ON', v['itype'], v['ver'], region] stat = 'on' custom_redis = consul_kv.get_value(f'ConsulManager/assets/sync_redis_custom/{iid}') port = custom_redis.get('port') @@ -39,7 +39,7 @@ def w2consul(vendor,account,region,redis_dict): if port == None: port = v['port'] if ip == None: - ip = v['ip'] + ip = v['domain'] instance = f'{ip}:{port}' data = { 'id': iid, @@ -58,7 +58,8 @@ def w2consul(vendor,account,region,redis_dict): 'vendor': vendors.get(vendor,'未找到'), 'mem': v['mem'], 'ver': v['ver'], - 'domain':v['domain'], + 'ip':v['ip'], + 'exp':v['exp'], 'stat': stat }, "check": { diff --git a/flask-consul/units/gen_config.py b/flask-consul/units/gen_config.py index 46805db..389682c 100644 --- a/flask-consul/units/gen_config.py +++ b/flask-consul/units/gen_config.py @@ -1,5 +1,74 @@ from config import consul_token,consul_url +def redis_config(region_list,cm_exporter,services_list,exporter): + region_str = '\n - '.join([i.replace('/redis','') for i in region_list]) + consul_server = consul_url.split("/")[2] + exporter_config = f""" + - job_name: 'ConsulManager-REDIS' + scrape_interval: 30s + scrape_timeout: 15s + static_configs: + - targets: + - {region_str} + relabel_configs: + - source_labels: [__address__] + target_label: __metrics_path__ + regex: (.*) + replacement: /api/cloud_redis_metrics/${{1}} + - target_label: __address__ + replacement: {cm_exporter} +""" + configs = f""" + - job_name: redis_exporter + scrape_interval: 15s + scrape_timeout: 10s + metrics_path: /scrape + consul_sd_configs: + - server: '{consul_server}' + token: '{consul_token}' + refresh_interval: 30s + services: {services_list} + tags: ['ON'] + relabel_configs: + - source_labels: [__meta_consul_service_address,__meta_consul_service_port] + regex: ([^:]+)(?::\d+)?;(\d+) + target_label: __param_target + replacement: $1:$2 + - source_labels: [__param_target] + target_label: instance + - target_label: __address__ + replacement: {exporter} + - source_labels: ['__meta_consul_service_metadata_vendor'] + target_label: vendor + - source_labels: ['__meta_consul_service_metadata_region'] + target_label: region + - source_labels: ['__meta_consul_service_metadata_group'] + target_label: group + - source_labels: ['__meta_consul_service_metadata_account'] + target_label: account + - source_labels: ['__meta_consul_service_metadata_name'] + target_label: name + - source_labels: ['__meta_consul_service_metadata_iid'] + target_label: iid + - source_labels: ['__meta_consul_service_metadata_mem'] + target_label: mem + - source_labels: ['__meta_consul_service_metadata_itype'] + target_label: itype + - source_labels: ['__meta_consul_service_metadata_ver'] + target_label: ver +""" + if not services_list: + return {'code': 20000,'configs': '请选择需要Prometheus从Conusl自动发现的MySQL组' } + if services_list and exporter == '': + return {'code': 20000,'configs': '您已经选择了需要Prometheus从Conusl自动发现MySQL组,\n请输入Redis_Exporter的地址和端口,例如:10.0.0.26:9121' } + if region_list and cm_exporter == '': + return {'code': 20000,'configs': '您已经选择了需要从云监控采集基础指标(CPU、内存、磁盘、IO)的MySQL组,\n请输入ConsulManager地址和端口,例如:10.0.0.26:1026' } + + if region_list: + return {'code': 20000,'configs': exporter_config + configs } + else: + return {'code': 20000,'configs': configs } + def rds_config(region_list,cm_exporter,services_list,exporter): region_str = '\n - '.join([i.replace('/rds','') for i in region_list]) consul_server = consul_url.split("/")[2] @@ -28,6 +97,7 @@ def rds_config(region_list,cm_exporter,services_list,exporter): token: '{consul_token}' refresh_interval: 30s services: {services_list} + tags: ['ON'] relabel_configs: - source_labels: [__meta_consul_service_address,__meta_consul_service_port] regex: ([^:]+)(?::\d+)?;(\d+) @@ -87,7 +157,7 @@ def ecs_config(services_list,ostype_list): token: '{consul_token}' refresh_interval: 30s services: {services_list} - tags: ['{ostype}'] + tags: ['{ostype}','ON'] relabel_configs: - source_labels: ['__meta_consul_service'] target_label: cservice @@ -232,6 +302,123 @@ groups: """ return {"code": 20000, "rules": rules} +def get_redisrules(): + rules = """ +groups: +- name: REDIS-Alert + rules: + - alert: RedisDown + expr: redis_up == 0 + for: 0m + labels: + severity: critical + annotations: + summary: Redis down (instance {{ $labels.instance }}) + description: "Redis instance is down\\n VALUE = {{ $value }}\\n LABELS = {{ $labels }}" + + - alert: RedisMissingMaster + expr: (count(redis_instance_info{role="master"}) or vector(0)) < 1 + for: 0m + labels: + severity: critical + annotations: + summary: Redis missing master (instance {{ $labels.instance }}) + description: "Redis cluster has no node marked as master.\\n VALUE = {{ $value }}\\n LABELS = {{ $labels }}" + + - alert: RedisTooManyMasters + expr: count(redis_instance_info{role="master"}) > 1 + for: 0m + labels: + severity: critical + annotations: + summary: Redis too many masters (instance {{ $labels.instance }}) + description: "Redis cluster has too many nodes marked as master.\\n VALUE = {{ $value }}\\n LABELS = {{ $labels }}" + + - alert: RedisDisconnectedSlaves + expr: count without (instance, job) (redis_connected_slaves) - sum without (instance, job) (redis_connected_slaves) - 1 > 1 + for: 0m + labels: + severity: critical + annotations: + summary: Redis disconnected slaves (instance {{ $labels.instance }}) + description: "Redis not replicating for all slaves. Consider reviewing the redis replication status.\\n VALUE = {{ $value }}\\n LABELS = {{ $labels }}" + + - alert: RedisReplicationBroken + expr: delta(redis_connected_slaves[1m]) < 0 + for: 0m + labels: + severity: critical + annotations: + summary: Redis replication broken (instance {{ $labels.instance }}) + description: "Redis instance lost a slave\\n VALUE = {{ $value }}\\n LABELS = {{ $labels }}" + + - alert: RedisClusterFlapping + expr: changes(redis_connected_slaves[1m]) > 1 + for: 2m + labels: + severity: critical + annotations: + summary: Redis cluster flapping (instance {{ $labels.instance }}) + description: "Changes have been detected in Redis replica connection. This can occur when replica nodes lose connection to the master and reconnect (a.k.a flapping).\\n VALUE = {{ $value }}\\n LABELS = {{ $labels }}" + + - alert: RedisMissingBackup + expr: time() - redis_rdb_last_save_timestamp_seconds > 60 * 60 * 24 + for: 0m + labels: + severity: critical + annotations: + summary: Redis missing backup (instance {{ $labels.instance }}) + description: "Redis has not been backuped for 24 hours\\n VALUE = {{ $value }}\\n LABELS = {{ $labels }}" + + # The exporter must be started with --include-system-metrics flag or REDIS_EXPORTER_INCL_SYSTEM_METRICS=true environment variable. + - alert: RedisOutOfSystemMemory + expr: redis_memory_used_bytes / redis_total_system_memory_bytes * 100 > 90 + for: 2m + labels: + severity: warning + annotations: + summary: Redis out of system memory (instance {{ $labels.instance }}) + description: "Redis is running out of system memory (> 90%)\\n VALUE = {{ $value }}\\n LABELS = {{ $labels }}" + + - alert: RedisOutOfConfiguredMaxmemory + expr: redis_memory_used_bytes / redis_memory_max_bytes * 100 > 90 + for: 2m + labels: + severity: warning + annotations: + summary: Redis out of configured maxmemory (instance {{ $labels.instance }}) + description: "Redis is running out of configured maxmemory (> 90%)\\n VALUE = {{ $value }}\\n LABELS = {{ $labels }}" + + - alert: RedisTooManyConnections + expr: redis_connected_clients > 100 + for: 2m + labels: + severity: warning + annotations: + summary: Redis too many connections (instance {{ $labels.instance }}) + description: "Redis instance has too many connections\\n VALUE = {{ $value }}\\n LABELS = {{ $labels }}" + + - alert: RedisNotEnoughConnections + expr: redis_connected_clients < 5 + for: 2m + labels: + severity: warning + annotations: + summary: Redis not enough connections (instance {{ $labels.instance }}) + description: "Redis instance should have more connections (> 5)\\n VALUE = {{ $value }}\\n LABELS = {{ $labels }}" + + - alert: RedisRejectedConnections + expr: increase(redis_rejected_connections_total[1m]) > 0 + for: 0m + labels: + severity: critical + annotations: + summary: Redis rejected connections (instance {{ $labels.instance }}) + description: "Some connections to Redis has been rejected\\n VALUE = {{ $value }}\\n LABELS = {{ $labels }}" +""" + return {"code": 20000, "rules": rules} + + def get_rules(): rules = """ groups: diff --git a/flask-consul/units/prom/mysql_huawei.py b/flask-consul/units/prom/mysql_huawei.py index d31b66e..27b88ba 100644 --- a/flask-consul/units/prom/mysql_huawei.py +++ b/flask-consul/units/prom/mysql_huawei.py @@ -36,7 +36,7 @@ def exporter(vendor,account,region): value = i['datapoints'][-1]['max'] ts = i['datapoints'][-1]['timestamp'] except: - value = 0 + value = -1 ts = now metric = i['metric_name'] prom_metric_name = metric_name_dict[metric][0].split()[2] diff --git a/flask-consul/units/prom/redis_huawei.py b/flask-consul/units/prom/redis_huawei.py new file mode 100644 index 0000000..700db62 --- /dev/null +++ b/flask-consul/units/prom/redis_huawei.py @@ -0,0 +1,54 @@ +from huaweicloudsdkcore.auth.credentials import BasicCredentials +from huaweicloudsdkces.v1.region.ces_region import CesRegion +from huaweicloudsdkcore.exceptions import exceptions +from huaweicloudsdkces.v1 import * +from datetime import datetime +from units import consul_kv + +def exporter(vendor,account,region): + ak,sk = consul_kv.get_aksk(vendor,account) + credentials = BasicCredentials(ak, sk) + client = CesClient.new_builder() \ + .with_credentials(credentials) \ + .with_region(CesRegion.value_of(region)) \ + .build() + metric_name_dict = {"cpu_usage":["# HELP redis_cpu_util CPU使用率","# TYPE redis_cpu_util gauge"], + "memory_usage":["# HELP redis_mem_util 内存使用率","# TYPE redis_mem_util gauge"], + "keyspace_hits_perc":["# HELP redis_hits_util 缓存命中率","# TYPE redis_hits_util gauge"], + "total_connections_received":["# HELP redis_newconn_count 每分钟新建的连接数","# TYPE redis_newconn_count gauge"], + "rx_controlled":["# HELP redis_rx_controlled 每分钟被流控的次数","# TYPE redis_rx_controlled gauge"], + "is_slow_log_exist":["# HELP redis_slow_log 慢日志情况","# TYPE redis_slow_log gauge"] + } + metric_body_list = [] + now = int(datetime.now().timestamp()*1000) + redis_list = consul_kv.get_services_list_by_region(f'{vendor}_{account}_redis',region) + try: + for i in metric_name_dict.keys(): + for id in redis_list: + metric_body_list.append(MetricInfo(namespace="SYS.DCS",metric_name=i,dimensions=[MetricsDimension(name="dcs_instance_id",value=id)])) + + request = BatchListMetricDataRequest() + request.body = BatchListMetricDataRequestBody(to=now,_from=now-120000,filter="max",period="1",metrics=metric_body_list) + #print(now-300000,now) + response = client.batch_list_metric_data(request).to_dict() + for i in response['metrics']: + id= i['dimensions'][0]['value'] + try: + value = i['datapoints'][-1]['max'] + ts = i['datapoints'][-1]['timestamp'] + except: + value = -1 + ts = now + metric = i['metric_name'] + prom_metric_name = metric_name_dict[metric][0].split()[2] + metric_name_dict[metric].append(f'{prom_metric_name}{{iid="{id}"}} {float(value)} {ts}') + prom_metric_list = [] + for x in metric_name_dict.values(): + prom_metric_list = prom_metric_list + x + return prom_metric_list + except exceptions.ClientRequestException as e: + print(e.status_code,flush=True) + print(e.request_id,flush=True) + print(e.error_code,flush=True) + print(e.error_msg,flush=True) + diff --git a/flask-consul/views/prom/cloud_metrics.py b/flask-consul/views/prom/cloud_metrics.py new file mode 100644 index 0000000..2e9782e --- /dev/null +++ b/flask-consul/views/prom/cloud_metrics.py @@ -0,0 +1,30 @@ +from flask import Blueprint,Response +from flask_restful import reqparse, Resource, Api +from config import vendors,regions +from units import token_auth,consul_kv +from units.prom import mysql_huawei,mysql_ali,mysql_tencent,redis_huawei +import json +blueprint = Blueprint('cloud_metrics',__name__) +api = Api(blueprint) + +class RdsExporter(Resource): + def get(self,vendor,account,region): + if vendor == 'huaweicloud': + prom_metric_list = mysql_huawei.exporter(vendor,account,region) + elif vendor == 'alicloud': + prom_metric_list = mysql_ali.exporter(vendor,account,region) + elif vendor == 'tencent_cloud': + prom_metric_list = mysql_tencent.exporter(vendor,account,region) + return Response('\n'.join(prom_metric_list).encode('utf-8'),mimetype="text/plain") + +class RedisExporter(Resource): + def get(self,vendor,account,region): + if vendor == 'huaweicloud': + prom_metric_list = redis_huawei.exporter(vendor,account,region) + #elif vendor == 'alicloud': + #prom_metric_list = mysql_ali.exporter(vendor,account,region) + #elif vendor == 'tencent_cloud': + #prom_metric_list = mysql_tencent.exporter(vendor,account,region) + return Response('\n'.join(prom_metric_list).encode('utf-8'),mimetype="text/plain") +api.add_resource(RdsExporter, '/api/cloud_mysql_metrics///') +api.add_resource(RedisExporter, '/api/cloud_redis_metrics///') diff --git a/flask-consul/views/prom/cloud_mysql_metrics.py b/flask-consul/views/prom/cloud_mysql_metrics.py deleted file mode 100644 index 51bf4cb..0000000 --- a/flask-consul/views/prom/cloud_mysql_metrics.py +++ /dev/null @@ -1,19 +0,0 @@ -from flask import Blueprint,Response -from flask_restful import reqparse, Resource, Api -from config import vendors,regions -from units import token_auth,consul_kv -from units.prom import mysql_huawei,mysql_ali,mysql_tencent -import json -blueprint = Blueprint('cloud_mysql_metrics',__name__) -api = Api(blueprint) - -class Exporter(Resource): - def get(self,vendor,account,region): - if vendor == 'huaweicloud': - prom_metric_list = mysql_huawei.exporter(vendor,account,region) - elif vendor == 'alicloud': - prom_metric_list = mysql_ali.exporter(vendor,account,region) - elif vendor == 'tencent_cloud': - prom_metric_list = mysql_tencent.exporter(vendor,account,region) - return Response('\n'.join(prom_metric_list).encode('utf-8'),mimetype="text/plain") -api.add_resource(Exporter, '/api/cloud_mysql_metrics///') diff --git a/vue-consul/src/main.js b/vue-consul/src/main.js index 90b5f27..766feb1 100644 --- a/vue-consul/src/main.js +++ b/vue-consul/src/main.js @@ -46,7 +46,7 @@ Object.keys(filters).forEach(key => { }) Vue.config.productionTip = false -Vue.prototype.VER = 'v0.10.2' +Vue.prototype.VER = 'v0.11.0-alpha' new Vue({ el: '#app', diff --git a/vue-consul/src/views/redis/grafana.vue b/vue-consul/src/views/redis/grafana.vue index cbbae08..1c9254e 100644 --- a/vue-consul/src/views/redis/grafana.vue +++ b/vue-consul/src/views/redis/grafana.vue @@ -4,16 +4,11 @@ - Grafana 看板详情: - https://grafana.com/grafana/dashboards/17320

- Grafana 看板ID:17320

- -
diff --git a/vue-consul/src/views/redis/lists.vue b/vue-consul/src/views/redis/lists.vue index c7adc24..b7a54bb 100644 --- a/vue-consul/src/views/redis/lists.vue +++ b/vue-consul/src/views/redis/lists.vue @@ -47,21 +47,21 @@ > - - + + - - - + + +