[root@tick:/var/lib/kapacitor/task] ID: nginx_alert2 Error: Template: Type: stream Status: enabled Executing: true Created: 06 Sep 19 15:38 CST Modified: 06 Sep 19 16:08 CST LastEnabled: 06 Sep 19 16:08 CST Databases Retention Policies: ["telegraf"."autogen"] TICKscript: dbrp "telegraf"."autogen"
// Stream task: raises a CRITICAL alert whenever the sum of the nginx
// "active" and "accepts" fields exceeds `crit`, notifies VictorOps, and
// records each alert event in the `outputDB`.`outputRP`.`outputMeasurement`
// series.

// ---- Input selection -------------------------------------------------------
var db = 'telegraf'
var rp = 'autogen'
var measurement = 'nginx'
var groupBy = ['host', 'server', 'port']

// ---- Alert identity and message --------------------------------------------
var name = '主机CPU使用率大于10%'
var idVar = name

// Message template rendered by the alert node.
//   * `index .Tags` needs an explicit key; without one the template prints the
//     whole tag map ("map[port:... server:... host:...]").
//   * The only field present when the alert node runs is `new_value` (produced
//     by the eval that follows the join); `value` is created later, after the
//     alert node, so referencing it here renders "<no value>".
var message = '{\'alert\': \'主机 {{ index .Tags "host" }} Nginx2\',\'description\': \'主机 {{ index .Tags "host" }} CPU 使用率 {{ index .Fields "new_value" }}% ,CPU 使用率过高会导致系统运行缓慢,应用出现异常等问题\',\'suggestion\': \'请查看CPU使用率高的进程/应用是否为异常导致\'}'

// Tag/field names under which the alert node stores its own metadata.
var idTag = 'alertID'
var levelTag = 'level'
var messageField = 'message'
var durationField = 'duration'

// ---- Output destination ----------------------------------------------------
var outputDB = 'chronograf'
var outputRP = 'autogen'
var outputMeasurement = 'alerts'

// NOTE(review): `triggerType` is declared but never referenced below.
var triggerType = 'threshold'

// Critical threshold: the alert fires when new_value > crit.
var crit = 0

// First branch: the "active" field, re-exposed as `active_value`.
var data = stream
    |from()
        .database(db)
        .retentionPolicy(rp)
        .measurement(measurement)
        .groupBy(groupBy)
    |eval(lambda: "active")
        .as('active_value')

// Second branch: the "accepts" field from the same measurement, re-exposed
// as `accepts_value`.
var datav = stream
    |from()
        .database(db)
        .retentionPolicy(rp)
        .measurement(measurement)
        .groupBy(groupBy)
    |eval(lambda: "accepts")
        .as('accepts_value')

// Join both branches; fields become `active.active_value` and
// `accepts.accepts_value`, which are summed into `new_value`.
var new = data
    |join(datav)
        .as('active', 'accepts')
    |eval(lambda: int("active.active_value") + int("accepts.accepts_value"))
        .as('new_value')

// Evaluate the threshold and dispatch the alert to VictorOps.
var trigger = new
    |alert()
        .crit(lambda: "new_value" > crit)
        .message(message)
        .id(idVar)
        .idTag(idTag)
        .levelTag(levelTag)
        .messageField(messageField)
        .durationField(durationField)
        .victorOps()

// Persist the alert point: add a float `value` copy of `new_value`, keep all
// existing fields, and write to the output measurement (creating the database
// if necessary).
trigger
    |eval(lambda: float("new_value"))
        .as('value')
        .keep()
    |influxDBOut()
        .create()
        .database(outputDB)
        .retentionPolicy(outputRP)
        .measurement(outputMeasurement)
DOT: digraph nginx_alert2 { graph [throughput="0.00 points/s"];
stream0 [avg_exec_time_ns="0s" errors="0" working_cardinality="0" ]; stream0 -> from3 [processed="1"]; stream0 -> from1 [processed="1"];
from3 [avg_exec_time_ns="0s" errors="0" working_cardinality="0" ]; from3 -> eval4 [processed="1"];
eval4 [avg_exec_time_ns="0s" errors="0" working_cardinality="1" ]; eval4 -> join6 [processed="1"];
from1 [avg_exec_time_ns="0s" errors="0" working_cardinality="0" ]; from1 -> eval2 [processed="1"];
eval2 [avg_exec_time_ns="0s" errors="0" working_cardinality="1" ]; eval2 -> join6 [processed="1"];
join6 [avg_exec_time_ns="27.314µs" errors="0" working_cardinality="1" ]; join6 -> eval7 [processed="1"];
eval7 [avg_exec_time_ns="0s" errors="0" working_cardinality="1" ]; eval7 -> alert8 [processed="1"];
alert8 [alerts_inhibited="0" alerts_triggered="1" avg_exec_time_ns="8.445753ms" crits_triggered="1" errors="0" infos_triggered="0" oks_triggered="0" warns_triggered="0" working_cardinality="1" ]; alert8 -> eval9 [processed="1"];
eval9 [avg_exec_time_ns="0s" errors="0" working_cardinality="1" ]; eval9 -> influxdb_out10 [processed="1"];
influxdb_out10 [avg_exec_time_ns="0s" errors="0" points_written="0" working_cardinality="0" write_errors="0" ]; } [root@tick:/var/lib/kapacitor/task] Connected to http://localhost:8086 version 1.7.7 InfluxDB shell version: 1.7.7 > select * from alerts order by time desc limit 1; name: alerts time active_value alertID alertName cpu duration host level message new_value port server triggerType value ---- ------------ ------- --------- --- -------- ---- ----- ------- --------- ---- ------ ----------- ----- 1567757340000000000 1 主机CPU使用率大于10% 720000000000 influxdb CRITICAL {'alert': '主机 map[port:801 server:localhost host:influxdb] Nginx','description': '主机 influxdb CPU 使用率 <no value>% ,CPU 使用率过高会导致系统运行缓慢,应用出现异常等问题','suggestion': '请查看CPU使用率高的进程/应用是否为异常导致'} 65 801 localhost 1 >
|