修改告警系统监控项
This commit is contained in:
parent
75929c4c8a
commit
59e337065b
|
|
@ -0,0 +1,214 @@
|
|||
package org.jeecg.modules.Util;
|
||||
|
||||
import org.jeecg.modules.base.entity.monitor.Item;
|
||||
import org.jeecg.modules.entity.MonitorItem;
|
||||
import org.jeecg.modules.entity.ValueType;
|
||||
|
||||
|
||||
import java.util.*;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/**
|
||||
* 监控项配置类 - 包含所有预定义的监控项
|
||||
*/
|
||||
public class MonitorConfig {
|
||||
|
||||
// 默认主机ID
|
||||
public static final String DEFAULT_HOST_ID = "host-001";
|
||||
|
||||
// 默认状态
|
||||
public static final String DEFAULT_STATUS = "active";
|
||||
|
||||
// 按类别分组存储监控项
|
||||
private static final Map<String, List<Item>> CATEGORY_ITEMS = new HashMap<>();
|
||||
// ID生成器
|
||||
private static int nextId = 1001;
|
||||
|
||||
static {
|
||||
initializeAllItems();
|
||||
}
|
||||
|
||||
/**
|
||||
* 初始化所有监控项
|
||||
*/
|
||||
private static void initializeAllItems() {
|
||||
// 生成ID列表
|
||||
Map<String, Integer> idMap = new HashMap<>();
|
||||
|
||||
// CPU相关监控项
|
||||
List<Item> cpuItems = Arrays.asList(
|
||||
createItem(1001, "cpu_usage_percent", "CPU使用率百分比,反映服务器计算资源占用情况", "%", ValueType.FLOAT),
|
||||
createItem(1002, "cpuUtilization", "CPU utilization in %", "%", ValueType.FLOAT),
|
||||
createItem(1003, "system_load_average", "系统平均负载,1分钟内等待运行的进程平均数", "load", ValueType.FLOAT)
|
||||
);
|
||||
|
||||
// 内存相关监控项
|
||||
List<Item> memoryItems = Arrays.asList(
|
||||
createItem(1101, "memory_used_percent", "内存使用率,服务器物理内存占用比例", "%", ValueType.FLOAT),
|
||||
createItem(1102, "memoryUtilization", "Memory used percentage is calculated as (100-pavailable)", "%", ValueType.FLOAT),
|
||||
createItem(1103, "memAvailable", "服务器物理内存可用空间", "B", ValueType.LONG),
|
||||
createItem(1104, "swapUtilization", "服务器交换内存使用率", "%", ValueType.FLOAT),
|
||||
createItem(1105, "swapTotalSize", "The total space of swap volume/file in bytes", "B", ValueType.LONG),
|
||||
createItem(1106, "freeSwapSpace", "The free space of swap volume/file in bytes", "B", ValueType.LONG),
|
||||
createItem(1107, "swap_used_percent", "交换分区(Swap)使用率,内存不足时的虚拟内存占用", "%", ValueType.FLOAT)
|
||||
);
|
||||
|
||||
// 磁盘相关监控项
|
||||
List<Item> diskItems = Arrays.asList(
|
||||
createItem(1201, "disk_io_read_bytes", "磁盘读取吞吐量,每秒读取的数据量", "bytes/s", ValueType.FLOAT),
|
||||
createItem(1202, "disk_io_write_bytes", "磁盘写入吞吐量,每秒写入的数据量", "bytes/s", ValueType.FLOAT)
|
||||
);
|
||||
|
||||
// 网络相关监控项
|
||||
List<Item> networkItems = Arrays.asList(
|
||||
createItem(1301, "network_receive_bytes", "网络接收流量,网卡入站数据传输速率", "bytes/s", ValueType.FLOAT),
|
||||
createItem(1302, "network_transmit_bytes", "网络发送流量,网卡出站数据传输速率", "bytes/s", ValueType.FLOAT),
|
||||
createItem(1303, "throughput", "网卡吞吐量", "Kb/s", ValueType.FLOAT),
|
||||
createItem(1304, "latency_ms", "响应延迟(毫秒)", "ms", ValueType.FLOAT),
|
||||
createItem(1305, "latency_s", "响应时间(秒)", "s", ValueType.FLOAT),
|
||||
createItem(1306, "tcp_established_connections", "当前建立的TCP连接数,反映网络连接活跃状态", "count", ValueType.INT)
|
||||
);
|
||||
|
||||
// 进程相关监控项
|
||||
List<Item> processItems = Arrays.asList(
|
||||
createItem(1401, "process_count", "当前运行中的进程总数", "count", ValueType.INT)
|
||||
);
|
||||
|
||||
// 数据库相关监控项
|
||||
List<Item> databaseItems = Arrays.asList(
|
||||
createItem(1501, "dbMemory", "数据库内存", "Kb", ValueType.LONG),
|
||||
createItem(1502, "dblSize", "数据库文件占用空间", "B", ValueType.LONG)
|
||||
);
|
||||
|
||||
// 应用相关监控项
|
||||
List<Item> appItems = Arrays.asList(
|
||||
createItem(1601, "responseSuccessRate", "服务器响应成功率", "%", ValueType.FLOAT),
|
||||
createItem(1602, "login", "登录数", "", ValueType.INT),
|
||||
createItem(1603, "connections", "连接数", "", ValueType.INT)
|
||||
);
|
||||
|
||||
// 日志相关监控项
|
||||
List<Item> logItems = Arrays.asList(
|
||||
createItem(1701, "logRemainingSize", "日志剩余空间", "B", ValueType.LONG)
|
||||
);
|
||||
|
||||
// 系统负载
|
||||
List<Item> systemItems = Arrays.asList(
|
||||
createItem(1801, "load", "系统负载", "", ValueType.FLOAT)
|
||||
);
|
||||
|
||||
// 添加到类别映射
|
||||
CATEGORY_ITEMS.put("cpu", cpuItems);
|
||||
CATEGORY_ITEMS.put("memory", memoryItems);
|
||||
CATEGORY_ITEMS.put("swap", memoryItems.stream()
|
||||
.filter(item -> item.getName().contains("swap") || item.getName().equals("swap"))
|
||||
.collect(Collectors.toList()));
|
||||
CATEGORY_ITEMS.put("disk", diskItems);
|
||||
CATEGORY_ITEMS.put("network", networkItems);
|
||||
CATEGORY_ITEMS.put("process", processItems);
|
||||
CATEGORY_ITEMS.put("database", databaseItems);
|
||||
CATEGORY_ITEMS.put("application", appItems);
|
||||
CATEGORY_ITEMS.put("log", logItems);
|
||||
CATEGORY_ITEMS.put("system", systemItems);
|
||||
}
|
||||
|
||||
/**
|
||||
* 创建监控项
|
||||
*/
|
||||
private static MonitorItem createItem(int id, String name, String description, String units, ValueType valueType) {
|
||||
return new MonitorItem(String.valueOf(id), name, DEFAULT_HOST_ID, description, units, DEFAULT_STATUS, valueType);
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取所有监控项
|
||||
*/
|
||||
public static List<Item> getAllItems() {
|
||||
return CATEGORY_ITEMS.values().stream()
|
||||
.flatMap(List::stream)
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
/**
|
||||
* 按类别获取监控项
|
||||
*/
|
||||
public static List<Item> getItemsByCategory(String category) {
|
||||
return CATEGORY_ITEMS.getOrDefault(category.toLowerCase(), Collections.emptyList());
|
||||
}
|
||||
|
||||
/**
|
||||
* 通过名称查找监控项
|
||||
*/
|
||||
public static Item findByName(String name) {
|
||||
return getAllItems().stream()
|
||||
.filter(item -> item.getName().equals(name))
|
||||
.findFirst()
|
||||
.orElse(null);
|
||||
}
|
||||
|
||||
/**
|
||||
* 通过ID查找监控项
|
||||
*/
|
||||
public static Item findById(String itemId) {
|
||||
return getAllItems().stream()
|
||||
.filter(item -> item.getItemId().equals(itemId))
|
||||
.findFirst()
|
||||
.orElse(null);
|
||||
}
|
||||
|
||||
/**
|
||||
* 通过数字ID查找监控项
|
||||
*/
|
||||
public static Item findById(int itemId) {
|
||||
return findById(String.valueOf(itemId));
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取指定主机的所有监控项
|
||||
*/
|
||||
public static List<Item> getItemsByHost(String hostId) {
|
||||
return getAllItems().stream()
|
||||
.filter(item -> item.getHostId().equals(hostId))
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
/**
|
||||
* 验证监控项是否存在
|
||||
*/
|
||||
public static boolean contains(String name) {
|
||||
return findByName(name) != null;
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取所有监控项名称
|
||||
*/
|
||||
public static List<String> getAllItemNames() {
|
||||
return getAllItems().stream()
|
||||
.map(Item::getName)
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取所有类别
|
||||
*/
|
||||
public static List<String> getAllCategories() {
|
||||
return new ArrayList<>(CATEGORY_ITEMS.keySet());
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取监控项统计信息
|
||||
*/
|
||||
public static Map<String, Object> getStatistics() {
|
||||
Map<String, Object> stats = new HashMap<>();
|
||||
stats.put("totalItems", getAllItems().size());
|
||||
stats.put("categories", getAllCategories());
|
||||
|
||||
Map<String, Integer> categoryCounts = new HashMap<>();
|
||||
for (Map.Entry<String, List<Item>> entry : CATEGORY_ITEMS.entrySet()) {
|
||||
categoryCounts.put(entry.getKey(), entry.getValue().size());
|
||||
}
|
||||
stats.put("categoryCounts", categoryCounts);
|
||||
|
||||
return stats;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,682 @@
|
|||
package org.jeecg.modules.Util;
|
||||
|
||||
import cn.hutool.core.io.FileUtil;
|
||||
import cn.hutool.core.util.StrUtil;
|
||||
import com.jcraft.jsch.*;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.jeecg.modules.base.entity.Rule;
|
||||
import org.jeecg.modules.base.entity.postgre.AlarmRule;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.io.*;
|
||||
import java.util.*;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
|
||||
/**
|
||||
* Prometheus告警规则管理器
|
||||
* Prometheus服务器,管理告警规则
|
||||
*/
|
||||
@Slf4j
|
||||
@Component
|
||||
public class PrometheusAlertManager {
|
||||
|
||||
private Session session;
|
||||
private ChannelExec channelExec;
|
||||
private ChannelSftp channelSftp;
|
||||
|
||||
|
||||
|
||||
// 连接配置
|
||||
@Value("${prometheus.ssh.host:127.0.0.1}")
|
||||
private String host;
|
||||
@Value("${prometheus.ssh.userName:rmsops}")
|
||||
private String username;
|
||||
@Value("${prometheus.ssh.password:cnndc66367220}")
|
||||
private String password;
|
||||
@Value("${prometheus.ssh.port:22}")
|
||||
private int port;
|
||||
|
||||
// 本地路径缓存
|
||||
private final Map<String, String> localPathCache = new ConcurrentHashMap<>();
|
||||
|
||||
// 优化的超时配置
|
||||
private int connectionTimeout = 5000; // 本地5秒,远程30秒
|
||||
private int commandTimeout = 10000; // 本地10秒,远程60秒
|
||||
private int sftpTimeout = 8000; // 本地8秒,远程30秒
|
||||
|
||||
// Prometheus配置
|
||||
@Value("${prometheus.path.home:/opt/prometheus/}")
|
||||
private String prometheusHome;
|
||||
@Value("${prometheus.path.configPath:/opt/prometheus/conf/}")
|
||||
private String prometheusConfigPath;
|
||||
@Value("${prometheus.path.alertRulesPath:/opt/prometheus/rules/}")
|
||||
private String alertRulesPath;
|
||||
@Value("${prometheus.path.promtoolPath:/opt/prometheus/bin/}")
|
||||
private String promtoolPath;
|
||||
@Value("${prometheus.url:http://172.21.170.11}")
|
||||
private String prometheusHost;
|
||||
@Value("${prometheus.prometheusPort:9090}")
|
||||
private int prometheusPort;
|
||||
// 本地模式标识
|
||||
@Value("${prometheus.ssh.isLocalMode:true}")
|
||||
private boolean isLocalMode;
|
||||
|
||||
/**
 * Creates the manager.
 *
 * <p>{@code isLocalMode} has no field initializer, so it is always {@code false} while the constructor
 * runs; the {@code @Value} setting is applied to the field only after construction. The former mode
 * check here could therefore never take its local branch and always logged "remote mode", whatever
 * the configuration said. The timeout fields already start at the values that
 * {@code adjustLocalTimeouts()} assigns, so nothing has to be adjusted at this point.
 */
public PrometheusAlertManager() {
    log.debug("PrometheusAlertManager 已创建,运行模式将在配置注入后确定");
}
|
||||
/**
|
||||
* 本地模式下调整超时时间
|
||||
*/
|
||||
private void adjustLocalTimeouts() {
|
||||
connectionTimeout = 5000;
|
||||
commandTimeout = 10000;
|
||||
sftpTimeout = 8000;
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取SSH会话
|
||||
*/
|
||||
private Session getOrCreateSession() throws JSchException {
|
||||
if (session == null || !session.isConnected()) {
|
||||
JSch jsch = new JSch();
|
||||
session = jsch.getSession(username, host, port);
|
||||
session.setPassword(password);
|
||||
|
||||
Properties config = new Properties();
|
||||
config.put("StrictHostKeyChecking", "no");
|
||||
session.setConfig(config);
|
||||
session.connect(connectionTimeout);
|
||||
}
|
||||
return session;
|
||||
}
|
||||
|
||||
/**
|
||||
* 执行命令 - 本地模式优化
|
||||
*/
|
||||
private String executeCommand(String command) throws Exception {
|
||||
if (isLocalMode) {
|
||||
return executeLocalCommand(command);
|
||||
} else {
|
||||
return executeRemoteCommand(command);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 本地命令执行
|
||||
*/
|
||||
private String executeLocalCommand(String command) throws Exception {
|
||||
ProcessBuilder processBuilder = new ProcessBuilder("/bin/bash", "-c", command);
|
||||
Process process = processBuilder.start();
|
||||
|
||||
StringBuilder output = new StringBuilder();
|
||||
try (BufferedReader reader = new BufferedReader(
|
||||
new InputStreamReader(process.getInputStream()))) {
|
||||
String line;
|
||||
while ((line = reader.readLine()) != null) {
|
||||
output.append(line).append("\n");
|
||||
}
|
||||
}
|
||||
|
||||
int exitCode = process.waitFor();
|
||||
if (exitCode != 0) {
|
||||
throw new RuntimeException("Command failed with exit code: " + exitCode + ", Output: " +
|
||||
output.toString());
|
||||
}
|
||||
|
||||
return output.toString().trim();
|
||||
}
|
||||
|
||||
/**
|
||||
* 远程命令执行
|
||||
*/
|
||||
private String executeRemoteCommand(String command) throws Exception {
|
||||
channelExec = (ChannelExec) getOrCreateSession().openChannel("exec");
|
||||
channelExec.setCommand(command);
|
||||
channelExec.connect(commandTimeout);
|
||||
|
||||
StringBuilder output = new StringBuilder();
|
||||
try (BufferedReader reader = new BufferedReader(
|
||||
new InputStreamReader(channelExec.getInputStream()))) {
|
||||
String line;
|
||||
while ((line = reader.readLine()) != null) {
|
||||
output.append(line).append("\n");
|
||||
}
|
||||
}
|
||||
|
||||
int exitCode = channelExec.getExitStatus();
|
||||
channelExec.disconnect();
|
||||
|
||||
if (exitCode != 0) {
|
||||
throw new RuntimeException("Command failed with exit code: " + exitCode + ", Output: " +
|
||||
output.toString());
|
||||
}
|
||||
|
||||
return output.toString().trim();
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取SFTP通道
|
||||
*/
|
||||
private ChannelSftp getSftpChannel() throws JSchException {
|
||||
if (channelSftp == null || channelSftp.isClosed()) {
|
||||
channelSftp = (ChannelSftp) getOrCreateSession().openChannel("sftp");
|
||||
channelSftp.connect(sftpTimeout);
|
||||
}
|
||||
return channelSftp;
|
||||
}
|
||||
|
||||
/**
|
||||
* 文件上传 - 本地模式优化
|
||||
*/
|
||||
public void uploadFile(String localFilePath, String remoteFilePath) throws Exception {
|
||||
if (isLocalMode) {
|
||||
// 本地模式:直接复制文件
|
||||
copyLocalFile(localFilePath, remoteFilePath);
|
||||
} else {
|
||||
// 远程模式:使用SFTP
|
||||
ChannelSftp sftp = getSftpChannel();
|
||||
sftp.put(localFilePath, remoteFilePath);
|
||||
}
|
||||
log.info("文件上传完成: {} -> {}", localFilePath, remoteFilePath);
|
||||
}
|
||||
|
||||
/**
|
||||
* 本地文件复制
|
||||
*/
|
||||
private void copyLocalFile(String source, String destination) throws IOException {
|
||||
File srcFile = new File(source);
|
||||
File destFile = new File(destination);
|
||||
|
||||
// 确保目标目录存在
|
||||
File destDir = destFile.getParentFile();
|
||||
if (destDir != null && !destDir.exists()) {
|
||||
destDir.mkdirs();
|
||||
}
|
||||
|
||||
try (FileInputStream fis = new FileInputStream(srcFile);
|
||||
FileOutputStream fos = new FileOutputStream(destFile)) {
|
||||
byte[] buffer = new byte[8192];
|
||||
int length;
|
||||
while ((length = fis.read(buffer)) > 0) {
|
||||
fos.write(buffer, 0, length);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 列出目录内容 - 本地模式优化
|
||||
*/
|
||||
public List<String> listDirectory(String directory) throws Exception {
|
||||
if (isLocalMode) {
|
||||
return listLocalDirectory(directory);
|
||||
} else {
|
||||
return listRemoteDirectory(directory);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 本地目录列出
|
||||
*/
|
||||
private List<String> listLocalDirectory(String directory) {
|
||||
List<String> files = new ArrayList<>();
|
||||
File dir = new File(directory);
|
||||
|
||||
if (dir.exists() && dir.isDirectory()) {
|
||||
File[] fileList = dir.listFiles();
|
||||
if (fileList != null) {
|
||||
for (File file : fileList) {
|
||||
if (!file.getName().startsWith(".")) { // 排除隐藏文件
|
||||
files.add(file.getName());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return files;
|
||||
}
|
||||
|
||||
/**
|
||||
* 远程目录列出
|
||||
*/
|
||||
private List<String> listRemoteDirectory(String directory) throws SftpException, JSchException {
|
||||
ChannelSftp sftp = getSftpChannel();
|
||||
Vector<ChannelSftp.LsEntry> entries = sftp.ls(directory);
|
||||
|
||||
List<String> files = new ArrayList<>();
|
||||
for (ChannelSftp.LsEntry entry : entries) {
|
||||
String filename = entry.getFilename();
|
||||
if (!filename.startsWith(".")) { // 排除隐藏文件
|
||||
files.add(filename);
|
||||
}
|
||||
}
|
||||
|
||||
return files;
|
||||
}
|
||||
|
||||
/**
|
||||
* 文件是否存在 - 本地模式优化
|
||||
*/
|
||||
public boolean fileExists(String filePath) throws Exception {
|
||||
if (isLocalMode) {
|
||||
return new File(filePath).exists();
|
||||
} else {
|
||||
try {
|
||||
ChannelSftp sftp = getSftpChannel();
|
||||
sftp.stat(filePath);
|
||||
return true;
|
||||
} catch (SftpException e) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 删除文件 - 本地模式优化
|
||||
*/
|
||||
public void deleteFile(String filePath) throws Exception {
|
||||
if (isLocalMode) {
|
||||
File file = new File(filePath);
|
||||
if (file.exists()) {
|
||||
file.delete();
|
||||
}
|
||||
} else {
|
||||
ChannelSftp sftp = getSftpChannel();
|
||||
sftp.rm(filePath);
|
||||
}
|
||||
log.info("文件删除完成: {}", filePath);
|
||||
}
|
||||
|
||||
/**
|
||||
* 创建目录 - 本地模式优化
|
||||
*/
|
||||
public void createDirectory(String directory) throws Exception {
|
||||
if (isLocalMode) {
|
||||
File dir = new File(directory);
|
||||
if (!dir.exists()) {
|
||||
dir.mkdirs();
|
||||
}
|
||||
} else {
|
||||
ChannelSftp sftp = getSftpChannel();
|
||||
try {
|
||||
sftp.mkdir(directory);
|
||||
} catch (SftpException e) {
|
||||
// 目录可能已存在
|
||||
log.debug("目录创建失败,可能已存在: {}", directory);
|
||||
}
|
||||
}
|
||||
log.info("目录操作完成: {}", directory);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* 根据 AlarmRule 实体保存或更新 Prometheus 告警规则
|
||||
*/
|
||||
public boolean saveOrUpdateAlarmRule(AlarmRule alarmRule) throws Exception {
|
||||
if (alarmRule.getEnabled() == null || alarmRule.getEnabled() != 1) {
|
||||
log.warn("告警规则 [{}] 未启用,跳过保存", alarmRule.getName());
|
||||
return false;
|
||||
}
|
||||
|
||||
// 1. 生成 YAML 内容
|
||||
String ruleContent = generatePrometheusAlertRule(alarmRule);
|
||||
// 2. 使用 ID 作为文件名,确保唯一性
|
||||
String fileName = alarmRule.getId() + ".yml";
|
||||
String fullPath = FileUtil.normalize(alertRulesPath + "/" + fileName);
|
||||
|
||||
// 3. 写入临时文件并上传
|
||||
String tempPath = System.getProperty("java.io.tmpdir")+ fileName;
|
||||
writeTempFile(tempPath, ruleContent);
|
||||
|
||||
// 4. 如果原文件存在则备份
|
||||
if (fileExists(fullPath)) {
|
||||
backupOriginalFile(fullPath);
|
||||
}
|
||||
|
||||
uploadFile(tempPath, fullPath);
|
||||
|
||||
// 5. 验证规则语法
|
||||
if (!validateAlertRules(fullPath)) {
|
||||
log.error("告警规则 [{}] 验证失败,回滚更改", alarmRule.getName());
|
||||
restoreBackupFile(fullPath);
|
||||
throw new Exception("告警规则语法验证失败");
|
||||
}
|
||||
|
||||
// 6. 重载 Prometheus
|
||||
reloadPrometheus();
|
||||
log.info("告警规则 [{}] 已成功同步至 Prometheus", alarmRule.getName());
|
||||
return true;
|
||||
}
|
||||
/**
|
||||
* 校验告警规则文件的语法 (调用 promtool check rules)
|
||||
*/
|
||||
private boolean validateAlertRules(String filePath) throws Exception {
|
||||
String promtoolPath = prometheusHome + "/promtool";
|
||||
String command = promtoolPath + " check rules " + filePath;
|
||||
try {
|
||||
String result = executeCommand(command);
|
||||
return result.toLowerCase().contains("success") || result.toLowerCase().contains("check passed");
|
||||
} catch (Exception e) {
|
||||
log.error("规则文件验证异常: {}", e.getMessage());
|
||||
return false;
|
||||
}
|
||||
}
|
||||
/**
|
||||
* 将 AlarmRule 实体转换为 Prometheus YAML 字符串
|
||||
*/
|
||||
private String generatePrometheusAlertRule(AlarmRule alarmRule) {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
sb.append("groups:\n");
|
||||
sb.append("- name: ").append(alarmRule.getName()).append("_group\n");
|
||||
sb.append(" rules:\n");
|
||||
sb.append(" - alert: ").append(sanitizeMetricName(alarmRule.getName())).append("\n");
|
||||
|
||||
// 生成 PromQL 表达式
|
||||
String expr = buildExpression(alarmRule);
|
||||
sb.append(" expr: ").append(expr).append("\n");
|
||||
|
||||
// 沉默周期/持续时间
|
||||
if (alarmRule.getSilenceCycle() != null) {
|
||||
sb.append(" for: ").append(convertSecondsToDuration(alarmRule.getSilenceCycle())).append("\n");
|
||||
}
|
||||
|
||||
// 标签
|
||||
sb.append(" labels:\n");
|
||||
sb.append(" severity: ").append(getSeverityLevel(alarmRule)).append("\n");
|
||||
sb.append(" source_type: \"").append(defaultString(alarmRule.getSourceType())).append("\"\n");
|
||||
sb.append(" resource_id: \"").append(defaultString(alarmRule.getSourceId())).append("\"\n");
|
||||
sb.append(" contact_group: \"").append(defaultString(alarmRule.getContactId())).append("\"\n");
|
||||
|
||||
// 注解
|
||||
Rule ruleObj = alarmRule.getRule();
|
||||
String conditionDesc = (ruleObj != null) ? ruleObj.joint() : "指标异常";
|
||||
|
||||
sb.append(" annotations:\n");
|
||||
sb.append(" summary: \"").append(alarmRule.getName()).append(" - ").append(conditionDesc).append("\"\n");
|
||||
sb.append(" description: \"告警名称: ").append(alarmRule.getName())
|
||||
.append("\\n资源ID: ").append(defaultString(alarmRule.getSourceId()))
|
||||
.append("\\n监控条件: ").append(conditionDesc).append("\"\n");
|
||||
|
||||
return sb.toString();
|
||||
}
|
||||
/**
|
||||
* 构建 PromQL 表达式
|
||||
*/
|
||||
private String buildExpression(AlarmRule alarmRule) {
|
||||
Rule rule = alarmRule.getRule();
|
||||
if (rule != null && StrUtil.isNotBlank(rule.getName()) && rule.getThreshold() != null) {
|
||||
// 先使用 Rule 实体中的定义
|
||||
return rule.getName() + " " + rule.getOperator() + " " + rule.getThreshold();
|
||||
}
|
||||
// 最后使用 itemId 和默认阈值
|
||||
String metric = StrUtil.isNotBlank(alarmRule.getItemId()) ? alarmRule.getItemId() : "unknown_metric";
|
||||
return metric + " > 80";
|
||||
}
|
||||
|
||||
|
||||
|
||||
private String convertSecondsToDuration(Long seconds) {
|
||||
if (seconds >= 86400) return (seconds / 86400) + "d";
|
||||
if (seconds >= 3600) return (seconds / 3600) + "h";
|
||||
if (seconds >= 60) return (seconds / 60) + "m";
|
||||
return seconds + "s";
|
||||
}
|
||||
|
||||
private String getSeverityLevel(AlarmRule alarmRule) {
|
||||
// 可以根据 notification 或其他字段映射严重级别
|
||||
return "warning";
|
||||
}
|
||||
|
||||
private String sanitizeMetricName(String name) {
|
||||
// 将中文或特殊字符替换为下划线,保证符合 Prometheus 命名规范
|
||||
return name.replaceAll("[^a-zA-Z0-9_]", "_");
|
||||
}
|
||||
|
||||
private String defaultString(String str) {
|
||||
return StrUtil.isBlank(str) ? "unknown" : str;
|
||||
}
|
||||
|
||||
/**
|
||||
* 写入临时文件
|
||||
*/
|
||||
private void writeTempFile(String filePath, String content) throws IOException {
|
||||
File file = new File(filePath);
|
||||
File parentDir = file.getParentFile();
|
||||
if (parentDir != null && !parentDir.exists()) parentDir.mkdirs();
|
||||
try (FileWriter writer = new FileWriter(file)) {
|
||||
writer.write(content);
|
||||
}
|
||||
}
|
||||
|
||||
private void backupOriginalFile(String filePath) throws Exception {
|
||||
String backupPath = filePath + ".backup";
|
||||
if (isLocalMode) {
|
||||
File original = new File(filePath);
|
||||
File backup = new File(backupPath);
|
||||
java.nio.file.Files.copy(original.toPath(), backup.toPath(), java.nio.file.StandardCopyOption.REPLACE_EXISTING);
|
||||
} else {
|
||||
ChannelSftp sftp = getSftpChannel();
|
||||
sftp.rename(filePath, backupPath);
|
||||
}
|
||||
log.info("已备份文件: {} -> {}", filePath, backupPath);
|
||||
}
|
||||
|
||||
private void restoreBackupFile(String filePath) throws Exception {
|
||||
String backupPath = filePath + ".backup";
|
||||
if (fileExists(backupPath)) {
|
||||
if (isLocalMode) {
|
||||
File backup = new File(backupPath);
|
||||
File original = new File(filePath);
|
||||
java.nio.file.Files.copy(backup.toPath(), original.toPath(), java.nio.file.StandardCopyOption.REPLACE_EXISTING);
|
||||
backup.delete();
|
||||
} else {
|
||||
ChannelSftp sftp = getSftpChannel();
|
||||
sftp.rename(backupPath, filePath);
|
||||
}
|
||||
log.info("已从备份恢复文件: {}", filePath);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* 验证Prometheus配置
|
||||
*/
|
||||
public boolean validatePrometheusConfig() throws Exception {
|
||||
String command = promtoolPath + " check config " + prometheusConfigPath;
|
||||
String result = executeCommand(command);
|
||||
|
||||
log.info("Prometheus配置验证结果: {}", result);
|
||||
|
||||
// 检查输出中是否包含"SUCCESS"或无错误信息
|
||||
return result.toLowerCase().contains("success") ||
|
||||
(!result.toLowerCase().contains("error") &&
|
||||
!result.toLowerCase().contains("failed"));
|
||||
}
|
||||
|
||||
/**
|
||||
* 重新加载Prometheus
|
||||
*/
|
||||
public void reloadPrometheus() throws Exception {
|
||||
String command =
|
||||
"curl -X POST http://" + prometheusHost + ":" + prometheusPort + "/-/reload";
|
||||
String result = executeCommand(command);
|
||||
|
||||
log.info("Prometheus重载结果: {}", result);
|
||||
}
|
||||
|
||||
/**
|
||||
* 保存告警规则到文件
|
||||
*/
|
||||
public void saveAlertRuleToFile(String ruleName, String ruleContent) throws Exception {
|
||||
String tempRulePath = "/tmp/" + ruleName + ".yml";
|
||||
String finalRulePath = alertRulesPath + "/" + ruleName + ".yml";
|
||||
|
||||
// 写入临时文件
|
||||
writeTempFile(tempRulePath, ruleContent);
|
||||
|
||||
// 上传到目标位置
|
||||
uploadFile(tempRulePath, finalRulePath);
|
||||
|
||||
// 验证配置
|
||||
if (!validatePrometheusConfig()) {
|
||||
// 如果验证失败,回滚更改
|
||||
deleteFile(finalRulePath);
|
||||
throw new Exception("告警规则配置验证失败");
|
||||
}
|
||||
|
||||
// 重新加载Prometheus
|
||||
reloadPrometheus();
|
||||
|
||||
log.info("告警规则保存成功: {}", ruleName);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* 获取所有告警规则
|
||||
*/
|
||||
public List<String> getAllAlertRules() throws Exception {
|
||||
List<String> rules = new ArrayList<>();
|
||||
|
||||
// 获取alert_rules目录下的所有YAML文件
|
||||
List<String> files = listDirectory(alertRulesPath);
|
||||
for (String file : files) {
|
||||
if (file.toLowerCase().endsWith(".yml") || file.toLowerCase().endsWith(".yaml")) {
|
||||
rules.add(file);
|
||||
}
|
||||
}
|
||||
|
||||
return rules;
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取告警规则内容
|
||||
*/
|
||||
public String getAlertRuleContent(String ruleName) throws Exception {
|
||||
String rulePath = alertRulesPath + "/" + ruleName;
|
||||
|
||||
if (isLocalMode) {
|
||||
// 本地模式:直接读取文件
|
||||
try (BufferedReader reader = new BufferedReader(new FileReader(rulePath))) {
|
||||
StringBuilder content = new StringBuilder();
|
||||
String line;
|
||||
while ((line = reader.readLine()) != null) {
|
||||
content.append(line).append("\n");
|
||||
}
|
||||
return content.toString();
|
||||
}
|
||||
} else {
|
||||
// 远程模式:使用SFTP下载
|
||||
ChannelSftp sftp = getSftpChannel();
|
||||
try (InputStream inputStream = sftp.get(rulePath);
|
||||
BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream))) {
|
||||
|
||||
StringBuilder content = new StringBuilder();
|
||||
String line;
|
||||
while ((line = reader.readLine()) != null) {
|
||||
content.append(line).append("\n");
|
||||
}
|
||||
return content.toString();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 删除告警规则
|
||||
*/
|
||||
public void deleteAlertRule(String ruleName) throws Exception {
|
||||
String rulePath = alertRulesPath + "/" + ruleName;
|
||||
|
||||
// 删除规则文件
|
||||
deleteFile(rulePath);
|
||||
|
||||
// 验证配置
|
||||
if (!validatePrometheusConfig()) {
|
||||
throw new Exception("删除告警规则后配置验证失败");
|
||||
}
|
||||
|
||||
// 重新加载Prometheus
|
||||
reloadPrometheus();
|
||||
|
||||
log.info("告警规则删除成功: {}", ruleName);
|
||||
}
|
||||
|
||||
/**
|
||||
* 更新告警规则
|
||||
*/
|
||||
public void updateAlertRule(String ruleName, String newRuleContent) throws Exception {
|
||||
String tempRulePath = "/tmp/" + ruleName + ".yml";
|
||||
String finalRulePath = alertRulesPath + "/" + ruleName;
|
||||
|
||||
// 写入临时文件
|
||||
writeTempFile(tempRulePath, newRuleContent);
|
||||
|
||||
// 上传到目标位置
|
||||
uploadFile(tempRulePath, finalRulePath);
|
||||
|
||||
// 验证配置
|
||||
if (!validatePrometheusConfig()) {
|
||||
throw new Exception("更新告警规则后配置验证失败");
|
||||
}
|
||||
|
||||
// 重新加载Prometheus
|
||||
reloadPrometheus();
|
||||
|
||||
log.info("告警规则更新成功: {}", ruleName);
|
||||
}
|
||||
|
||||
/**
|
||||
* 关闭所有连接
|
||||
*/
|
||||
public void close() {
|
||||
if (channelExec != null && channelExec.isConnected()) {
|
||||
channelExec.disconnect();
|
||||
}
|
||||
|
||||
if (channelSftp != null && channelSftp.isConnected()) {
|
||||
channelSftp.disconnect();
|
||||
}
|
||||
|
||||
if (session != null && session.isConnected()) {
|
||||
session.disconnect();
|
||||
}
|
||||
|
||||
log.info("Prometheus Alert Manager 已关闭");
|
||||
}
|
||||
|
||||
// Getters
|
||||
public String getHost() {
|
||||
return host;
|
||||
}
|
||||
|
||||
public String getAlertRulesPath() {
|
||||
return alertRulesPath;
|
||||
}
|
||||
|
||||
public boolean isLocalMode() {
|
||||
return isLocalMode;
|
||||
}
|
||||
|
||||
// Setters
|
||||
public void setLocalMode(boolean localMode) {
|
||||
isLocalMode = localMode;
|
||||
if (localMode) {
|
||||
adjustLocalTimeouts();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -6,6 +6,7 @@ import lombok.Data;
|
|||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.jeecg.modules.base.entity.monitor.History;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.cloud.context.config.annotation.RefreshScope;
|
||||
import org.springframework.http.HttpEntity;
|
||||
import org.springframework.http.HttpMethod;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
|
|
@ -20,10 +21,13 @@ import java.util.*;
|
|||
import java.util.stream.Collectors;
|
||||
|
||||
@Component
|
||||
@RefreshScope
|
||||
@Slf4j
|
||||
public class PrometheusUtil {
|
||||
@Value("${prometheus.url:http://172.21.170.11:9090}")
|
||||
@Value("${prometheus.url:http://172.21.170.11}")
|
||||
private String prometheusUrl;
|
||||
@Value("${prometheus.prometheusPort:9090}")
|
||||
private int prometheusPort;
|
||||
|
||||
@Value("${prometheus.connect-timeout:30}")
|
||||
private int connectTimeout;
|
||||
|
|
@ -112,7 +116,7 @@ public class PrometheusUtil {
|
|||
private URI buildQueryUri(String path, String promql, Long start, Long end, Long step) {
|
||||
String encodedQuery = URLEncoder.encode(promql);
|
||||
StringBuilder url = new StringBuilder();
|
||||
url.append(prometheusUrl).append(path)
|
||||
url.append(prometheusUrl + ":" + prometheusPort).append(path)
|
||||
.append("?query=").append(encodedQuery);
|
||||
|
||||
if (start != null) {
|
||||
|
|
@ -126,6 +130,7 @@ public class PrometheusUtil {
|
|||
}
|
||||
|
||||
try {
|
||||
log.info("构建URI: " + url);
|
||||
return new URI(url.toString());
|
||||
} catch (Exception e) {
|
||||
throw new RuntimeException("构建URI失败: " + url, e);
|
||||
|
|
@ -207,7 +212,7 @@ public class PrometheusUtil {
|
|||
//region 健康检查
|
||||
public boolean healrhChenck() {
|
||||
try {
|
||||
String url = prometheusUrl + "/-/healthy";
|
||||
String url = prometheusUrl + ":" + prometheusPort + "/-/healthy";
|
||||
restTemplate.getForObject(url, String.class);
|
||||
return true;
|
||||
} catch (Exception e) {
|
||||
|
|
@ -222,7 +227,7 @@ public class PrometheusUtil {
|
|||
* @return
|
||||
*/
|
||||
public JSONObject buildInfo() {
|
||||
String url = prometheusUrl + "/api/v1/status/buildinfo";
|
||||
String url = prometheusUrl + ":" + prometheusPort + "/api/v1/status/buildinfo";
|
||||
return get(url).getJSONObject("data");
|
||||
}
|
||||
//endregion
|
||||
|
|
@ -239,7 +244,7 @@ public class PrometheusUtil {
|
|||
public List<QueryResult> queryInstantFull(String promql, Long time) {
|
||||
|
||||
StringBuilder url = new StringBuilder();
|
||||
url.append(prometheusUrl).append("/api/v1/query?query=")
|
||||
url.append(prometheusUrl+":"+prometheusPort).append("/api/v1/query?query=")
|
||||
.append(URLEncoder.encode(promql));
|
||||
|
||||
if (time != null) {
|
||||
|
|
@ -402,7 +407,7 @@ public class PrometheusUtil {
|
|||
*/
|
||||
public List<Map<String, String>> series(List<String> selectors, long start, long end) {
|
||||
StringBuilder url = new StringBuilder();
|
||||
url.append(prometheusUrl).append("/api/v1/series?");
|
||||
url.append(prometheusUrl+":"+prometheusPort).append("/api/v1/series?");
|
||||
for (String selector : selectors) {
|
||||
url.append("match[]=").append(URLEncoder.encode(selector))
|
||||
.append("&");
|
||||
|
|
@ -426,13 +431,11 @@ public class PrometheusUtil {
|
|||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* 获取标签值列表
|
||||
*/
|
||||
public Set<String> labelValues(String labelName) {
|
||||
String url = String.format("%s/api/v1/label/%s/values", prometheusUrl,
|
||||
String url = String.format("%s/api/v1/label/%s/values", prometheusUrl+":"+prometheusPort,
|
||||
URLEncoder.encode(labelName));
|
||||
|
||||
JSONObject json = get(url);
|
||||
|
|
@ -469,7 +472,7 @@ public class PrometheusUtil {
|
|||
* 获取元数据
|
||||
*/
|
||||
public List<MetricMetadata> metadata(String metric) {
|
||||
String url = prometheusUrl + "/api/v1/metadata";
|
||||
String url = prometheusUrl+":"+prometheusPort + "/api/v1/metadata";
|
||||
if (metric != null) {
|
||||
url = url + "&metric=" + URLEncoder.encode(metric);
|
||||
}
|
||||
|
|
@ -500,7 +503,7 @@ public class PrometheusUtil {
|
|||
* @return List<TargetInfo>
|
||||
*/
|
||||
public List<TargetInfo> targets() {
|
||||
String url = prometheusUrl + "/api/v1/targets";
|
||||
String url = prometheusUrl+":"+prometheusPort + "/api/v1/targets";
|
||||
JSONObject json = get(url);
|
||||
JSONArray activeTargets = json.getJSONObject("data").getJSONArray("activeTargets");
|
||||
List<TargetInfo> result = new ArrayList<>();
|
||||
|
|
@ -541,7 +544,7 @@ public class PrometheusUtil {
|
|||
* @return
|
||||
*/
|
||||
public List<AlertRule> alertRules() {
|
||||
String url = prometheusUrl + "/api/v1/alertRules";
|
||||
String url = prometheusUrl+":"+prometheusPort + "/api/v1/alertRules";
|
||||
JSONObject json = get(url);
|
||||
JSONArray groups = json.getJSONObject("data").getJSONArray("groups");
|
||||
List<AlertRule> result = new ArrayList<>();
|
||||
|
|
@ -664,7 +667,34 @@ public class PrometheusUtil {
|
|||
}
|
||||
|
||||
|
||||
/**
|
||||
* 查询数据库挂载的磁盘容量信息
|
||||
*/
|
||||
|
||||
public DiskMetrics queryDiskMetrics(String mountpoint) {
|
||||
// 构建 PromQL 查询
|
||||
String promql = String.format(
|
||||
"node_filesystem_size_bytes{mountpoint=\"%s\"} or " +
|
||||
"node_filesystem_avail_bytes{mountpoint=\"%s\"} or " +
|
||||
"node_filesystem_free_bytes{mountpoint=\"%s\"}",
|
||||
mountpoint, mountpoint, mountpoint
|
||||
);
|
||||
|
||||
// 调用 Prometheus API
|
||||
List<QueryResult> results = queryInstantFull(promql, null);
|
||||
|
||||
return new DiskMetrics();
|
||||
}
|
||||
|
||||
|
||||
//endregion
|
||||
|
||||
|
||||
}
|
||||
|
||||
@Data
|
||||
class DiskMetrics {
|
||||
private Double totalBytes; // 总字节数
|
||||
private Double availableBytes; // 可用字节数
|
||||
private Double freeBytes; // 空闲字节数
|
||||
}
|
||||
|
|
@ -22,6 +22,7 @@ import org.jeecg.common.constant.SymbolConstant;
|
|||
import org.jeecg.common.util.EmailUtil;
|
||||
import org.jeecg.common.util.JDBCUtil;
|
||||
import org.jeecg.common.util.RedisUtil;
|
||||
import org.jeecg.modules.Util.MonitorConfig;
|
||||
import org.jeecg.modules.Util.PrometheusUtil;
|
||||
import org.jeecg.modules.base.dto.NameValue;
|
||||
import org.jeecg.modules.base.entity.monitor.Host;
|
||||
|
|
@ -31,6 +32,7 @@ import org.jeecg.modules.base.entity.postgre.SysDatabase;
|
|||
import org.jeecg.modules.base.entity.postgre.SysEmail;
|
||||
import org.jeecg.modules.base.entity.postgre.SysServer;
|
||||
import org.jeecg.modules.base.enums.ServerStatus;
|
||||
import org.jeecg.modules.entity.MonitorItem;
|
||||
import org.jeecg.modules.feignclient.ManageUtil;
|
||||
import org.jeecg.modules.feignclient.MonitorAlarm;
|
||||
import org.jeecg.modules.qiyeEmail.base.InstanceSDK;
|
||||
|
|
@ -120,14 +122,14 @@ public class StatusAspect {
|
|||
List<Host> hosts = new ArrayList<>();
|
||||
//通过prometheus获取type=db的数据 up{type="db"}
|
||||
List<PrometheusUtil.QueryResult> hostResults =
|
||||
prometheusUtil.queryInstantFull("max by (name) (up{type=\"db\"})", null);
|
||||
prometheusUtil.queryInstantFull("up{type=\"db\"}", null);
|
||||
for (PrometheusUtil.QueryResult result : hostResults) {
|
||||
Host host = new Host();
|
||||
String hostName = result.getMetrics().get("name");
|
||||
String job = result.getMetrics().get("job");
|
||||
String instance = result.getMetrics().get("instance");
|
||||
Double status = result.getValue().getValue();
|
||||
host.setCode(job);
|
||||
int status = (int) result.getValue().getValue();
|
||||
host.setCode(hostName);
|
||||
host.setHostId(instance);
|
||||
host.setName(hostName);
|
||||
host.setStatus(String.valueOf(status));
|
||||
|
|
@ -206,7 +208,7 @@ public class StatusAspect {
|
|||
|
||||
List<PrometheusUtil.QueryResult> hostResults =
|
||||
prometheusUtil.queryInstantFull(
|
||||
"node_boot_time_seconds{instance=\"" + name + "\"}", null);
|
||||
"node_boot_time_seconds{instance=~\"" + name + ".*\"}", null);
|
||||
|
||||
if (ObjectUtil.isNull(hostResults) || CollUtil.isEmpty(hostResults)) {
|
||||
redisUtil.hset(key, id, new NameValue(name, online));
|
||||
|
|
@ -217,7 +219,8 @@ public class StatusAspect {
|
|||
Host host = new Host();
|
||||
String hostName = result.getMetrics().get("__name__");
|
||||
String instance = result.getMetrics().get("instance");
|
||||
Double status = 1.0;
|
||||
int status = 1;
|
||||
host.setCode(instance);
|
||||
host.setHostId(instance);
|
||||
host.setName(hostName);
|
||||
host.setStatus(String.valueOf(status));
|
||||
|
|
@ -240,24 +243,30 @@ public class StatusAspect {
|
|||
ServerStatus.WARN.getValue()), status);
|
||||
redisUtil.hset(key, id, new NameValue(name, online));
|
||||
// 更新该服务器的HostId
|
||||
server.setHostId(host.getHostId());
|
||||
String hostId = host.getHostId();
|
||||
server.setHostId(hostId);
|
||||
serverService.updateById(server);
|
||||
// 同步服务器监控项 获取所有指标
|
||||
Set<String> metric = prometheusUtil.metricNames();
|
||||
List<String> metricNames =new ArrayList<>();
|
||||
metricNames.add("instance=\"172.21.170.11:9090\"");
|
||||
long end = System.currentTimeMillis() / 1000;
|
||||
long start = end - 3600; // 过去1小时
|
||||
prometheusUtil.series(metricNames,start, end);
|
||||
Map<String, Item> itemMap = host.getItems();
|
||||
if (MapUtil.isEmpty(itemMap) || CollUtil.isEmpty(itemMap.values())) {
|
||||
return;
|
||||
}
|
||||
Collection<Item> items = itemMap.values();
|
||||
Collection<Item> monitorItems = MonitorConfig.getAllItems();
|
||||
|
||||
//Set<String> metric = prometheusUtil.metricNames();
|
||||
//List<String> metricNames =new ArrayList<>();
|
||||
//metricNames.add("instance=\""+host.getHostId()+"\"");
|
||||
//long end = System.currentTimeMillis() / 1000;
|
||||
//long start = end - 3600; // 过去1小时
|
||||
//prometheusUtil.series(metricNames,start, end);
|
||||
// Map<String, Item> itemMap = host.getItems();
|
||||
// if (MapUtil.isEmpty(itemMap) || CollUtil.isEmpty(itemMap.values())) {
|
||||
// return;
|
||||
// }
|
||||
//Collection<Item> items = itemMap.values();
|
||||
|
||||
List<AlarmItem> alarmItems = new ArrayList<>();
|
||||
items.forEach(item -> {
|
||||
|
||||
monitorItems.forEach(item -> {
|
||||
AlarmItem alarmItem = BeanUtil.copyProperties(item, AlarmItem.class);
|
||||
alarmItem.setId(item.getItemId());
|
||||
alarmItem.setHostId(hostId);
|
||||
alarmItems.add(alarmItem);
|
||||
});
|
||||
alarmItemService.saveOrUpdateBatch(alarmItems);
|
||||
|
|
|
|||
|
|
@ -12,32 +12,102 @@ public class MetricConfig {
|
|||
public static final Map<String, MetricConfig> METRIC_MAP = new HashMap<>();
|
||||
|
||||
static {
|
||||
// 根据你提供的 getItems 返回结果进行 1:1 映射
|
||||
METRIC_MAP.put("37494", new MetricConfig("cpuUtilization",
|
||||
// 根据之前讨论的固定ID进行映射
|
||||
// CPU相关 (1001-1003)
|
||||
METRIC_MAP.put("1001", new MetricConfig("cpu_usage_percent",
|
||||
"(1 - avg(irate(node_cpu_seconds_total{instance='%s',mode='idle'}[5m]))) * 100",
|
||||
"%"));
|
||||
METRIC_MAP.put("1002", new MetricConfig("cpuUtilization",
|
||||
"(1 - avg(irate(node_cpu_seconds_total{instance='%s',mode='idle'}[1m]))) * 100",
|
||||
"%"));
|
||||
METRIC_MAP.put("37445",
|
||||
new MetricConfig("swapTotalSize", "node_memory_SwapTotal_bytes{instance='%s'}",
|
||||
"B"));
|
||||
METRIC_MAP.put("37469", new MetricConfig("load", "node_load1{instance='%s'}", ""));
|
||||
METRIC_MAP.put("37452", new MetricConfig("swapUtilization",
|
||||
"(1 - (node_memory_SwapFree_bytes{instance='%s'} / node_memory_SwapTotal_bytes{instance='%s'})) * 100",
|
||||
"%"));
|
||||
METRIC_MAP.put("37443",
|
||||
new MetricConfig("freeSwapSpace", "node_memory_SwapFree_bytes{instance='%s'}",
|
||||
"B"));
|
||||
METRIC_MAP.put("37460", new MetricConfig("latency",
|
||||
"irate(node_disk_read_time_seconds_total{instance='%s'}[1m]) * 1000", "ms"));
|
||||
METRIC_MAP.put("37449", new MetricConfig("memoryUtilization",
|
||||
METRIC_MAP.put("1003", new MetricConfig("system_load_average",
|
||||
"node_load1{instance='%s'}",
|
||||
"load"));
|
||||
|
||||
// 内存相关 (1101-1107)
|
||||
METRIC_MAP.put("1101", new MetricConfig("memory_used_percent",
|
||||
"(1 - (node_memory_MemAvailable_bytes{instance='%s'} / node_memory_MemTotal_bytes{instance='%s'})) * 100",
|
||||
"%"));
|
||||
METRIC_MAP.put("37660", new MetricConfig("throughput",
|
||||
"sum(irate(node_network_receive_bytes_total{instance='%s',device!~'lo'}[1m]))",
|
||||
METRIC_MAP.put("1102", new MetricConfig("memoryUtilization",
|
||||
"(1 - (node_memory_MemAvailable_bytes{instance='%s'} / node_memory_MemTotal_bytes{instance='%s'})) * 100",
|
||||
"%"));
|
||||
METRIC_MAP.put("1103", new MetricConfig("memAvailable",
|
||||
"node_memory_MemAvailable_bytes{instance='%s'}",
|
||||
"B"));
|
||||
METRIC_MAP.put("37493", new MetricConfig("responseSuccessRate", "vector(100)", "%"));
|
||||
METRIC_MAP.put("37454",
|
||||
new MetricConfig("memAvailable", "node_memory_MemAvailable_bytes{instance='%s'}",
|
||||
"B"));
|
||||
METRIC_MAP.put("1104", new MetricConfig("swapUtilization",
|
||||
"(1 - (node_memory_SwapFree_bytes{instance='%s'} / node_memory_SwapTotal_bytes{instance='%s'})) * 100",
|
||||
"%"));
|
||||
METRIC_MAP.put("1105", new MetricConfig("swapTotalSize",
|
||||
"node_memory_SwapTotal_bytes{instance='%s'}",
|
||||
"B"));
|
||||
METRIC_MAP.put("1106", new MetricConfig("freeSwapSpace",
|
||||
"node_memory_SwapFree_bytes{instance='%s'}",
|
||||
"B"));
|
||||
METRIC_MAP.put("1107", new MetricConfig("swap_used_percent",
|
||||
"(1 - (node_memory_SwapFree_bytes{instance='%s'} / node_memory_SwapTotal_bytes{instance='%s'})) * 100",
|
||||
"%"));
|
||||
|
||||
// 磁盘相关 (1201-1202)
|
||||
METRIC_MAP.put("1201", new MetricConfig("disk_io_read_bytes",
|
||||
"sum(irate(node_disk_read_bytes_total{instance='%s'}[5m]))",
|
||||
"bytes/s"));
|
||||
METRIC_MAP.put("1202", new MetricConfig("disk_io_write_bytes",
|
||||
"irate(node_disk_written_bytes_total{instance='%s'}[5m])",
|
||||
"bytes/s"));
|
||||
|
||||
// 网络相关 (1301-1306)
|
||||
METRIC_MAP.put("1301", new MetricConfig("network_receive_bytes",
|
||||
"irate(node_network_receive_bytes_total{instance='%s',device!='lo'}[5m])",
|
||||
"bytes/s"));
|
||||
METRIC_MAP.put("1302", new MetricConfig("network_transmit_bytes",
|
||||
"irate(node_network_transmit_bytes_total{instance='%s',device!='lo'}[5m])",
|
||||
"bytes/s"));
|
||||
METRIC_MAP.put("1303", new MetricConfig("throughput",
|
||||
"sum(irate(node_network_receive_bytes_total{instance='%s',device!~'lo'}[1m]))",
|
||||
"Kb/s"));
|
||||
METRIC_MAP.put("1304", new MetricConfig("latency_ms",
|
||||
"avg(irate(node_network_receive_bytes_total{instance='%s'}[5m])) > 0",
|
||||
"ms"));
|
||||
METRIC_MAP.put("1305", new MetricConfig("latency_s",
|
||||
"histogram_quantile(0.95, sum(rate(http_request_duration_seconds_bucket{instance='%s'}[5m])) by (le))",
|
||||
"s"));
|
||||
METRIC_MAP.put("1306", new MetricConfig("tcp_established_connections",
|
||||
"node_netstat_Tcp_CurrEstab{instance='%s'}",
|
||||
"count"));
|
||||
|
||||
// 进程相关 (1401)
|
||||
METRIC_MAP.put("1401", new MetricConfig("process_count",
|
||||
"node_procs_running{instance='%s'}",
|
||||
"count"));
|
||||
|
||||
// 数据库相关 (1501-1502)
|
||||
METRIC_MAP.put("1501", new MetricConfig("dbMemory",
|
||||
"pg_database_size_bytes{datname='armd',instance='%s'}/1024",
|
||||
"Kb"));
|
||||
METRIC_MAP.put("1502", new MetricConfig("dblSize",
|
||||
"sum(pg_database_size_bytes{instance='%s'})",
|
||||
"B"));
|
||||
|
||||
// 应用相关 (1601-1603)
|
||||
METRIC_MAP.put("1601", new MetricConfig("responseSuccessRate",
|
||||
"sum(rate(http_requests_total{instance='%s',status=~'2..'}[5m])) / sum(rate(http_requests_total{instance='%s'}[5m])) * 100",
|
||||
"%"));
|
||||
METRIC_MAP.put("1602", new MetricConfig("login",
|
||||
"sum(increase(login_attempts_total{instance='%s'}[5m]))",
|
||||
""));
|
||||
METRIC_MAP.put("1603", new MetricConfig("connections",
|
||||
"node_netstat_Tcp_CurrEstab{instance='%s'}",
|
||||
""));
|
||||
|
||||
// 日志相关 (1701)
|
||||
METRIC_MAP.put("1701", new MetricConfig("logRemainingSize",
|
||||
"node_filesystem_avail_bytes{instance='%s',mountpoint=~'/var/log.*|/opt/log.*'}",
|
||||
"B"));
|
||||
|
||||
// 系统负载 (1801)
|
||||
METRIC_MAP.put("1801", new MetricConfig("load",
|
||||
"node_load1{instance='%s'}",
|
||||
""));
|
||||
}
|
||||
|
||||
private String name;
|
||||
|
|
|
|||
|
|
@ -0,0 +1,25 @@
|
|||
package org.jeecg.modules.entity;
|
||||
|
||||
import lombok.Data;
|
||||
import org.jeecg.modules.base.entity.monitor.Item;
|
||||
|
||||
@Data
|
||||
public class MonitorItem extends Item {
|
||||
|
||||
// 构造方法
|
||||
public MonitorItem() {
|
||||
}
|
||||
|
||||
public MonitorItem(String itemId, String name, String hostId, String description,
|
||||
String units, String status, org.jeecg.modules.entity.ValueType valueType) {
|
||||
|
||||
this.setItemId(itemId);
|
||||
this.setName(name);
|
||||
this.setHostId(hostId);
|
||||
this.setDescription(description);
|
||||
this.setUnits(units);
|
||||
this.setStatus(status);
|
||||
this.setValueType(String.valueOf(valueType.getCode()));
|
||||
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,83 @@
|
|||
package org.jeecg.modules.entity;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/**
 * Value types a monitor item can report. Each constant carries a numeric code, a
 * lower-case type name and a description.
 */
public enum ValueType {

    INT(1, "int", "整型"),
    FLOAT(2, "float", "浮点型"),
    DOUBLE(3, "double", "双精度浮点型"),
    LONG(4, "long", "长整型"),
    STRING(5, "string", "字符串类型"),
    BOOLEAN(6, "boolean", "布尔类型");

    /** Numeric code. */
    private final int code;
    /** Type name, e.g. {@code "float"}. */
    private final String name;
    /** Description text. */
    private final String desc;

    ValueType(int code, String name, String desc) {
        this.code = code;
        this.name = name;
        this.desc = desc;
    }

    /** @return the numeric code of this type */
    public int getCode() {
        return code;
    }

    /** @return the type name (distinct from the enum constant name returned by {@code name()}) */
    public String getName() {
        return name;
    }

    /** @return the description text */
    public String getDesc() {
        return desc;
    }

    /**
     * Resolves a constant from its numeric code.
     *
     * @param code numeric code to look up
     * @return the matching constant
     * @throws IllegalArgumentException if no constant has this code
     */
    public static ValueType fromCode(int code) {
        ValueType match = findByCode(code);
        if (match == null) {
            throw new IllegalArgumentException("无效的值类型代码: " + code);
        }
        return match;
    }

    /**
     * Resolves a constant from its type name, ignoring case.
     *
     * @param name type name to look up; {@code null} matches nothing
     * @return the matching constant
     * @throws IllegalArgumentException if no constant has this name
     */
    public static ValueType fromName(String name) {
        for (ValueType type : values()) {
            if (type.name.equalsIgnoreCase(name)) {
                return type;
            }
        }
        throw new IllegalArgumentException("无效的值类型名称: " + name);
    }

    /**
     * Tells whether a numeric code belongs to one of the constants.
     *
     * @param code numeric code to check
     * @return {@code true} if {@link #fromCode(int)} would succeed for this code
     */
    public static boolean isValidCode(int code) {
        return findByCode(code) != null;
    }

    /**
     * Lists the numeric codes of all constants in declaration order.
     *
     * @return a new list of codes
     */
    public static List<Integer> getAllCodes() {
        return Arrays.stream(values())
                .map(ValueType::getCode)
                .collect(Collectors.toList());
    }

    /** Single lookup shared by fromCode and isValidCode; returns null when nothing matches. */
    private static ValueType findByCode(int code) {
        for (ValueType type : values()) {
            if (type.code == code) {
                return type;
            }
        }
        return null;
    }
}
|
||||
|
|
@ -16,8 +16,7 @@ public interface IMonitorService {
|
|||
|
||||
void dbDetails(String start, String end, String hostId);
|
||||
|
||||
Result<?> queryCpuHistoryData(String itemId, Integer itemType, String start, String end,
|
||||
String instance);
|
||||
Result<?> queryCpuHistoryData(String itemId, Integer itemType, String start, String end);
|
||||
|
||||
Result<?> detail( String hostId, String pageName, String start, String end);
|
||||
|
||||
|
|
|
|||
|
|
@ -35,6 +35,7 @@ import org.jeecg.modules.base.entity.monitor.History;
|
|||
import org.jeecg.modules.base.entity.monitor.Host;
|
||||
import org.jeecg.modules.base.entity.monitor.Item;
|
||||
import org.jeecg.modules.base.entity.monitor.ItemHistory;
|
||||
import org.jeecg.modules.base.entity.postgre.AlarmItem;
|
||||
import org.jeecg.modules.base.entity.postgre.SysDatabase;
|
||||
import org.jeecg.modules.base.entity.postgre.SysServer;
|
||||
import org.jeecg.modules.base.enums.ServerStatus;
|
||||
|
|
@ -42,6 +43,7 @@ import org.jeecg.modules.entity.*;
|
|||
import org.jeecg.modules.feignclient.ManageUtil;
|
||||
import org.jeecg.modules.feignclient.MonitorAlarm;
|
||||
import org.jeecg.modules.feignclient.SystemClient;
|
||||
import org.jeecg.modules.mapper.AlarmItemMapper;
|
||||
import org.jeecg.modules.mapper.SysServerMapper;
|
||||
import org.jeecg.modules.service.IMonitorService;
|
||||
import org.jeecg.modules.service.ISysDatabaseService;
|
||||
|
|
@ -77,6 +79,8 @@ public class MonitorServiceImpl implements IMonitorService {
|
|||
|
||||
@Autowired
|
||||
private SystemClient systemClient;
|
||||
@Autowired
|
||||
private AlarmItemMapper alarmItemMapper;
|
||||
|
||||
@Autowired
|
||||
private ISysDatabaseService sysDatabaseService;
|
||||
|
|
@ -86,41 +90,8 @@ public class MonitorServiceImpl implements IMonitorService {
|
|||
private static final DateTimeFormatter FORMATTER =
|
||||
DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
|
||||
|
||||
private static final String INSTANCE_9100 = "172.21.170.11:9100";
|
||||
//private static final String INSTANCE_9256 = "172.21.170.11:9256";
|
||||
//endregion
|
||||
|
||||
// region 数据库监控 PromQL
|
||||
|
||||
private static final String PG_INSTANCE = "172.21.170.11.*";
|
||||
|
||||
private static final String SHARED_BUFFERS =
|
||||
"pg_settings_shared_buffers_bytes{instance=~\"" + PG_INSTANCE + "\"}";
|
||||
|
||||
private static final String ACTIVE_CONNECTIONS =
|
||||
"sum(pg_stat_database_numbackends{instance=~\"" + PG_INSTANCE
|
||||
+ "\",datname!~\"template[0-1]\"})";
|
||||
|
||||
private static final String CONNECTION_NUMBER = ACTIVE_CONNECTIONS;
|
||||
|
||||
private static final String BUFFER_HIT_RATIO =
|
||||
"pg_stat_database_blks_hit{instance=~\"" + PG_INSTANCE + "\",datname=\"postgres\"}"
|
||||
+ " / "
|
||||
+ "(pg_stat_database_blks_hit{instance=~\"" + PG_INSTANCE
|
||||
+ "\",datname=\"postgres\"}"
|
||||
+ " + pg_stat_database_blks_read{instance=~\"" + PG_INSTANCE
|
||||
+ "\",datname=\"postgres\"})"
|
||||
+ " * 100";
|
||||
|
||||
private static final String WAL_SIZE =
|
||||
"pg_settings_max_wal_size_bytes{instance=~\"" + PG_INSTANCE + "\"}";
|
||||
|
||||
private static final String DATABASE_SIZE =
|
||||
"sum(pg_database_size_bytes{instance=~\"" + PG_INSTANCE + "\"})";
|
||||
|
||||
// endregion
|
||||
|
||||
|
||||
//region 通用查询方法
|
||||
|
||||
/**
|
||||
|
|
@ -159,8 +130,11 @@ public class MonitorServiceImpl implements IMonitorService {
|
|||
//region CPU 监控
|
||||
@Override
|
||||
public Result<?> queryCpuHistoryData(String itemId, Integer itemType,
|
||||
String start, String end, String instance) {
|
||||
String start, String end) {
|
||||
try {
|
||||
//通过itemId查找监控项
|
||||
AlarmItem alarmItem = alarmItemMapper.selectById(itemId);
|
||||
String instance = alarmItem.getHostId();
|
||||
long startTime = parseTimestamp(start);
|
||||
long endTime = parseTimestamp(end);
|
||||
|
||||
|
|
@ -196,7 +170,7 @@ public class MonitorServiceImpl implements IMonitorService {
|
|||
|
||||
String promql = String.format(
|
||||
"1 - avg(rate(node_cpu_seconds_total{instance='%s', mode='idle'}[1m]))",
|
||||
INSTANCE_9100);
|
||||
hostId);
|
||||
|
||||
List<PrometheusUtil.QueryResult> results = queryRange(promql, start, end, 60);
|
||||
return parseToCpuResult(results, "cpuUtilization", "%", true);
|
||||
|
|
@ -209,7 +183,7 @@ public class MonitorServiceImpl implements IMonitorService {
|
|||
String promql = String.format(
|
||||
"(node_filesystem_avail_bytes{instance='%s', mountpoint='/'} / "
|
||||
+ "node_filesystem_size_bytes{instance='%s', mountpoint='/'}) * 100",
|
||||
INSTANCE_9100, INSTANCE_9100);
|
||||
hostId, hostId);
|
||||
|
||||
List<PrometheusUtil.QueryResult> results = queryRange(promql, start, end, 60);
|
||||
return parseToCpuResult(results, "cpuIdleRate", "%", false);
|
||||
|
|
@ -220,7 +194,7 @@ public class MonitorServiceImpl implements IMonitorService {
|
|||
long end = parseToUnix(endStr);
|
||||
|
||||
String promql = String.format(
|
||||
"irate(node_intr_total{instance='%s'}[1m])", INSTANCE_9100);
|
||||
"irate(node_intr_total{instance='%s'}[1m])", hostId);
|
||||
|
||||
List<PrometheusUtil.QueryResult> results = queryRange(promql, start, end, 60);
|
||||
return parseToCpuResult(results, "cpuInterrupt", "", false);
|
||||
|
|
@ -231,7 +205,7 @@ public class MonitorServiceImpl implements IMonitorService {
|
|||
long end = parseToUnix(endStr);
|
||||
|
||||
String promql = String.format(
|
||||
"irate(node_context_switches_total{instance='%s'}[1m])", INSTANCE_9100);
|
||||
"irate(node_context_switches_total{instance='%s'}[1m])", hostId);
|
||||
|
||||
List<PrometheusUtil.QueryResult> results = queryRange(promql, start, end, 60);
|
||||
return parseToCpuResult(results, "cpuSwitch", "", false);
|
||||
|
|
@ -243,7 +217,7 @@ public class MonitorServiceImpl implements IMonitorService {
|
|||
|
||||
String promql = String.format(
|
||||
"sum by (cpu) (irate(node_cpu_seconds_total{instance='%s', mode!='idle'}[1m])) * 100",
|
||||
INSTANCE_9100);
|
||||
instance);
|
||||
|
||||
List<PrometheusUtil.QueryResult> results = queryRange(promql, start, end, 60);
|
||||
List<CpuMonitorResult> coreResults = new ArrayList<>();
|
||||
|
|
@ -306,7 +280,6 @@ public class MonitorServiceImpl implements IMonitorService {
|
|||
}
|
||||
//endregion
|
||||
|
||||
|
||||
//region 数据库 监控
|
||||
@Override
|
||||
public void dbDetails(String start, String end, String hostId) {
|
||||
|
|
@ -338,36 +311,44 @@ public class MonitorServiceImpl implements IMonitorService {
|
|||
case "postgresql":
|
||||
//postgres
|
||||
result.add(buildItemFromQuery("Database memory state(Total memory)", "B",
|
||||
SHARED_BUFFERS, startTime, endTime, step));
|
||||
"pg_settings_shared_buffers_bytes{instance='"+hostId+"'}", startTime, endTime, step));
|
||||
result.add(buildItemFromQuery("Database user status(Logins per second)", "",
|
||||
ACTIVE_CONNECTIONS, startTime, endTime, step));
|
||||
"sum(pg_stat_database_numbackends{instance=~\"" + hostId
|
||||
+ "\",datname!~\"template[0-1]\"})", startTime, endTime, step));
|
||||
result.add(buildItemFromQuery("Database connection number", "",
|
||||
CONNECTION_NUMBER, startTime, endTime, step));
|
||||
"sum(pg_stat_database_numbackends{instance=~\"" + hostId
|
||||
+ "\",datname!~\"template[0-1]\"})", startTime, endTime, step));
|
||||
result.add(buildItemFromQuery("Database connection response time", "",
|
||||
BUFFER_HIT_RATIO, startTime, endTime, step));
|
||||
"pg_stat_database_blks_hit{instance=~\"" + hostId + "\",datname=\"postgres\"}"
|
||||
+ " / "
|
||||
+ "(pg_stat_database_blks_hit{instance=~\"" + hostId
|
||||
+ "\",datname=\"postgres\"}"
|
||||
+ " + pg_stat_database_blks_read{instance=~\"" + hostId
|
||||
+ "\",datname=\"postgres\"})"
|
||||
+ " * 100", startTime, endTime, step));
|
||||
result.add(buildItemFromQuery("Remaining space of the database log file", "B",
|
||||
WAL_SIZE, startTime, endTime, step));
|
||||
"pg_settings_max_wal_size_bytes{instance=~\"" + hostId + "\"}", startTime, endTime, step));
|
||||
result.add(buildItemFromQuery("Database file size", "B",
|
||||
DATABASE_SIZE, startTime, endTime, step));
|
||||
"sum(pg_database_size_bytes{instance=~\"" + hostId + "\"})", startTime, endTime, step));
|
||||
break;
|
||||
case "Oracle":
|
||||
//TODO 待修改,优化promql语句
|
||||
//Oracle
|
||||
result.add(buildItemFromQuery("Database memory state(Total memory)", "B",
|
||||
"oracledb_sysmetric_total_pga_allocated{instance=\"Oracle数据库(IP:172.21.170.10,实例:orcl实例)\"}",
|
||||
"oracledb_sysmetric_total_pga_allocated{instance=\""+hostId+"\"}",
|
||||
startTime, endTime, step));
|
||||
result.add(buildItemFromQuery("Database user status(Logins per second)", "",
|
||||
"oracledb_sysmetric_logons_per_sec", startTime, endTime, step));
|
||||
result.add(buildItemFromQuery("Database connection number", "",
|
||||
"oracledb_sysmetric_session_count", startTime, endTime, step));
|
||||
result.add(buildItemFromQuery("Database connection response time", "",
|
||||
"oracledb_sysmetric_sql_service_response_time{instance=\"Oracle数据库(IP:172.21.170.10,实例:orcl实例)\"}",
|
||||
"oracledb_sysmetric_sql_service_response_time{instance=\""+hostId+"\"}",
|
||||
startTime, endTime, step));
|
||||
result.add(buildItemFromQuery("Remaining space of the database log file", "B",
|
||||
"oracledb_env_redo_value{instance=\"Oracle数据库(IP:172.21.170.10,实例:orcl实例)\",name=\"log_size\"}",
|
||||
"oracledb_env_redo_value{instance=\""+hostId+"\",name=\"log_size\"}",
|
||||
startTime, endTime, step));
|
||||
result.add(buildItemFromQuery("Database file size", "B",
|
||||
"sum(oracledb_tablespace_bytes{instance=\"Oracle数据库(IP:172.21.170.10,实例:orcl实例)\"})",
|
||||
"sum(oracledb_tablespace_bytes{instance=\""+hostId+"\"})",
|
||||
startTime, endTime, step));
|
||||
break;
|
||||
}
|
||||
|
|
@ -409,7 +390,6 @@ public class MonitorServiceImpl implements IMonitorService {
|
|||
//region 服务器与进程
|
||||
@Override
|
||||
public Result<?> detail(String hostId, String pageName, String start, String end) {
|
||||
hostId = "172.21.170.10";
|
||||
|
||||
switch (pageName) {
|
||||
case "serviceAndProcess":
|
||||
|
|
@ -426,19 +406,25 @@ public class MonitorServiceImpl implements IMonitorService {
|
|||
private HostDetailsResult getHostDetails(String hostIp, String start, String end) {
|
||||
long startTime = parseTimestamp(start);
|
||||
long endTime = parseTimestamp(end);
|
||||
long step = calculateStep(startTime, endTime);
|
||||
String cpuQuery =
|
||||
"topk(5,\n" +
|
||||
" avg_over_time(\n" +
|
||||
" sum by (groupname) (\n" +
|
||||
" irate(namedprocess_namegroup_cpu_seconds_total[5m])\n" +
|
||||
" )[5m:]\n" +
|
||||
" )\n" +
|
||||
")";
|
||||
|
||||
String cpuQuery = String.format(
|
||||
"topk(5, sum by (groupname) (irate(namedprocess_namegroup_cpu_seconds_total{instance='%s'}[5m])))",
|
||||
hostIp + ":9256");
|
||||
|
||||
String memQuery = String.format(
|
||||
"topk(5,namedprocess_namegroup_memory_bytes{instance=~\"%s\", memtype=\"resident\"}"
|
||||
+ "/scalar(max(node_memory_MemTotal_bytes{instance=~\"%s\"})))",
|
||||
hostIp + ":9256", hostIp + ":9100");
|
||||
hostIp, hostIp);
|
||||
|
||||
HostDetailsResult result = new HostDetailsResult();
|
||||
result.setCpu(queryProcessMetrics(cpuQuery, startTime, endTime, true));
|
||||
result.setMemory(queryProcessMetrics(memQuery, startTime, endTime, true));
|
||||
result.setCpu(queryProcessMetrics(cpuQuery, startTime, endTime, true, step));
|
||||
result.setMemory(queryProcessMetrics(memQuery, startTime, endTime, true, step));
|
||||
return result;
|
||||
}
|
||||
|
||||
|
|
@ -448,13 +434,18 @@ public class MonitorServiceImpl implements IMonitorService {
|
|||
* @param isPercent 是否乘以 100 转百分比
|
||||
*/
|
||||
private List<HostDetailsResult.ProcessMetric> queryProcessMetrics(
|
||||
String promql, long start, long end, boolean isPercent) {
|
||||
String promql, long start, long end, boolean isPercent, long step) {
|
||||
|
||||
List<PrometheusUtil.QueryResult> results = queryRange(promql, start, end, 60);
|
||||
List<PrometheusUtil.QueryResult> resultList = queryRange(promql, start, end, step);
|
||||
List<PrometheusUtil.QueryResult> safeSorted = resultList.stream()
|
||||
.sorted(Comparator.comparingInt(qr ->
|
||||
qr.getValues() == null ? 0 : qr.getValues().size()
|
||||
))
|
||||
.collect(Collectors.toList());
|
||||
List<HostDetailsResult.ProcessMetric> metrics = new ArrayList<>();
|
||||
double factor = isPercent ? 100.0 : 1.0;
|
||||
|
||||
for (PrometheusUtil.QueryResult qr : results) {
|
||||
for (PrometheusUtil.QueryResult qr : safeSorted) {
|
||||
String name = qr.getMetrics() != null
|
||||
? qr.getMetrics().getOrDefault("groupname", "unknown")
|
||||
: "unknown";
|
||||
|
|
@ -465,8 +456,8 @@ public class MonitorServiceImpl implements IMonitorService {
|
|||
if (qr.getValues() != null) {
|
||||
for (PrometheusUtil.DataPoint p : qr.getValues()) {
|
||||
history.add(new HostDetailsResult.HistoryPoint(
|
||||
p.getTimestamp(), p.getValue()));
|
||||
stats.accept(p.getValue());
|
||||
p.getTimestamp(), p.getValue() / 100));
|
||||
stats.accept(p.getValue() / 100);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -520,17 +511,20 @@ public class MonitorServiceImpl implements IMonitorService {
|
|||
@Override
|
||||
public Result<?> item(String itemId, Integer itemType, String start, String end) {
|
||||
MetricConfig config = MetricConfig.METRIC_MAP.get(itemId);
|
||||
//通过itemId查找监控项
|
||||
AlarmItem alarmItem = alarmItemMapper.selectById(itemId);
|
||||
|
||||
if (config == null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
String instance = alarmItem.getHostId();
|
||||
try {
|
||||
long startTs = LocalDateTime.parse(start, FORMATTER)
|
||||
.toEpochSecond(ZoneOffset.ofHours(8));
|
||||
long endTs = LocalDateTime.parse(end, FORMATTER)
|
||||
.toEpochSecond(ZoneOffset.ofHours(8));
|
||||
|
||||
String promql = String.format(config.getPromQl(), INSTANCE_9100, INSTANCE_9100);
|
||||
String promql = String.format(config.getPromQl(), instance, instance);
|
||||
List<PrometheusUtil.QueryResult> results = queryRange(promql, startTs, endTs, 60);
|
||||
|
||||
List<Map<String, Object>> historyPoints = new ArrayList<>();
|
||||
|
|
@ -558,7 +552,7 @@ public class MonitorServiceImpl implements IMonitorService {
|
|||
return Result.OK(finalResult);
|
||||
|
||||
} catch (Exception e) {
|
||||
log.error("查询指标项失败: itemId={}", itemId, e);
|
||||
log.error("查询指标项失败: instance={}", instance, e);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -20,6 +20,7 @@ import org.jeecg.common.system.vo.DictModel;
|
|||
import org.jeecg.common.util.JDBCUtil;
|
||||
import org.jeecg.common.util.NumUtil;
|
||||
import org.jeecg.common.util.RedisUtil;
|
||||
import org.jeecg.modules.Util.PrometheusAlertManager;
|
||||
import org.jeecg.modules.Util.PrometheusUtil;
|
||||
import org.jeecg.modules.base.dto.*;
|
||||
import org.jeecg.modules.base.entity.monitor.Host;
|
||||
|
|
@ -44,6 +45,11 @@ import org.springframework.scheduling.annotation.Scheduled;
|
|||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.transaction.annotation.Transactional;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.FileStore;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
import java.time.LocalDate;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
import java.util.*;
|
||||
|
|
@ -68,6 +74,8 @@ public class SysDatabaseServiceImpl extends ServiceImpl<SysDatabaseMapper, SysDa
|
|||
|
||||
@Autowired
|
||||
private PrometheusUtil prometheusUtil;
|
||||
@Autowired
|
||||
private PrometheusAlertManager prometheusAlertManager;
|
||||
|
||||
@Override
|
||||
public Result<?> findPage(QueryRequest query) {
|
||||
|
|
@ -116,7 +124,9 @@ public class SysDatabaseServiceImpl extends ServiceImpl<SysDatabaseMapper, SysDa
|
|||
online = prometheusUtil.isInstanceUp(instance);
|
||||
databaseDto.setOnline(online);
|
||||
// 如果数据库不在线 则不需要查询数据库的使用情况
|
||||
if (!online) continue;
|
||||
if (!online) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// 获取数据库指标信息
|
||||
// dbMemory kb -> GB
|
||||
|
|
@ -374,6 +384,18 @@ public class SysDatabaseServiceImpl extends ServiceImpl<SysDatabaseMapper, SysDa
|
|||
}
|
||||
spaceInfos = template.query(DBSQL.SPACE_OR, mapper);
|
||||
}
|
||||
if (StrUtil.equals(dbType, POSTGRESQL.getType())) {
|
||||
String dbUrl = sysDatabase.getDbUrl();
|
||||
String dbDriver = sysDatabase.getDbDriver();
|
||||
String dbUsername = sysDatabase.getDbUsername();
|
||||
String dbPassword = sysDatabase.getDbPassword();
|
||||
RowMapper<SpaceInfo> mapper = new SpaceRowMapper();
|
||||
JdbcTemplate template = JDBCUtil.template(dbUrl, dbDriver, dbUsername, dbPassword);
|
||||
if (ObjectUtil.isNull(template)) {
|
||||
return spaceInfos;
|
||||
}
|
||||
spaceInfos = template.query(DBSQL.SPACE_PG, mapper);
|
||||
}
|
||||
return spaceInfos;
|
||||
}
|
||||
|
||||
|
|
@ -541,7 +563,8 @@ public class SysDatabaseServiceImpl extends ServiceImpl<SysDatabaseMapper, SysDa
|
|||
instance);
|
||||
break;
|
||||
case LOGREMAININGSIZE:
|
||||
promql = String.format("pg_wal_size_bytes{instance=\"%s\"}", instance);
|
||||
promql = String.format("pg_wal_size_bytes{instance=\"%s\"}/(1024 * 1024 )",
|
||||
instance);
|
||||
break;
|
||||
}
|
||||
} else if ("Oracle".equalsIgnoreCase(dbType)) {
|
||||
|
|
|
|||
|
|
@ -396,48 +396,54 @@ public class SysServerServiceImpl extends ServiceImpl<SysServerMapper, SysServer
|
|||
return Result.error("HostId" + Prompt.PARAM_REQUIRED);
|
||||
}
|
||||
//查询条件
|
||||
sysServer.setIpAddress("172.21.170.11");
|
||||
String instance = sysServer.getIpAddress() + ":9100";
|
||||
String instance = sysServer.getIpAddress();
|
||||
BasicInfo basicInfo = new BasicInfo();
|
||||
|
||||
|
||||
// --- 静态元数据 (从 node_uname_info 标签获取) ---
|
||||
String osInfoquery = String.format("node_os_info{instance=\"%s\"}", instance);
|
||||
JSONObject osInfo = queryPromSingleMetric(osInfoquery);
|
||||
if (ObjectUtil.isNotNull(osInfo)) {
|
||||
JSONObject labels = osInfo.getJSONObject("metric");
|
||||
basicInfo.setOsName(labels.getString("name"));
|
||||
basicInfo.setOsVersion(labels.getString("version"));
|
||||
List<PrometheusUtil.QueryResult> queryResultList =
|
||||
prometheusUtil.queryInstantFull(osInfoquery, null);
|
||||
if (ObjectUtil.isNotNull(queryResultList) && !queryResultList.isEmpty()) {
|
||||
Map<String, String> labels = queryResultList.get(0).getMetrics();
|
||||
basicInfo.setOsName(labels.get("name"));
|
||||
basicInfo.setOsVersion(labels.get("version"));
|
||||
}
|
||||
|
||||
// 指标包含: nodename(hostname), release(osVersion), sysname(osName), machine(cpuType)
|
||||
JSONObject unameMetric =
|
||||
queryPromSingleMetric("node_uname_info{instance=\"" + instance + "\"}");
|
||||
if (ObjectUtil.isNotNull(unameMetric)) {
|
||||
JSONObject labels = unameMetric.getJSONObject("metric");
|
||||
basicInfo.setHostName(labels.getString("nodename"));
|
||||
basicInfo.setKernelVersion(labels.getString("release"));
|
||||
String unameQuery = String.format("node_uname_info{instance=\"%s\"}", instance);
|
||||
List<PrometheusUtil.QueryResult> unameQueryList =
|
||||
prometheusUtil.queryInstantFull(unameQuery, null);
|
||||
|
||||
if (ObjectUtil.isNotNull(unameQueryList) && !unameQueryList.isEmpty()) {
|
||||
Map<String, String> labels = unameQueryList.get(0).getMetrics();
|
||||
|
||||
basicInfo.setHostName(labels.get("nodename"));
|
||||
basicInfo.setKernelVersion(labels.get("release"));
|
||||
// basicInfo.setOsName(labels.getString("sysname"));
|
||||
basicInfo.setCpuType(labels.getString("machine"));
|
||||
basicInfo.setCpuType(labels.get("machine"));
|
||||
}
|
||||
|
||||
// --- 服务器状态与运行时间 ---
|
||||
Double upValue = queryPromSingleValue("up{instance='" + instance + "'}");
|
||||
Double upValue =
|
||||
prometheusUtil.queryInstant("up{instance='" + instance + "'}");
|
||||
basicInfo.setRunningState(ObjectUtil.isNotNull(upValue) && upValue == 1.0);
|
||||
|
||||
Double bootTime = queryPromSingleValue(
|
||||
"time() - node_boot_time_seconds{instance='" + instance + "'}");
|
||||
Double bootTime =
|
||||
prometheusUtil.queryInstant(
|
||||
"time() - node_boot_time_seconds{instance='" + instance + "'}");
|
||||
basicInfo.setRunTime(
|
||||
ObjectUtil.isNull(bootTime) ? "--" : NumberUtil.round(bootTime / 3600.0, 1) + "h");
|
||||
|
||||
// --- 硬件规格 ---
|
||||
Double memTotal =
|
||||
queryPromSingleValue("node_memory_MemTotal_bytes{instance='" + instance + "'}");
|
||||
Double memTotal = prometheusUtil.queryInstant(
|
||||
"node_memory_MemTotal_bytes{instance='" + instance + "'}");
|
||||
basicInfo.setRamSize(ObjectUtil.isNull(memTotal) ? "--" :
|
||||
NumberUtil.round(memTotal / 1024 / 1024 / 1024, 1) + "GB");
|
||||
|
||||
Double cpuCores = queryPromSingleValue(
|
||||
"count(node_cpu_seconds_total{instance='" + instance + "',mode='idle'})");
|
||||
Double cpuCores =
|
||||
prometheusUtil.queryInstant(
|
||||
"count(node_cpu_seconds_total{instance='" + instance + "',mode='idle'})");
|
||||
basicInfo.setCpuCores(
|
||||
ObjectUtil.isNull(cpuCores) ? "--" : String.valueOf(cpuCores.intValue()));
|
||||
|
||||
|
|
@ -446,34 +452,36 @@ public class SysServerServiceImpl extends ServiceImpl<SysServerMapper, SysServer
|
|||
"count(node_filesystem_size_bytes{instance=\"%s\", fstype!~\"tmpfs|squashfs|overlay|autofs\"})",
|
||||
instance
|
||||
);
|
||||
Double diskUsed = queryPromSingleValue(query);
|
||||
Double diskUsed = prometheusUtil.queryInstant(query);
|
||||
basicInfo.setTotalDiskPar(
|
||||
ObjectUtil.isNull(diskUsed) ? "--" : String.valueOf(diskUsed.intValue()));
|
||||
// 查询语句
|
||||
JSONObject dmiResult = getNodeDmiInfoMetric(
|
||||
"node_dmi_info * on(instance) group_left(time_zone) node_time_zone_offset_seconds {instance=\"" +
|
||||
instance + "\"}");
|
||||
if (ObjectUtil.isNotNull(dmiResult)) {
|
||||
String dmiQuery = String.format(
|
||||
"node_dmi_info * on(instance) group_left(time_zone) node_time_zone_offset_seconds {instance=\"%s\"}",
|
||||
instance);
|
||||
List<PrometheusUtil.QueryResult> dmiResultList =
|
||||
prometheusUtil.queryInstantFull(dmiQuery, null);
|
||||
if (ObjectUtil.isNotNull(dmiResultList) && !dmiResultList.isEmpty()) {
|
||||
|
||||
JSONObject dmiMetric = dmiResult.getJSONObject("metric");
|
||||
basicInfo.setBiosVersion(dmiMetric.getString("bios_version"));
|
||||
basicInfo.setBiosSupplier(dmiMetric.getString("bios_vendor"));
|
||||
basicInfo.setManufacturer(dmiMetric.getString("system_vendor"));
|
||||
basicInfo.setModelNumber(dmiMetric.getString("product_name"));
|
||||
String timeZone = dmiMetric.getString("time_zone");
|
||||
Map<String, String> dmiMetric = dmiResultList.get(0).getMetrics();
|
||||
basicInfo.setBiosVersion(dmiMetric.get("bios_version"));
|
||||
basicInfo.setBiosSupplier(dmiMetric.get("bios_vendor"));
|
||||
basicInfo.setManufacturer(dmiMetric.get("system_vendor"));
|
||||
basicInfo.setModelNumber(dmiMetric.get("product_name"));
|
||||
String timeZone = dmiMetric.get("time_zone");
|
||||
basicInfo.setZone(timeZone);
|
||||
}
|
||||
|
||||
//startTime
|
||||
JSONObject startTime =
|
||||
getNodeDmiInfoMetric("node_boot_time_seconds{instance=\"" + instance + "\"}");
|
||||
|
||||
if (ObjectUtil.isNotNull(startTime)) {
|
||||
List<PrometheusUtil.QueryResult> startTimeList = prometheusUtil.queryInstantFull(
|
||||
"node_boot_time_seconds{instance=\"" + instance + "\"}", null);
|
||||
if (ObjectUtil.isNotNull(startTimeList) && !startTimeList.isEmpty()) {
|
||||
// 解析
|
||||
JSONArray array = startTime.getJSONArray("value");
|
||||
PrometheusUtil.DataPoint dataPoint = startTimeList.get(0).getValue();
|
||||
Integer time = null;
|
||||
if (array != null && array.size() > 1) {
|
||||
Object value = array.get(1);
|
||||
if (dataPoint != null) {
|
||||
Object value = dataPoint.getValue();
|
||||
if (value instanceof String) {
|
||||
time = Double.valueOf((String) value).intValue();
|
||||
} else if (value instanceof Number) {
|
||||
|
|
@ -487,21 +495,21 @@ public class SysServerServiceImpl extends ServiceImpl<SysServerMapper, SysServer
|
|||
|
||||
// --- 实时利用率 (仪表盘数据) ---
|
||||
// CPU使用率 = 100 - idle
|
||||
Double cpuUsedVal = queryPromSingleValue(
|
||||
Double cpuUsedVal = prometheusUtil.queryInstant(
|
||||
"100 - (avg(irate(node_cpu_seconds_total{instance='" + instance +
|
||||
"',mode='idle'}[5m])) * 100)");
|
||||
basicInfo.setCpuUsed(ObjectUtil.isNull(cpuUsedVal) ? 0.0 :
|
||||
NumberUtil.round(cpuUsedVal, 1).doubleValue());
|
||||
|
||||
// 内存使用率 = (Total - Avail) / Total
|
||||
Double memUsedVal = queryPromSingleValue(
|
||||
Double memUsedVal = prometheusUtil.queryInstant(
|
||||
"(1 - (node_memory_MemAvailable_bytes{instance='" + instance +
|
||||
"'} / node_memory_MemTotal_bytes{instance='" + instance + "'})) * 100");
|
||||
basicInfo.setMemoryUsed(
|
||||
ObjectUtil.isNull(memUsedVal) ? 0.0 :
|
||||
NumberUtil.round(memUsedVal, 1).doubleValue());
|
||||
|
||||
// 响应成功率 (Prometheus通常需要通过blackbox_exporter或ping,这里演示默认值)
|
||||
// 响应成功率 ()
|
||||
basicInfo.setResponseSuccessRate(100.0);
|
||||
|
||||
// --- 磁盘使用情况 (多分区解析) ---
|
||||
|
|
@ -511,7 +519,7 @@ public class SysServerServiceImpl extends ServiceImpl<SysServerMapper, SysServer
|
|||
basicInfo.setIp(sysServer.getIpAddress());
|
||||
basicInfo.setLocation("DataCenter");
|
||||
|
||||
double netWorks = queryPromSingleValue(
|
||||
double netWorks = prometheusUtil.queryInstant(
|
||||
"count(node_network_info{operstate=\"up\",instance=\"" + instance + "\"})");
|
||||
basicInfo.setNetwork(String.valueOf(netWorks));
|
||||
|
||||
|
|
@ -674,26 +682,14 @@ public class SysServerServiceImpl extends ServiceImpl<SysServerMapper, SysServer
|
|||
instance + "',fstype!~'tmpfs|udev|overlay'}) * 100";
|
||||
|
||||
try {
|
||||
String url = PROM_URL + URLEncoder.encode(promql, StandardCharsets.UTF_8.name());
|
||||
// 1. 手动对 query 参数进行编码
|
||||
String encodedQuery = URLEncoder.encode(promql, StandardCharsets.UTF_8.name());
|
||||
|
||||
// 2. 拼接完整的 URL 字符串
|
||||
String fullUrl = PROM_URL + encodedQuery;
|
||||
|
||||
// 3. 【关键步骤】将 String 包装成 java.net.URI
|
||||
// 这样 RestTemplate 就会原封不动地发送这个地址
|
||||
java.net.URI uri = new java.net.URI(fullUrl);
|
||||
|
||||
String response = restTemplate.getForObject(uri, String.class);
|
||||
JSONObject json = JSON.parseObject(response);
|
||||
JSONArray resultArr = json.getJSONObject("data").getJSONArray("result");
|
||||
|
||||
for (int i = 0; i < resultArr.size(); i++) {
|
||||
JSONObject item = resultArr.getJSONObject(i);
|
||||
String mountpoint = item.getJSONObject("metric").getString("mountpoint");
|
||||
Double val = item.getJSONArray("value").getDouble(1);
|
||||
List<PrometheusUtil.QueryResult> queryResults =
|
||||
prometheusUtil.queryInstantFull(promql, null);
|
||||
|
||||
for (int i = 0; i < queryResults.size(); i++) {
|
||||
Map<String, String> item = queryResults.get(i).getMetrics();
|
||||
String mountpoint = item.get("mountpoint");
|
||||
PrometheusUtil.DataPoint dataPoint = queryResults.get(i).getValue();
|
||||
Double val = dataPoint.getValue();
|
||||
xData.add(mountpoint);
|
||||
yData.add(NumberUtil.round(val, 1).doubleValue());
|
||||
}
|
||||
|
|
@ -705,23 +701,6 @@ public class SysServerServiceImpl extends ServiceImpl<SysServerMapper, SysServer
|
|||
return diskUsedValue;
|
||||
}
|
||||
|
||||
public JSONObject getNodeDmiInfoMetric(String query) {
|
||||
|
||||
try {
|
||||
// 获取 Metric 标签的方法
|
||||
JSONObject metricObj = queryPromSingleMetric(query);
|
||||
|
||||
if (ObjectUtil.isNotNull(metricObj)) {
|
||||
|
||||
|
||||
return metricObj;
|
||||
}
|
||||
} catch (Exception e) {
|
||||
log.error("获取 BIOS Version 失败: {}", e.getMessage());
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* 时区值映射
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user