From 59e337065b763379c70c8697dc194d64c9268c3b Mon Sep 17 00:00:00 2001 From: duwenyuan <15600000461@163.com> Date: Tue, 12 May 2026 15:21:27 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E5=91=8A=E8=AD=A6=E7=B3=BB?= =?UTF-8?q?=E7=BB=9F=E7=9B=91=E6=8E=A7=E9=A1=B9=EF=BC=8C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../org/jeecg/modules/Util/MonitorConfig.java | 214 ++++++ .../modules/Util/PrometheusAlertManager.java | 682 ++++++++++++++++++ .../jeecg/modules/Util/PrometheusUtil.java | 54 +- .../jeecg/modules/aspect/StatusAspect.java | 45 +- .../jeecg/modules/entity/MetricConfig.java | 112 ++- .../org/jeecg/modules/entity/MonitorItem.java | 25 + .../org/jeecg/modules/entity/ValueType.java | 83 +++ .../modules/service/IMonitorService.java | 3 +- .../service/impl/MonitorServiceImpl.java | 124 ++-- .../service/impl/SysDatabaseServiceImpl.java | 27 +- .../service/impl/SysServerServiceImpl.java | 135 ++-- 11 files changed, 1306 insertions(+), 198 deletions(-) create mode 100644 jeecg-module-abnormal-alarm/src/main/java/org/jeecg/modules/Util/MonitorConfig.java create mode 100644 jeecg-module-abnormal-alarm/src/main/java/org/jeecg/modules/Util/PrometheusAlertManager.java create mode 100644 jeecg-module-abnormal-alarm/src/main/java/org/jeecg/modules/entity/MonitorItem.java create mode 100644 jeecg-module-abnormal-alarm/src/main/java/org/jeecg/modules/entity/ValueType.java diff --git a/jeecg-module-abnormal-alarm/src/main/java/org/jeecg/modules/Util/MonitorConfig.java b/jeecg-module-abnormal-alarm/src/main/java/org/jeecg/modules/Util/MonitorConfig.java new file mode 100644 index 00000000..89604593 --- /dev/null +++ b/jeecg-module-abnormal-alarm/src/main/java/org/jeecg/modules/Util/MonitorConfig.java @@ -0,0 +1,214 @@ +package org.jeecg.modules.Util; + +import org.jeecg.modules.base.entity.monitor.Item; +import org.jeecg.modules.entity.MonitorItem; +import org.jeecg.modules.entity.ValueType; + + +import java.util.*; +import java.util.stream.Collectors; + +/** + * 监控项配置类 - 包含所有预定义的监控项 + */ +public class MonitorConfig { + + // 默认主机ID + public static final String DEFAULT_HOST_ID = "host-001"; + + // 默认状态 + public static final String DEFAULT_STATUS = "active"; + + // 按类别分组存储监控项 + private static final Map> CATEGORY_ITEMS = new HashMap<>(); + // ID生成器 + private static int nextId = 1001; + + static { + initializeAllItems(); + } + + /** + * 初始化所有监控项 + */ + private static void initializeAllItems() { + // 生成ID列表 + Map idMap = new HashMap<>(); + + // CPU相关监控项 + List cpuItems = Arrays.asList( + createItem(1001, "cpu_usage_percent", "CPU使用率百分比,反映服务器计算资源占用情况", "%", ValueType.FLOAT), + createItem(1002, "cpuUtilization", "CPU utilization in %", "%", ValueType.FLOAT), + createItem(1003, "system_load_average", "系统平均负载,1分钟内等待运行的进程平均数", "load", ValueType.FLOAT) + ); + + // 内存相关监控项 + List memoryItems = Arrays.asList( + createItem(1101, "memory_used_percent", "内存使用率,服务器物理内存占用比例", "%", ValueType.FLOAT), + createItem(1102, "memoryUtilization", "Memory used percentage is calculated as (100-pavailable)", "%", ValueType.FLOAT), + createItem(1103, "memAvailable", "服务器物理内存可用空间", "B", ValueType.LONG), + createItem(1104, "swapUtilization", "服务器交换内存使用率", "%", ValueType.FLOAT), + createItem(1105, "swapTotalSize", "The total space of swap volume/file in bytes", "B", ValueType.LONG), + createItem(1106, "freeSwapSpace", "The free space of swap volume/file in bytes", "B", ValueType.LONG), + createItem(1107, "swap_used_percent", "交换分区(Swap)使用率,内存不足时的虚拟内存占用", "%", ValueType.FLOAT) + ); + + // 磁盘相关监控项 + List diskItems = Arrays.asList( + createItem(1201, "disk_io_read_bytes", "磁盘读取吞吐量,每秒读取的数据量", "bytes/s", ValueType.FLOAT), + createItem(1202, "disk_io_write_bytes", "磁盘写入吞吐量,每秒写入的数据量", "bytes/s", ValueType.FLOAT) + ); + + // 网络相关监控项 + List networkItems = Arrays.asList( + createItem(1301, "network_receive_bytes", "网络接收流量,网卡入站数据传输速率", "bytes/s", ValueType.FLOAT), + createItem(1302, "network_transmit_bytes", "网络发送流量,网卡出站数据传输速率", "bytes/s", ValueType.FLOAT), + createItem(1303, "throughput", "网卡吞吐量", "Kb/s", ValueType.FLOAT), + createItem(1304, "latency_ms", "响应延迟(毫秒)", "ms", ValueType.FLOAT), + createItem(1305, "latency_s", "响应时间(秒)", "s", ValueType.FLOAT), + createItem(1306, "tcp_established_connections", "当前建立的TCP连接数,反映网络连接活跃状态", "count", ValueType.INT) + ); + + // 进程相关监控项 + List processItems = Arrays.asList( + createItem(1401, "process_count", "当前运行中的进程总数", "count", ValueType.INT) + ); + + // 数据库相关监控项 + List databaseItems = Arrays.asList( + createItem(1501, "dbMemory", "数据库内存", "Kb", ValueType.LONG), + createItem(1502, "dblSize", "数据库文件占用空间", "B", ValueType.LONG) + ); + + // 应用相关监控项 + List appItems = Arrays.asList( + createItem(1601, "responseSuccessRate", "服务器响应成功率", "%", ValueType.FLOAT), + createItem(1602, "login", "登录数", "", ValueType.INT), + createItem(1603, "connections", "连接数", "", ValueType.INT) + ); + + // 日志相关监控项 + List logItems = Arrays.asList( + createItem(1701, "logRemainingSize", "日志剩余空间", "B", ValueType.LONG) + ); + + // 系统负载 + List systemItems = Arrays.asList( + createItem(1801, "load", "系统负载", "", ValueType.FLOAT) + ); + + // 添加到类别映射 + CATEGORY_ITEMS.put("cpu", cpuItems); + CATEGORY_ITEMS.put("memory", memoryItems); + CATEGORY_ITEMS.put("swap", memoryItems.stream() + .filter(item -> item.getName().contains("swap") || item.getName().equals("swap")) + .collect(Collectors.toList())); + CATEGORY_ITEMS.put("disk", diskItems); + CATEGORY_ITEMS.put("network", networkItems); + CATEGORY_ITEMS.put("process", processItems); + CATEGORY_ITEMS.put("database", databaseItems); + CATEGORY_ITEMS.put("application", appItems); + CATEGORY_ITEMS.put("log", logItems); + CATEGORY_ITEMS.put("system", systemItems); + } + + /** + * 创建监控项 + */ + private static MonitorItem createItem(int id, String name, String description, String units, ValueType valueType) { + return new MonitorItem(String.valueOf(id), name, DEFAULT_HOST_ID, description, units, DEFAULT_STATUS, valueType); + } + + /** + * 获取所有监控项 + */ + public static List getAllItems() { + return CATEGORY_ITEMS.values().stream() + .flatMap(List::stream) + .collect(Collectors.toList()); + } + + /** + * 按类别获取监控项 + */ + public static List getItemsByCategory(String category) { + return CATEGORY_ITEMS.getOrDefault(category.toLowerCase(), Collections.emptyList()); + } + + /** + * 通过名称查找监控项 + */ + public static Item findByName(String name) { + return getAllItems().stream() + .filter(item -> item.getName().equals(name)) + .findFirst() + .orElse(null); + } + + /** + * 通过ID查找监控项 + */ + public static Item findById(String itemId) { + return getAllItems().stream() + .filter(item -> item.getItemId().equals(itemId)) + .findFirst() + .orElse(null); + } + + /** + * 通过数字ID查找监控项 + */ + public static Item findById(int itemId) { + return findById(String.valueOf(itemId)); + } + + /** + * 获取指定主机的所有监控项 + */ + public static List getItemsByHost(String hostId) { + return getAllItems().stream() + .filter(item -> item.getHostId().equals(hostId)) + .collect(Collectors.toList()); + } + + /** + * 验证监控项是否存在 + */ + public static boolean contains(String name) { + return findByName(name) != null; + } + + /** + * 获取所有监控项名称 + */ + public static List getAllItemNames() { + return getAllItems().stream() + .map(Item::getName) + .collect(Collectors.toList()); + } + + /** + * 获取所有类别 + */ + public static List getAllCategories() { + return new ArrayList<>(CATEGORY_ITEMS.keySet()); + } + + /** + * 获取监控项统计信息 + */ + public static Map getStatistics() { + Map stats = new HashMap<>(); + stats.put("totalItems", getAllItems().size()); + stats.put("categories", getAllCategories()); + + Map categoryCounts = new HashMap<>(); + for (Map.Entry> entry : CATEGORY_ITEMS.entrySet()) { + categoryCounts.put(entry.getKey(), entry.getValue().size()); + } + stats.put("categoryCounts", categoryCounts); + + return stats; + } + +} diff --git a/jeecg-module-abnormal-alarm/src/main/java/org/jeecg/modules/Util/PrometheusAlertManager.java b/jeecg-module-abnormal-alarm/src/main/java/org/jeecg/modules/Util/PrometheusAlertManager.java new file mode 100644 index 00000000..ac56c7d9 --- /dev/null +++ b/jeecg-module-abnormal-alarm/src/main/java/org/jeecg/modules/Util/PrometheusAlertManager.java @@ -0,0 +1,682 @@ +package org.jeecg.modules.Util; + +import cn.hutool.core.io.FileUtil; +import cn.hutool.core.util.StrUtil; +import com.jcraft.jsch.*; +import lombok.extern.slf4j.Slf4j; +import org.jeecg.modules.base.entity.Rule; +import org.jeecg.modules.base.entity.postgre.AlarmRule; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.stereotype.Component; + +import java.io.*; +import java.util.*; +import java.util.concurrent.ConcurrentHashMap; + +/** + * Prometheus告警规则管理器 + * Prometheus服务器,管理告警规则 + */ +@Slf4j +@Component +public class PrometheusAlertManager { + + private Session session; + private ChannelExec channelExec; + private ChannelSftp channelSftp; + + + + // 连接配置 + @Value("${prometheus.ssh.host:127.0.0.1}") + private String host; + @Value("${prometheus.ssh.userName:rmsops}") + private String username; + @Value("${prometheus.ssh.password:cnndc66367220}") + private String password; + @Value("${prometheus.ssh.port:22}") + private int port; + + // 本地路径缓存 + private final Map localPathCache = new ConcurrentHashMap<>(); + + // 优化的超时配置 + private int connectionTimeout = 5000; // 本地5秒,远程30秒 + private int commandTimeout = 10000; // 本地10秒,远程60秒 + private int sftpTimeout = 8000; // 本地8秒,远程30秒 + + // Prometheus配置 + @Value("${prometheus.path.home:/opt/prometheus/}") + private String prometheusHome; + @Value("${prometheus.path.configPath:/opt/prometheus/conf/}") + private String prometheusConfigPath; + @Value("${prometheus.path.alertRulesPath:/opt/prometheus/rules/}") + private String alertRulesPath; + @Value("${prometheus.path.promtoolPath:/opt/prometheus/bin/}") + private String promtoolPath; + @Value("${prometheus.url:http://172.21.170.11}") + private String prometheusHost; + @Value("${prometheus.prometheusPort:9090}") + private int prometheusPort; + // 本地模式标识 + @Value("${prometheus.ssh.isLocalMode:true}") + private boolean isLocalMode; + + /** + * 构造函数 + */ + public PrometheusAlertManager() { + if (isLocalMode) { + adjustLocalTimeouts(); + log.info("运行在本地模式"); + } else { + log.info("运行在远程模式"); + } + } + /** + * 本地模式下调整超时时间 + */ + private void adjustLocalTimeouts() { + connectionTimeout = 5000; + commandTimeout = 10000; + sftpTimeout = 8000; + } + + /** + * 获取SSH会话 + */ + private Session getOrCreateSession() throws JSchException { + if (session == null || !session.isConnected()) { + JSch jsch = new JSch(); + session = jsch.getSession(username, host, port); + session.setPassword(password); + + Properties config = new Properties(); + config.put("StrictHostKeyChecking", "no"); + session.setConfig(config); + session.connect(connectionTimeout); + } + return session; + } + + /** + * 执行命令 - 本地模式优化 + */ + private String executeCommand(String command) throws Exception { + if (isLocalMode) { + return executeLocalCommand(command); + } else { + return executeRemoteCommand(command); + } + } + + /** + * 本地命令执行 + */ + private String executeLocalCommand(String command) throws Exception { + ProcessBuilder processBuilder = new ProcessBuilder("/bin/bash", "-c", command); + Process process = processBuilder.start(); + + StringBuilder output = new StringBuilder(); + try (BufferedReader reader = new BufferedReader( + new InputStreamReader(process.getInputStream()))) { + String line; + while ((line = reader.readLine()) != null) { + output.append(line).append("\n"); + } + } + + int exitCode = process.waitFor(); + if (exitCode != 0) { + throw new RuntimeException("Command failed with exit code: " + exitCode + ", Output: " + + output.toString()); + } + + return output.toString().trim(); + } + + /** + * 远程命令执行 + */ + private String executeRemoteCommand(String command) throws Exception { + channelExec = (ChannelExec) getOrCreateSession().openChannel("exec"); + channelExec.setCommand(command); + channelExec.connect(commandTimeout); + + StringBuilder output = new StringBuilder(); + try (BufferedReader reader = new BufferedReader( + new InputStreamReader(channelExec.getInputStream()))) { + String line; + while ((line = reader.readLine()) != null) { + output.append(line).append("\n"); + } + } + + int exitCode = channelExec.getExitStatus(); + channelExec.disconnect(); + + if (exitCode != 0) { + throw new RuntimeException("Command failed with exit code: " + exitCode + ", Output: " + + output.toString()); + } + + return output.toString().trim(); + } + + /** + * 获取SFTP通道 + */ + private ChannelSftp getSftpChannel() throws JSchException { + if (channelSftp == null || channelSftp.isClosed()) { + channelSftp = (ChannelSftp) getOrCreateSession().openChannel("sftp"); + channelSftp.connect(sftpTimeout); + } + return channelSftp; + } + + /** + * 文件上传 - 本地模式优化 + */ + public void uploadFile(String localFilePath, String remoteFilePath) throws Exception { + if (isLocalMode) { + // 本地模式:直接复制文件 + copyLocalFile(localFilePath, remoteFilePath); + } else { + // 远程模式:使用SFTP + ChannelSftp sftp = getSftpChannel(); + sftp.put(localFilePath, remoteFilePath); + } + log.info("文件上传完成: {} -> {}", localFilePath, remoteFilePath); + } + + /** + * 本地文件复制 + */ + private void copyLocalFile(String source, String destination) throws IOException { + File srcFile = new File(source); + File destFile = new File(destination); + + // 确保目标目录存在 + File destDir = destFile.getParentFile(); + if (destDir != null && !destDir.exists()) { + destDir.mkdirs(); + } + + try (FileInputStream fis = new FileInputStream(srcFile); + FileOutputStream fos = new FileOutputStream(destFile)) { + byte[] buffer = new byte[8192]; + int length; + while ((length = fis.read(buffer)) > 0) { + fos.write(buffer, 0, length); + } + } + } + + /** + * 列出目录内容 - 本地模式优化 + */ + public List listDirectory(String directory) throws Exception { + if (isLocalMode) { + return listLocalDirectory(directory); + } else { + return listRemoteDirectory(directory); + } + } + + /** + * 本地目录列出 + */ + private List listLocalDirectory(String directory) { + List files = new ArrayList<>(); + File dir = new File(directory); + + if (dir.exists() && dir.isDirectory()) { + File[] fileList = dir.listFiles(); + if (fileList != null) { + for (File file : fileList) { + if (!file.getName().startsWith(".")) { // 排除隐藏文件 + files.add(file.getName()); + } + } + } + } + + return files; + } + + /** + * 远程目录列出 + */ + private List listRemoteDirectory(String directory) throws SftpException, JSchException { + ChannelSftp sftp = getSftpChannel(); + Vector entries = sftp.ls(directory); + + List files = new ArrayList<>(); + for (ChannelSftp.LsEntry entry : entries) { + String filename = entry.getFilename(); + if (!filename.startsWith(".")) { // 排除隐藏文件 + files.add(filename); + } + } + + return files; + } + + /** + * 文件是否存在 - 本地模式优化 + */ + public boolean fileExists(String filePath) throws Exception { + if (isLocalMode) { + return new File(filePath).exists(); + } else { + try { + ChannelSftp sftp = getSftpChannel(); + sftp.stat(filePath); + return true; + } catch (SftpException e) { + return false; + } + } + } + + /** + * 删除文件 - 本地模式优化 + */ + public void deleteFile(String filePath) throws Exception { + if (isLocalMode) { + File file = new File(filePath); + if (file.exists()) { + file.delete(); + } + } else { + ChannelSftp sftp = getSftpChannel(); + sftp.rm(filePath); + } + log.info("文件删除完成: {}", filePath); + } + + /** + * 创建目录 - 本地模式优化 + */ + public void createDirectory(String directory) throws Exception { + if (isLocalMode) { + File dir = new File(directory); + if (!dir.exists()) { + dir.mkdirs(); + } + } else { + ChannelSftp sftp = getSftpChannel(); + try { + sftp.mkdir(directory); + } catch (SftpException e) { + // 目录可能已存在 + log.debug("目录创建失败,可能已存在: {}", directory); + } + } + log.info("目录操作完成: {}", directory); + } + + + /** + * 根据 AlarmRule 实体保存或更新 Prometheus 告警规则 + */ + public boolean saveOrUpdateAlarmRule(AlarmRule alarmRule) throws Exception { + if (alarmRule.getEnabled() == null || alarmRule.getEnabled() != 1) { + log.warn("告警规则 [{}] 未启用,跳过保存", alarmRule.getName()); + return false; + } + + // 1. 生成 YAML 内容 + String ruleContent = generatePrometheusAlertRule(alarmRule); + // 2. 使用 ID 作为文件名,确保唯一性 + String fileName = alarmRule.getId() + ".yml"; + String fullPath = FileUtil.normalize(alertRulesPath + "/" + fileName); + + // 3. 写入临时文件并上传 + String tempPath = System.getProperty("java.io.tmpdir")+ fileName; + writeTempFile(tempPath, ruleContent); + + // 4. 如果原文件存在则备份 + if (fileExists(fullPath)) { + backupOriginalFile(fullPath); + } + + uploadFile(tempPath, fullPath); + + // 5. 验证规则语法 + if (!validateAlertRules(fullPath)) { + log.error("告警规则 [{}] 验证失败,回滚更改", alarmRule.getName()); + restoreBackupFile(fullPath); + throw new Exception("告警规则语法验证失败"); + } + + // 6. 重载 Prometheus + reloadPrometheus(); + log.info("告警规则 [{}] 已成功同步至 Prometheus", alarmRule.getName()); + return true; + } + /** + * 校验告警规则文件的语法 (调用 promtool check rules) + */ + private boolean validateAlertRules(String filePath) throws Exception { + String promtoolPath = prometheusHome + "/promtool"; + String command = promtoolPath + " check rules " + filePath; + try { + String result = executeCommand(command); + return result.toLowerCase().contains("success") || result.toLowerCase().contains("check passed"); + } catch (Exception e) { + log.error("规则文件验证异常: {}", e.getMessage()); + return false; + } + } + /** + * 将 AlarmRule 实体转换为 Prometheus YAML 字符串 + */ + private String generatePrometheusAlertRule(AlarmRule alarmRule) { + StringBuilder sb = new StringBuilder(); + sb.append("groups:\n"); + sb.append("- name: ").append(alarmRule.getName()).append("_group\n"); + sb.append(" rules:\n"); + sb.append(" - alert: ").append(sanitizeMetricName(alarmRule.getName())).append("\n"); + + // 生成 PromQL 表达式 + String expr = buildExpression(alarmRule); + sb.append(" expr: ").append(expr).append("\n"); + + // 沉默周期/持续时间 + if (alarmRule.getSilenceCycle() != null) { + sb.append(" for: ").append(convertSecondsToDuration(alarmRule.getSilenceCycle())).append("\n"); + } + + // 标签 + sb.append(" labels:\n"); + sb.append(" severity: ").append(getSeverityLevel(alarmRule)).append("\n"); + sb.append(" source_type: \"").append(defaultString(alarmRule.getSourceType())).append("\"\n"); + sb.append(" resource_id: \"").append(defaultString(alarmRule.getSourceId())).append("\"\n"); + sb.append(" contact_group: \"").append(defaultString(alarmRule.getContactId())).append("\"\n"); + + // 注解 + Rule ruleObj = alarmRule.getRule(); + String conditionDesc = (ruleObj != null) ? ruleObj.joint() : "指标异常"; + + sb.append(" annotations:\n"); + sb.append(" summary: \"").append(alarmRule.getName()).append(" - ").append(conditionDesc).append("\"\n"); + sb.append(" description: \"告警名称: ").append(alarmRule.getName()) + .append("\\n资源ID: ").append(defaultString(alarmRule.getSourceId())) + .append("\\n监控条件: ").append(conditionDesc).append("\"\n"); + + return sb.toString(); + } + /** + * 构建 PromQL 表达式 + */ + private String buildExpression(AlarmRule alarmRule) { + Rule rule = alarmRule.getRule(); + if (rule != null && StrUtil.isNotBlank(rule.getName()) && rule.getThreshold() != null) { + // 先使用 Rule 实体中的定义 + return rule.getName() + " " + rule.getOperator() + " " + rule.getThreshold(); + } + // 最后使用 itemId 和默认阈值 + String metric = StrUtil.isNotBlank(alarmRule.getItemId()) ? alarmRule.getItemId() : "unknown_metric"; + return metric + " > 80"; + } + + + + private String convertSecondsToDuration(Long seconds) { + if (seconds >= 86400) return (seconds / 86400) + "d"; + if (seconds >= 3600) return (seconds / 3600) + "h"; + if (seconds >= 60) return (seconds / 60) + "m"; + return seconds + "s"; + } + + private String getSeverityLevel(AlarmRule alarmRule) { + // 可以根据 notification 或其他字段映射严重级别 + return "warning"; + } + + private String sanitizeMetricName(String name) { + // 将中文或特殊字符替换为下划线,保证符合 Prometheus 命名规范 + return name.replaceAll("[^a-zA-Z0-9_]", "_"); + } + + private String defaultString(String str) { + return StrUtil.isBlank(str) ? "unknown" : str; + } + + /** + * 写入临时文件 + */ + private void writeTempFile(String filePath, String content) throws IOException { + File file = new File(filePath); + File parentDir = file.getParentFile(); + if (parentDir != null && !parentDir.exists()) parentDir.mkdirs(); + try (FileWriter writer = new FileWriter(file)) { + writer.write(content); + } + } + + private void backupOriginalFile(String filePath) throws Exception { + String backupPath = filePath + ".backup"; + if (isLocalMode) { + File original = new File(filePath); + File backup = new File(backupPath); + java.nio.file.Files.copy(original.toPath(), backup.toPath(), java.nio.file.StandardCopyOption.REPLACE_EXISTING); + } else { + ChannelSftp sftp = getSftpChannel(); + sftp.rename(filePath, backupPath); + } + log.info("已备份文件: {} -> {}", filePath, backupPath); + } + + private void restoreBackupFile(String filePath) throws Exception { + String backupPath = filePath + ".backup"; + if (fileExists(backupPath)) { + if (isLocalMode) { + File backup = new File(backupPath); + File original = new File(filePath); + java.nio.file.Files.copy(backup.toPath(), original.toPath(), java.nio.file.StandardCopyOption.REPLACE_EXISTING); + backup.delete(); + } else { + ChannelSftp sftp = getSftpChannel(); + sftp.rename(backupPath, filePath); + } + log.info("已从备份恢复文件: {}", filePath); + } + } + + + + + + + + + /** + * 验证Prometheus配置 + */ + public boolean validatePrometheusConfig() throws Exception { + String command = promtoolPath + " check config " + prometheusConfigPath; + String result = executeCommand(command); + + log.info("Prometheus配置验证结果: {}", result); + + // 检查输出中是否包含"SUCCESS"或无错误信息 + return result.toLowerCase().contains("success") || + (!result.toLowerCase().contains("error") && + !result.toLowerCase().contains("failed")); + } + + /** + * 重新加载Prometheus + */ + public void reloadPrometheus() throws Exception { + String command = + "curl -X POST http://" + prometheusHost + ":" + prometheusPort + "/-/reload"; + String result = executeCommand(command); + + log.info("Prometheus重载结果: {}", result); + } + + /** + * 保存告警规则到文件 + */ + public void saveAlertRuleToFile(String ruleName, String ruleContent) throws Exception { + String tempRulePath = "/tmp/" + ruleName + ".yml"; + String finalRulePath = alertRulesPath + "/" + ruleName + ".yml"; + + // 写入临时文件 + writeTempFile(tempRulePath, ruleContent); + + // 上传到目标位置 + uploadFile(tempRulePath, finalRulePath); + + // 验证配置 + if (!validatePrometheusConfig()) { + // 如果验证失败,回滚更改 + deleteFile(finalRulePath); + throw new Exception("告警规则配置验证失败"); + } + + // 重新加载Prometheus + reloadPrometheus(); + + log.info("告警规则保存成功: {}", ruleName); + } + + + + + /** + * 获取所有告警规则 + */ + public List getAllAlertRules() throws Exception { + List rules = new ArrayList<>(); + + // 获取alert_rules目录下的所有YAML文件 + List files = listDirectory(alertRulesPath); + for (String file : files) { + if (file.toLowerCase().endsWith(".yml") || file.toLowerCase().endsWith(".yaml")) { + rules.add(file); + } + } + + return rules; + } + + /** + * 获取告警规则内容 + */ + public String getAlertRuleContent(String ruleName) throws Exception { + String rulePath = alertRulesPath + "/" + ruleName; + + if (isLocalMode) { + // 本地模式:直接读取文件 + try (BufferedReader reader = new BufferedReader(new FileReader(rulePath))) { + StringBuilder content = new StringBuilder(); + String line; + while ((line = reader.readLine()) != null) { + content.append(line).append("\n"); + } + return content.toString(); + } + } else { + // 远程模式:使用SFTP下载 + ChannelSftp sftp = getSftpChannel(); + try (InputStream inputStream = sftp.get(rulePath); + BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream))) { + + StringBuilder content = new StringBuilder(); + String line; + while ((line = reader.readLine()) != null) { + content.append(line).append("\n"); + } + return content.toString(); + } + } + } + + /** + * 删除告警规则 + */ + public void deleteAlertRule(String ruleName) throws Exception { + String rulePath = alertRulesPath + "/" + ruleName; + + // 删除规则文件 + deleteFile(rulePath); + + // 验证配置 + if (!validatePrometheusConfig()) { + throw new Exception("删除告警规则后配置验证失败"); + } + + // 重新加载Prometheus + reloadPrometheus(); + + log.info("告警规则删除成功: {}", ruleName); + } + + /** + * 更新告警规则 + */ + public void updateAlertRule(String ruleName, String newRuleContent) throws Exception { + String tempRulePath = "/tmp/" + ruleName + ".yml"; + String finalRulePath = alertRulesPath + "/" + ruleName; + + // 写入临时文件 + writeTempFile(tempRulePath, newRuleContent); + + // 上传到目标位置 + uploadFile(tempRulePath, finalRulePath); + + // 验证配置 + if (!validatePrometheusConfig()) { + throw new Exception("更新告警规则后配置验证失败"); + } + + // 重新加载Prometheus + reloadPrometheus(); + + log.info("告警规则更新成功: {}", ruleName); + } + + /** + * 关闭所有连接 + */ + public void close() { + if (channelExec != null && channelExec.isConnected()) { + channelExec.disconnect(); + } + + if (channelSftp != null && channelSftp.isConnected()) { + channelSftp.disconnect(); + } + + if (session != null && session.isConnected()) { + session.disconnect(); + } + + log.info("Prometheus Alert Manager 已关闭"); + } + + // Getters + public String getHost() { + return host; + } + + public String getAlertRulesPath() { + return alertRulesPath; + } + + public boolean isLocalMode() { + return isLocalMode; + } + + // Setters + public void setLocalMode(boolean localMode) { + isLocalMode = localMode; + if (localMode) { + adjustLocalTimeouts(); + } + } +} diff --git a/jeecg-module-abnormal-alarm/src/main/java/org/jeecg/modules/Util/PrometheusUtil.java b/jeecg-module-abnormal-alarm/src/main/java/org/jeecg/modules/Util/PrometheusUtil.java index 907ea84a..cca9a02c 100644 --- a/jeecg-module-abnormal-alarm/src/main/java/org/jeecg/modules/Util/PrometheusUtil.java +++ b/jeecg-module-abnormal-alarm/src/main/java/org/jeecg/modules/Util/PrometheusUtil.java @@ -6,6 +6,7 @@ import lombok.Data; import lombok.extern.slf4j.Slf4j; import org.jeecg.modules.base.entity.monitor.History; import org.springframework.beans.factory.annotation.Value; +import org.springframework.cloud.context.config.annotation.RefreshScope; import org.springframework.http.HttpEntity; import org.springframework.http.HttpMethod; import org.springframework.http.ResponseEntity; @@ -20,10 +21,13 @@ import java.util.*; import java.util.stream.Collectors; @Component +@RefreshScope @Slf4j public class PrometheusUtil { - @Value("${prometheus.url:http://172.21.170.11:9090}") + @Value("${prometheus.url:http://172.21.170.11}") private String prometheusUrl; + @Value("${prometheus.prometheusPort:9090}") + private int prometheusPort; @Value("${prometheus.connect-timeout:30}") private int connectTimeout; @@ -112,7 +116,7 @@ public class PrometheusUtil { private URI buildQueryUri(String path, String promql, Long start, Long end, Long step) { String encodedQuery = URLEncoder.encode(promql); StringBuilder url = new StringBuilder(); - url.append(prometheusUrl).append(path) + url.append(prometheusUrl + ":" + prometheusPort).append(path) .append("?query=").append(encodedQuery); if (start != null) { @@ -126,6 +130,7 @@ public class PrometheusUtil { } try { + log.info("构建URI: " + url); return new URI(url.toString()); } catch (Exception e) { throw new RuntimeException("构建URI失败: " + url, e); @@ -207,7 +212,7 @@ public class PrometheusUtil { //region 健康检查 public boolean healrhChenck() { try { - String url = prometheusUrl + "/-/healthy"; + String url = prometheusUrl + ":" + prometheusPort + "/-/healthy"; restTemplate.getForObject(url, String.class); return true; } catch (Exception e) { @@ -222,7 +227,7 @@ public class PrometheusUtil { * @return */ public JSONObject buildInfo() { - String url = prometheusUrl + "/api/v1/status/buildinfo"; + String url = prometheusUrl + ":" + prometheusPort + "/api/v1/status/buildinfo"; return get(url).getJSONObject("data"); } //endregion @@ -239,7 +244,7 @@ public class PrometheusUtil { public List queryInstantFull(String promql, Long time) { StringBuilder url = new StringBuilder(); - url.append(prometheusUrl).append("/api/v1/query?query=") + url.append(prometheusUrl+":"+prometheusPort).append("/api/v1/query?query=") .append(URLEncoder.encode(promql)); if (time != null) { @@ -402,7 +407,7 @@ public class PrometheusUtil { */ public List> series(List selectors, long start, long end) { StringBuilder url = new StringBuilder(); - url.append(prometheusUrl).append("/api/v1/series?"); + url.append(prometheusUrl+":"+prometheusPort).append("/api/v1/series?"); for (String selector : selectors) { url.append("match[]=").append(URLEncoder.encode(selector)) .append("&"); @@ -426,13 +431,11 @@ public class PrometheusUtil { } - - /** * 获取标签值列表 */ public Set labelValues(String labelName) { - String url = String.format("%s/api/v1/label/%s/values", prometheusUrl, + String url = String.format("%s/api/v1/label/%s/values", prometheusUrl+":"+prometheusPort, URLEncoder.encode(labelName)); JSONObject json = get(url); @@ -469,7 +472,7 @@ public class PrometheusUtil { * 获取元数据 */ public List metadata(String metric) { - String url = prometheusUrl + "/api/v1/metadata"; + String url = prometheusUrl+":"+prometheusPort + "/api/v1/metadata"; if (metric != null) { url = url + "&metric=" + URLEncoder.encode(metric); } @@ -500,7 +503,7 @@ public class PrometheusUtil { * @return List */ public List targets() { - String url = prometheusUrl + "/api/v1/targets"; + String url = prometheusUrl+":"+prometheusPort + "/api/v1/targets"; JSONObject json = get(url); JSONArray activeTargets = json.getJSONObject("data").getJSONArray("activeTargets"); List result = new ArrayList<>(); @@ -541,7 +544,7 @@ public class PrometheusUtil { * @return */ public List alertRules() { - String url = prometheusUrl + "/api/v1/alertRules"; + String url = prometheusUrl+":"+prometheusPort + "/api/v1/alertRules"; JSONObject json = get(url); JSONArray groups = json.getJSONObject("data").getJSONArray("groups"); List result = new ArrayList<>(); @@ -664,7 +667,34 @@ public class PrometheusUtil { } + /** + * 查询数据库挂载的磁盘容量信息 + */ + + public DiskMetrics queryDiskMetrics(String mountpoint) { + // 构建 PromQL 查询 + String promql = String.format( + "node_filesystem_size_bytes{mountpoint=\"%s\"} or " + + "node_filesystem_avail_bytes{mountpoint=\"%s\"} or " + + "node_filesystem_free_bytes{mountpoint=\"%s\"}", + mountpoint, mountpoint, mountpoint + ); + + // 调用 Prometheus API + List results = queryInstantFull(promql, null); + + return new DiskMetrics(); + } + + //endregion } + +@Data +class DiskMetrics { + private Double totalBytes; // 总字节数 + private Double availableBytes; // 可用字节数 + private Double freeBytes; // 空闲字节数 +} \ No newline at end of file diff --git a/jeecg-module-abnormal-alarm/src/main/java/org/jeecg/modules/aspect/StatusAspect.java b/jeecg-module-abnormal-alarm/src/main/java/org/jeecg/modules/aspect/StatusAspect.java index 3ef5fcd5..012cf62f 100644 --- a/jeecg-module-abnormal-alarm/src/main/java/org/jeecg/modules/aspect/StatusAspect.java +++ b/jeecg-module-abnormal-alarm/src/main/java/org/jeecg/modules/aspect/StatusAspect.java @@ -22,6 +22,7 @@ import org.jeecg.common.constant.SymbolConstant; import org.jeecg.common.util.EmailUtil; import org.jeecg.common.util.JDBCUtil; import org.jeecg.common.util.RedisUtil; +import org.jeecg.modules.Util.MonitorConfig; import org.jeecg.modules.Util.PrometheusUtil; import org.jeecg.modules.base.dto.NameValue; import org.jeecg.modules.base.entity.monitor.Host; @@ -31,6 +32,7 @@ import org.jeecg.modules.base.entity.postgre.SysDatabase; import org.jeecg.modules.base.entity.postgre.SysEmail; import org.jeecg.modules.base.entity.postgre.SysServer; import org.jeecg.modules.base.enums.ServerStatus; +import org.jeecg.modules.entity.MonitorItem; import org.jeecg.modules.feignclient.ManageUtil; import org.jeecg.modules.feignclient.MonitorAlarm; import org.jeecg.modules.qiyeEmail.base.InstanceSDK; @@ -120,14 +122,14 @@ public class StatusAspect { List hosts = new ArrayList<>(); //通过prometheus获取type=db的数据 up{type="db"} List hostResults = - prometheusUtil.queryInstantFull("max by (name) (up{type=\"db\"})", null); + prometheusUtil.queryInstantFull("up{type=\"db\"}", null); for (PrometheusUtil.QueryResult result : hostResults) { Host host = new Host(); String hostName = result.getMetrics().get("name"); String job = result.getMetrics().get("job"); String instance = result.getMetrics().get("instance"); - Double status = result.getValue().getValue(); - host.setCode(job); + int status = (int) result.getValue().getValue(); + host.setCode(hostName); host.setHostId(instance); host.setName(hostName); host.setStatus(String.valueOf(status)); @@ -206,7 +208,7 @@ public class StatusAspect { List hostResults = prometheusUtil.queryInstantFull( - "node_boot_time_seconds{instance=\"" + name + "\"}", null); + "node_boot_time_seconds{instance=~\"" + name + ".*\"}", null); if (ObjectUtil.isNull(hostResults) || CollUtil.isEmpty(hostResults)) { redisUtil.hset(key, id, new NameValue(name, online)); @@ -217,7 +219,8 @@ public class StatusAspect { Host host = new Host(); String hostName = result.getMetrics().get("__name__"); String instance = result.getMetrics().get("instance"); - Double status = 1.0; + int status = 1; + host.setCode(instance); host.setHostId(instance); host.setName(hostName); host.setStatus(String.valueOf(status)); @@ -240,24 +243,30 @@ public class StatusAspect { ServerStatus.WARN.getValue()), status); redisUtil.hset(key, id, new NameValue(name, online)); // 更新该服务器的HostId - server.setHostId(host.getHostId()); + String hostId = host.getHostId(); + server.setHostId(hostId); serverService.updateById(server); // 同步服务器监控项 获取所有指标 - Set metric = prometheusUtil.metricNames(); - List metricNames =new ArrayList<>(); - metricNames.add("instance=\"172.21.170.11:9090\""); - long end = System.currentTimeMillis() / 1000; - long start = end - 3600; // 过去1小时 - prometheusUtil.series(metricNames,start, end); - Map itemMap = host.getItems(); - if (MapUtil.isEmpty(itemMap) || CollUtil.isEmpty(itemMap.values())) { - return; - } - Collection items = itemMap.values(); + Collection monitorItems = MonitorConfig.getAllItems(); + + //Set metric = prometheusUtil.metricNames(); + //List metricNames =new ArrayList<>(); + //metricNames.add("instance=\""+host.getHostId()+"\""); + //long end = System.currentTimeMillis() / 1000; + //long start = end - 3600; // 过去1小时 + //prometheusUtil.series(metricNames,start, end); +// Map itemMap = host.getItems(); +// if (MapUtil.isEmpty(itemMap) || CollUtil.isEmpty(itemMap.values())) { +// return; +// } + //Collection items = itemMap.values(); + List alarmItems = new ArrayList<>(); - items.forEach(item -> { + + monitorItems.forEach(item -> { AlarmItem alarmItem = BeanUtil.copyProperties(item, AlarmItem.class); alarmItem.setId(item.getItemId()); + alarmItem.setHostId(hostId); alarmItems.add(alarmItem); }); alarmItemService.saveOrUpdateBatch(alarmItems); diff --git a/jeecg-module-abnormal-alarm/src/main/java/org/jeecg/modules/entity/MetricConfig.java b/jeecg-module-abnormal-alarm/src/main/java/org/jeecg/modules/entity/MetricConfig.java index d06058b5..0228b0d2 100644 --- a/jeecg-module-abnormal-alarm/src/main/java/org/jeecg/modules/entity/MetricConfig.java +++ b/jeecg-module-abnormal-alarm/src/main/java/org/jeecg/modules/entity/MetricConfig.java @@ -12,32 +12,102 @@ public class MetricConfig { public static final Map METRIC_MAP = new HashMap<>(); static { - // 根据你提供的 getItems 返回结果进行 1:1 映射 - METRIC_MAP.put("37494", new MetricConfig("cpuUtilization", + // 根据之前讨论的固定ID进行映射 + // CPU相关 (1001-1003) + METRIC_MAP.put("1001", new MetricConfig("cpu_usage_percent", + "(1 - avg(irate(node_cpu_seconds_total{instance='%s',mode='idle'}[5m]))) * 100", + "%")); + METRIC_MAP.put("1002", new MetricConfig("cpuUtilization", "(1 - avg(irate(node_cpu_seconds_total{instance='%s',mode='idle'}[1m]))) * 100", "%")); - METRIC_MAP.put("37445", - new MetricConfig("swapTotalSize", "node_memory_SwapTotal_bytes{instance='%s'}", - "B")); - METRIC_MAP.put("37469", new MetricConfig("load", "node_load1{instance='%s'}", "")); - METRIC_MAP.put("37452", new MetricConfig("swapUtilization", - "(1 - (node_memory_SwapFree_bytes{instance='%s'} / node_memory_SwapTotal_bytes{instance='%s'})) * 100", - "%")); - METRIC_MAP.put("37443", - new MetricConfig("freeSwapSpace", "node_memory_SwapFree_bytes{instance='%s'}", - "B")); - METRIC_MAP.put("37460", new MetricConfig("latency", - "irate(node_disk_read_time_seconds_total{instance='%s'}[1m]) * 1000", "ms")); - METRIC_MAP.put("37449", new MetricConfig("memoryUtilization", + METRIC_MAP.put("1003", new MetricConfig("system_load_average", + "node_load1{instance='%s'}", + "load")); + + // 内存相关 (1101-1107) + METRIC_MAP.put("1101", new MetricConfig("memory_used_percent", "(1 - (node_memory_MemAvailable_bytes{instance='%s'} / node_memory_MemTotal_bytes{instance='%s'})) * 100", "%")); - METRIC_MAP.put("37660", new MetricConfig("throughput", - "sum(irate(node_network_receive_bytes_total{instance='%s',device!~'lo'}[1m]))", + METRIC_MAP.put("1102", new MetricConfig("memoryUtilization", + "(1 - (node_memory_MemAvailable_bytes{instance='%s'} / node_memory_MemTotal_bytes{instance='%s'})) * 100", + "%")); + METRIC_MAP.put("1103", new MetricConfig("memAvailable", + "node_memory_MemAvailable_bytes{instance='%s'}", "B")); - METRIC_MAP.put("37493", new MetricConfig("responseSuccessRate", "vector(100)", "%")); - METRIC_MAP.put("37454", - new MetricConfig("memAvailable", "node_memory_MemAvailable_bytes{instance='%s'}", - "B")); + METRIC_MAP.put("1104", new MetricConfig("swapUtilization", + "(1 - (node_memory_SwapFree_bytes{instance='%s'} / node_memory_SwapTotal_bytes{instance='%s'})) * 100", + "%")); + METRIC_MAP.put("1105", new MetricConfig("swapTotalSize", + "node_memory_SwapTotal_bytes{instance='%s'}", + "B")); + METRIC_MAP.put("1106", new MetricConfig("freeSwapSpace", + "node_memory_SwapFree_bytes{instance='%s'}", + "B")); + METRIC_MAP.put("1107", new MetricConfig("swap_used_percent", + "(1 - (node_memory_SwapFree_bytes{instance='%s'} / node_memory_SwapTotal_bytes{instance='%s'})) * 100", + "%")); + + // 磁盘相关 (1201-1202) + METRIC_MAP.put("1201", new MetricConfig("disk_io_read_bytes", + "sum(irate(node_disk_read_bytes_total{instance='%s'}[5m]))", + "bytes/s")); + METRIC_MAP.put("1202", new MetricConfig("disk_io_write_bytes", + "irate(node_disk_written_bytes_total{instance='%s'}[5m])", + "bytes/s")); + + // 网络相关 (1301-1306) + METRIC_MAP.put("1301", new MetricConfig("network_receive_bytes", + "irate(node_network_receive_bytes_total{instance='%s',device!='lo'}[5m])", + "bytes/s")); + METRIC_MAP.put("1302", new MetricConfig("network_transmit_bytes", + "irate(node_network_transmit_bytes_total{instance='%s',device!='lo'}[5m])", + "bytes/s")); + METRIC_MAP.put("1303", new MetricConfig("throughput", + "sum(irate(node_network_receive_bytes_total{instance='%s',device!~'lo'}[1m]))", + "Kb/s")); + METRIC_MAP.put("1304", new MetricConfig("latency_ms", + "avg(irate(node_network_receive_bytes_total{instance='%s'}[5m])) > 0", + "ms")); + METRIC_MAP.put("1305", new MetricConfig("latency_s", + "histogram_quantile(0.95, sum(rate(http_request_duration_seconds_bucket{instance='%s'}[5m])) by (le))", + "s")); + METRIC_MAP.put("1306", new MetricConfig("tcp_established_connections", + "node_netstat_Tcp_CurrEstab{instance='%s'}", + "count")); + + // 进程相关 (1401) + METRIC_MAP.put("1401", new MetricConfig("process_count", + "node_procs_running{instance='%s'}", + "count")); + + // 数据库相关 (1501-1502) + METRIC_MAP.put("1501", new MetricConfig("dbMemory", + "pg_database_size_bytes{datname='armd',instance='%s'}/1024", + "Kb")); + METRIC_MAP.put("1502", new MetricConfig("dblSize", + "sum(pg_database_size_bytes{instance='%s'})", + "B")); + + // 应用相关 (1601-1603) + METRIC_MAP.put("1601", new MetricConfig("responseSuccessRate", + "sum(rate(http_requests_total{instance='%s',status=~'2..'}[5m])) / sum(rate(http_requests_total{instance='%s'}[5m])) * 100", + "%")); + METRIC_MAP.put("1602", new MetricConfig("login", + "sum(increase(login_attempts_total{instance='%s'}[5m]))", + "")); + METRIC_MAP.put("1603", new MetricConfig("connections", + "node_netstat_Tcp_CurrEstab{instance='%s'}", + "")); + + // 日志相关 (1701) + METRIC_MAP.put("1701", new MetricConfig("logRemainingSize", + "node_filesystem_avail_bytes{instance='%s',mountpoint=~'/var/log.*|/opt/log.*'}", + "B")); + + // 系统负载 (1801) + METRIC_MAP.put("1801", new MetricConfig("load", + "node_load1{instance='%s'}", + "")); } private String name; diff --git a/jeecg-module-abnormal-alarm/src/main/java/org/jeecg/modules/entity/MonitorItem.java b/jeecg-module-abnormal-alarm/src/main/java/org/jeecg/modules/entity/MonitorItem.java new file mode 100644 index 00000000..c006b0da --- /dev/null +++ b/jeecg-module-abnormal-alarm/src/main/java/org/jeecg/modules/entity/MonitorItem.java @@ -0,0 +1,25 @@ +package org.jeecg.modules.entity; + +import lombok.Data; +import org.jeecg.modules.base.entity.monitor.Item; + +@Data +public class MonitorItem extends Item { + + // 构造方法 + public MonitorItem() { + } + + public MonitorItem(String itemId, String name, String hostId, String description, + String units, String status, org.jeecg.modules.entity.ValueType valueType) { + + this.setItemId(itemId); + this.setName(name); + this.setHostId(hostId); + this.setDescription(description); + this.setUnits(units); + this.setStatus(status); + this.setValueType(String.valueOf(valueType.getCode())); + + } +} diff --git a/jeecg-module-abnormal-alarm/src/main/java/org/jeecg/modules/entity/ValueType.java b/jeecg-module-abnormal-alarm/src/main/java/org/jeecg/modules/entity/ValueType.java new file mode 100644 index 00000000..101b8433 --- /dev/null +++ b/jeecg-module-abnormal-alarm/src/main/java/org/jeecg/modules/entity/ValueType.java @@ -0,0 +1,83 @@ +package org.jeecg.modules.entity; + +import java.util.Arrays; +import java.util.List; +import java.util.stream.Collectors; + +public enum ValueType { + + // 使用枚举值和对应的数字代码 + INT(1, "int", "整型"), + FLOAT(2, "float", "浮点型"), + DOUBLE(3, "double", "双精度浮点型"), + LONG(4, "long", "长整型"), + STRING(5, "string", "字符串类型"), + BOOLEAN(6, "boolean", "布尔类型"); + + private final int code; // 数字代码 + private final String name; // 类型名称 + private final String desc; // 描述 + + ValueType(int code, String name, String desc) { + this.code = code; + this.name = name; + this.desc = desc; + } + + public int getCode() { + return code; + } + + public String getName() { + return name; + } + + public String getDesc() { + return desc; + } + + /** + * 根据代码获取枚举 + */ + public static ValueType fromCode(int code) { + for (ValueType type : values()) { + if (type.code == code) { + return type; + } + } + throw new IllegalArgumentException("无效的值类型代码: " + code); + } + + /** + * 根据名称获取枚举 + */ + public static ValueType fromName(String name) { + for (ValueType type : values()) { + if (type.name.equalsIgnoreCase(name)) { + return type; + } + } + throw new IllegalArgumentException("无效的值类型名称: " + name); + } + + /** + * 验证代码是否有效 + */ + public static boolean isValidCode(int code) { + for (ValueType type : values()) { + if (type.code == code) { + return true; + } + } + return false; + } + + /** + * 获取所有代码列表 + */ + public static List getAllCodes() { + return Arrays.stream(values()) + .map(ValueType::getCode) + .collect(Collectors.toList()); + } +} diff --git a/jeecg-module-abnormal-alarm/src/main/java/org/jeecg/modules/service/IMonitorService.java b/jeecg-module-abnormal-alarm/src/main/java/org/jeecg/modules/service/IMonitorService.java index c0e9b2be..3a30c63f 100644 --- a/jeecg-module-abnormal-alarm/src/main/java/org/jeecg/modules/service/IMonitorService.java +++ b/jeecg-module-abnormal-alarm/src/main/java/org/jeecg/modules/service/IMonitorService.java @@ -16,8 +16,7 @@ public interface IMonitorService { void dbDetails(String start, String end, String hostId); - Result queryCpuHistoryData(String itemId, Integer itemType, String start, String end, - String instance); + Result queryCpuHistoryData(String itemId, Integer itemType, String start, String end); Result detail( String hostId, String pageName, String start, String end); diff --git a/jeecg-module-abnormal-alarm/src/main/java/org/jeecg/modules/service/impl/MonitorServiceImpl.java b/jeecg-module-abnormal-alarm/src/main/java/org/jeecg/modules/service/impl/MonitorServiceImpl.java index abddef4c..5f468edf 100644 --- a/jeecg-module-abnormal-alarm/src/main/java/org/jeecg/modules/service/impl/MonitorServiceImpl.java +++ b/jeecg-module-abnormal-alarm/src/main/java/org/jeecg/modules/service/impl/MonitorServiceImpl.java @@ -35,6 +35,7 @@ import org.jeecg.modules.base.entity.monitor.History; import org.jeecg.modules.base.entity.monitor.Host; import org.jeecg.modules.base.entity.monitor.Item; import org.jeecg.modules.base.entity.monitor.ItemHistory; +import org.jeecg.modules.base.entity.postgre.AlarmItem; import org.jeecg.modules.base.entity.postgre.SysDatabase; import org.jeecg.modules.base.entity.postgre.SysServer; import org.jeecg.modules.base.enums.ServerStatus; @@ -42,6 +43,7 @@ import org.jeecg.modules.entity.*; import org.jeecg.modules.feignclient.ManageUtil; import org.jeecg.modules.feignclient.MonitorAlarm; import org.jeecg.modules.feignclient.SystemClient; +import org.jeecg.modules.mapper.AlarmItemMapper; import org.jeecg.modules.mapper.SysServerMapper; import org.jeecg.modules.service.IMonitorService; import org.jeecg.modules.service.ISysDatabaseService; @@ -77,6 +79,8 @@ public class MonitorServiceImpl implements IMonitorService { @Autowired private SystemClient systemClient; + @Autowired + private AlarmItemMapper alarmItemMapper; @Autowired private ISysDatabaseService sysDatabaseService; @@ -86,41 +90,8 @@ public class MonitorServiceImpl implements IMonitorService { private static final DateTimeFormatter FORMATTER = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"); - private static final String INSTANCE_9100 = "172.21.170.11:9100"; - //private static final String INSTANCE_9256 = "172.21.170.11:9256"; //endregion - // region 数据库监控 PromQL - - private static final String PG_INSTANCE = "172.21.170.11.*"; - - private static final String SHARED_BUFFERS = - "pg_settings_shared_buffers_bytes{instance=~\"" + PG_INSTANCE + "\"}"; - - private static final String ACTIVE_CONNECTIONS = - "sum(pg_stat_database_numbackends{instance=~\"" + PG_INSTANCE - + "\",datname!~\"template[0-1]\"})"; - - private static final String CONNECTION_NUMBER = ACTIVE_CONNECTIONS; - - private static final String BUFFER_HIT_RATIO = - "pg_stat_database_blks_hit{instance=~\"" + PG_INSTANCE + "\",datname=\"postgres\"}" - + " / " - + "(pg_stat_database_blks_hit{instance=~\"" + PG_INSTANCE - + "\",datname=\"postgres\"}" - + " + pg_stat_database_blks_read{instance=~\"" + PG_INSTANCE - + "\",datname=\"postgres\"})" - + " * 100"; - - private static final String WAL_SIZE = - "pg_settings_max_wal_size_bytes{instance=~\"" + PG_INSTANCE + "\"}"; - - private static final String DATABASE_SIZE = - "sum(pg_database_size_bytes{instance=~\"" + PG_INSTANCE + "\"})"; - - // endregion - - //region 通用查询方法 /** @@ -159,8 +130,11 @@ public class MonitorServiceImpl implements IMonitorService { //region CPU 监控 @Override public Result queryCpuHistoryData(String itemId, Integer itemType, - String start, String end, String instance) { + String start, String end) { try { + //通过itemId查找监控项 + AlarmItem alarmItem = alarmItemMapper.selectById(itemId); + String instance = alarmItem.getHostId(); long startTime = parseTimestamp(start); long endTime = parseTimestamp(end); @@ -196,7 +170,7 @@ public class MonitorServiceImpl implements IMonitorService { String promql = String.format( "1 - avg(rate(node_cpu_seconds_total{instance='%s', mode='idle'}[1m]))", - INSTANCE_9100); + hostId); List results = queryRange(promql, start, end, 60); return parseToCpuResult(results, "cpuUtilization", "%", true); @@ -209,7 +183,7 @@ public class MonitorServiceImpl implements IMonitorService { String promql = String.format( "(node_filesystem_avail_bytes{instance='%s', mountpoint='/'} / " + "node_filesystem_size_bytes{instance='%s', mountpoint='/'}) * 100", - INSTANCE_9100, INSTANCE_9100); + hostId, hostId); List results = queryRange(promql, start, end, 60); return parseToCpuResult(results, "cpuIdleRate", "%", false); @@ -220,7 +194,7 @@ public class MonitorServiceImpl implements IMonitorService { long end = parseToUnix(endStr); String promql = String.format( - "irate(node_intr_total{instance='%s'}[1m])", INSTANCE_9100); + "irate(node_intr_total{instance='%s'}[1m])", hostId); List results = queryRange(promql, start, end, 60); return parseToCpuResult(results, "cpuInterrupt", "", false); @@ -231,7 +205,7 @@ public class MonitorServiceImpl implements IMonitorService { long end = parseToUnix(endStr); String promql = String.format( - "irate(node_context_switches_total{instance='%s'}[1m])", INSTANCE_9100); + "irate(node_context_switches_total{instance='%s'}[1m])", hostId); List results = queryRange(promql, start, end, 60); return parseToCpuResult(results, "cpuSwitch", "", false); @@ -243,7 +217,7 @@ public class MonitorServiceImpl implements IMonitorService { String promql = String.format( "sum by (cpu) (irate(node_cpu_seconds_total{instance='%s', mode!='idle'}[1m])) * 100", - INSTANCE_9100); + instance); List results = queryRange(promql, start, end, 60); List coreResults = new ArrayList<>(); @@ -306,7 +280,6 @@ public class MonitorServiceImpl implements IMonitorService { } //endregion - //region 数据库 监控 @Override public void dbDetails(String start, String end, String hostId) { @@ -338,36 +311,44 @@ public class MonitorServiceImpl implements IMonitorService { case "postgresql": //postgres result.add(buildItemFromQuery("Database memory state(Total memory)", "B", - SHARED_BUFFERS, startTime, endTime, step)); + "pg_settings_shared_buffers_bytes{instance='"+hostId+"'}", startTime, endTime, step)); result.add(buildItemFromQuery("Database user status(Logins per second)", "", - ACTIVE_CONNECTIONS, startTime, endTime, step)); + "sum(pg_stat_database_numbackends{instance=~\"" + hostId + + "\",datname!~\"template[0-1]\"})", startTime, endTime, step)); result.add(buildItemFromQuery("Database connection number", "", - CONNECTION_NUMBER, startTime, endTime, step)); + "sum(pg_stat_database_numbackends{instance=~\"" + hostId + + "\",datname!~\"template[0-1]\"})", startTime, endTime, step)); result.add(buildItemFromQuery("Database connection response time", "", - BUFFER_HIT_RATIO, startTime, endTime, step)); + "pg_stat_database_blks_hit{instance=~\"" + hostId + "\",datname=\"postgres\"}" + + " / " + + "(pg_stat_database_blks_hit{instance=~\"" + hostId + + "\",datname=\"postgres\"}" + + " + pg_stat_database_blks_read{instance=~\"" + hostId + + "\",datname=\"postgres\"})" + + " * 100", startTime, endTime, step)); result.add(buildItemFromQuery("Remaining space of the database log file", "B", - WAL_SIZE, startTime, endTime, step)); + "pg_settings_max_wal_size_bytes{instance=~\"" + hostId + "\"}", startTime, endTime, step)); result.add(buildItemFromQuery("Database file size", "B", - DATABASE_SIZE, startTime, endTime, step)); + "sum(pg_database_size_bytes{instance=~\"" + hostId + "\"})", startTime, endTime, step)); break; case "Oracle": //TODO 待修改,优化promql语句 //Oracle result.add(buildItemFromQuery("Database memory state(Total memory)", "B", - "oracledb_sysmetric_total_pga_allocated{instance=\"Oracle数据库(IP:172.21.170.10,实例:orcl实例)\"}", + "oracledb_sysmetric_total_pga_allocated{instance=\""+hostId+"\"}", startTime, endTime, step)); result.add(buildItemFromQuery("Database user status(Logins per second)", "", "oracledb_sysmetric_logons_per_sec", startTime, endTime, step)); result.add(buildItemFromQuery("Database connection number", "", "oracledb_sysmetric_session_count", startTime, endTime, step)); result.add(buildItemFromQuery("Database connection response time", "", - "oracledb_sysmetric_sql_service_response_time{instance=\"Oracle数据库(IP:172.21.170.10,实例:orcl实例)\"}", + "oracledb_sysmetric_sql_service_response_time{instance=\""+hostId+"\"}", startTime, endTime, step)); result.add(buildItemFromQuery("Remaining space of the database log file", "B", - "oracledb_env_redo_value{instance=\"Oracle数据库(IP:172.21.170.10,实例:orcl实例)\",name=\"log_size\"}", + "oracledb_env_redo_value{instance=\""+hostId+"\",name=\"log_size\"}", startTime, endTime, step)); result.add(buildItemFromQuery("Database file size", "B", - "sum(oracledb_tablespace_bytes{instance=\"Oracle数据库(IP:172.21.170.10,实例:orcl实例)\"})", + "sum(oracledb_tablespace_bytes{instance=\""+hostId+"\"})", startTime, endTime, step)); break; } @@ -409,7 +390,6 @@ public class MonitorServiceImpl implements IMonitorService { //region 服务器与进程 @Override public Result detail(String hostId, String pageName, String start, String end) { - hostId = "172.21.170.10"; switch (pageName) { case "serviceAndProcess": @@ -426,19 +406,25 @@ public class MonitorServiceImpl implements IMonitorService { private HostDetailsResult getHostDetails(String hostIp, String start, String end) { long startTime = parseTimestamp(start); long endTime = parseTimestamp(end); + long step = calculateStep(startTime, endTime); + String cpuQuery = + "topk(5,\n" + + " avg_over_time(\n" + + " sum by (groupname) (\n" + + " irate(namedprocess_namegroup_cpu_seconds_total[5m])\n" + + " )[5m:]\n" + + " )\n" + + ")"; - String cpuQuery = String.format( - "topk(5, sum by (groupname) (irate(namedprocess_namegroup_cpu_seconds_total{instance='%s'}[5m])))", - hostIp + ":9256"); String memQuery = String.format( "topk(5,namedprocess_namegroup_memory_bytes{instance=~\"%s\", memtype=\"resident\"}" + "/scalar(max(node_memory_MemTotal_bytes{instance=~\"%s\"})))", - hostIp + ":9256", hostIp + ":9100"); + hostIp, hostIp); HostDetailsResult result = new HostDetailsResult(); - result.setCpu(queryProcessMetrics(cpuQuery, startTime, endTime, true)); - result.setMemory(queryProcessMetrics(memQuery, startTime, endTime, true)); + result.setCpu(queryProcessMetrics(cpuQuery, startTime, endTime, true, step)); + result.setMemory(queryProcessMetrics(memQuery, startTime, endTime, true, step)); return result; } @@ -448,13 +434,18 @@ public class MonitorServiceImpl implements IMonitorService { * @param isPercent 是否乘以 100 转百分比 */ private List queryProcessMetrics( - String promql, long start, long end, boolean isPercent) { + String promql, long start, long end, boolean isPercent, long step) { - List results = queryRange(promql, start, end, 60); + List resultList = queryRange(promql, start, end, step); + List safeSorted = resultList.stream() + .sorted(Comparator.comparingInt(qr -> + qr.getValues() == null ? 0 : qr.getValues().size() + )) + .collect(Collectors.toList()); List metrics = new ArrayList<>(); double factor = isPercent ? 100.0 : 1.0; - for (PrometheusUtil.QueryResult qr : results) { + for (PrometheusUtil.QueryResult qr : safeSorted) { String name = qr.getMetrics() != null ? qr.getMetrics().getOrDefault("groupname", "unknown") : "unknown"; @@ -465,8 +456,8 @@ public class MonitorServiceImpl implements IMonitorService { if (qr.getValues() != null) { for (PrometheusUtil.DataPoint p : qr.getValues()) { history.add(new HostDetailsResult.HistoryPoint( - p.getTimestamp(), p.getValue())); - stats.accept(p.getValue()); + p.getTimestamp(), p.getValue() / 100)); + stats.accept(p.getValue() / 100); } } @@ -520,17 +511,20 @@ public class MonitorServiceImpl implements IMonitorService { @Override public Result item(String itemId, Integer itemType, String start, String end) { MetricConfig config = MetricConfig.METRIC_MAP.get(itemId); + //通过itemId查找监控项 + AlarmItem alarmItem = alarmItemMapper.selectById(itemId); + if (config == null) { return null; } - + String instance = alarmItem.getHostId(); try { long startTs = LocalDateTime.parse(start, FORMATTER) .toEpochSecond(ZoneOffset.ofHours(8)); long endTs = LocalDateTime.parse(end, FORMATTER) .toEpochSecond(ZoneOffset.ofHours(8)); - String promql = String.format(config.getPromQl(), INSTANCE_9100, INSTANCE_9100); + String promql = String.format(config.getPromQl(), instance, instance); List results = queryRange(promql, startTs, endTs, 60); List> historyPoints = new ArrayList<>(); @@ -558,7 +552,7 @@ public class MonitorServiceImpl implements IMonitorService { return Result.OK(finalResult); } catch (Exception e) { - log.error("查询指标项失败: itemId={}", itemId, e); + log.error("查询指标项失败: instance={}", instance, e); return null; } } diff --git a/jeecg-module-abnormal-alarm/src/main/java/org/jeecg/modules/service/impl/SysDatabaseServiceImpl.java b/jeecg-module-abnormal-alarm/src/main/java/org/jeecg/modules/service/impl/SysDatabaseServiceImpl.java index 4df3221e..a41c350f 100644 --- a/jeecg-module-abnormal-alarm/src/main/java/org/jeecg/modules/service/impl/SysDatabaseServiceImpl.java +++ b/jeecg-module-abnormal-alarm/src/main/java/org/jeecg/modules/service/impl/SysDatabaseServiceImpl.java @@ -20,6 +20,7 @@ import org.jeecg.common.system.vo.DictModel; import org.jeecg.common.util.JDBCUtil; import org.jeecg.common.util.NumUtil; import org.jeecg.common.util.RedisUtil; +import org.jeecg.modules.Util.PrometheusAlertManager; import org.jeecg.modules.Util.PrometheusUtil; import org.jeecg.modules.base.dto.*; import org.jeecg.modules.base.entity.monitor.Host; @@ -44,6 +45,11 @@ import org.springframework.scheduling.annotation.Scheduled; import org.springframework.stereotype.Service; import org.springframework.transaction.annotation.Transactional; +import java.io.IOException; +import java.nio.file.FileStore; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; import java.time.LocalDate; import java.time.format.DateTimeFormatter; import java.util.*; @@ -68,6 +74,8 @@ public class SysDatabaseServiceImpl extends ServiceImpl findPage(QueryRequest query) { @@ -116,7 +124,9 @@ public class SysDatabaseServiceImpl extends ServiceImpl GB @@ -374,6 +384,18 @@ public class SysDatabaseServiceImpl extends ServiceImpl mapper = new SpaceRowMapper(); + JdbcTemplate template = JDBCUtil.template(dbUrl, dbDriver, dbUsername, dbPassword); + if (ObjectUtil.isNull(template)) { + return spaceInfos; + } + spaceInfos = template.query(DBSQL.SPACE_PG, mapper); + } return spaceInfos; } @@ -541,7 +563,8 @@ public class SysDatabaseServiceImpl extends ServiceImpl queryResultList = + prometheusUtil.queryInstantFull(osInfoquery, null); + if (ObjectUtil.isNotNull(queryResultList) && !queryResultList.isEmpty()) { + Map labels = queryResultList.get(0).getMetrics(); + basicInfo.setOsName(labels.get("name")); + basicInfo.setOsVersion(labels.get("version")); } // 指标包含: nodename(hostname), release(osVersion), sysname(osName), machine(cpuType) - JSONObject unameMetric = - queryPromSingleMetric("node_uname_info{instance=\"" + instance + "\"}"); - if (ObjectUtil.isNotNull(unameMetric)) { - JSONObject labels = unameMetric.getJSONObject("metric"); - basicInfo.setHostName(labels.getString("nodename")); - basicInfo.setKernelVersion(labels.getString("release")); + String unameQuery = String.format("node_uname_info{instance=\"%s\"}", instance); + List unameQueryList = + prometheusUtil.queryInstantFull(unameQuery, null); + + if (ObjectUtil.isNotNull(unameQueryList) && !unameQueryList.isEmpty()) { + Map labels = unameQueryList.get(0).getMetrics(); + + basicInfo.setHostName(labels.get("nodename")); + basicInfo.setKernelVersion(labels.get("release")); // basicInfo.setOsName(labels.getString("sysname")); - basicInfo.setCpuType(labels.getString("machine")); + basicInfo.setCpuType(labels.get("machine")); } // --- 服务器状态与运行时间 --- - Double upValue = queryPromSingleValue("up{instance='" + instance + "'}"); + Double upValue = + prometheusUtil.queryInstant("up{instance='" + instance + "'}"); basicInfo.setRunningState(ObjectUtil.isNotNull(upValue) && upValue == 1.0); - Double bootTime = queryPromSingleValue( - "time() - node_boot_time_seconds{instance='" + instance + "'}"); + Double bootTime = + prometheusUtil.queryInstant( + "time() - node_boot_time_seconds{instance='" + instance + "'}"); basicInfo.setRunTime( ObjectUtil.isNull(bootTime) ? "--" : NumberUtil.round(bootTime / 3600.0, 1) + "h"); // --- 硬件规格 --- - Double memTotal = - queryPromSingleValue("node_memory_MemTotal_bytes{instance='" + instance + "'}"); + Double memTotal = prometheusUtil.queryInstant( + "node_memory_MemTotal_bytes{instance='" + instance + "'}"); basicInfo.setRamSize(ObjectUtil.isNull(memTotal) ? "--" : NumberUtil.round(memTotal / 1024 / 1024 / 1024, 1) + "GB"); - Double cpuCores = queryPromSingleValue( - "count(node_cpu_seconds_total{instance='" + instance + "',mode='idle'})"); + Double cpuCores = + prometheusUtil.queryInstant( + "count(node_cpu_seconds_total{instance='" + instance + "',mode='idle'})"); basicInfo.setCpuCores( ObjectUtil.isNull(cpuCores) ? "--" : String.valueOf(cpuCores.intValue())); @@ -446,34 +452,36 @@ public class SysServerServiceImpl extends ServiceImpl dmiResultList = + prometheusUtil.queryInstantFull(dmiQuery, null); + if (ObjectUtil.isNotNull(dmiResultList) && !dmiResultList.isEmpty()) { - JSONObject dmiMetric = dmiResult.getJSONObject("metric"); - basicInfo.setBiosVersion(dmiMetric.getString("bios_version")); - basicInfo.setBiosSupplier(dmiMetric.getString("bios_vendor")); - basicInfo.setManufacturer(dmiMetric.getString("system_vendor")); - basicInfo.setModelNumber(dmiMetric.getString("product_name")); - String timeZone = dmiMetric.getString("time_zone"); + Map dmiMetric = dmiResultList.get(0).getMetrics(); + basicInfo.setBiosVersion(dmiMetric.get("bios_version")); + basicInfo.setBiosSupplier(dmiMetric.get("bios_vendor")); + basicInfo.setManufacturer(dmiMetric.get("system_vendor")); + basicInfo.setModelNumber(dmiMetric.get("product_name")); + String timeZone = dmiMetric.get("time_zone"); basicInfo.setZone(timeZone); } //startTime - JSONObject startTime = - getNodeDmiInfoMetric("node_boot_time_seconds{instance=\"" + instance + "\"}"); - if (ObjectUtil.isNotNull(startTime)) { + List startTimeList = prometheusUtil.queryInstantFull( + "node_boot_time_seconds{instance=\"" + instance + "\"}", null); + if (ObjectUtil.isNotNull(startTimeList) && !startTimeList.isEmpty()) { // 解析 - JSONArray array = startTime.getJSONArray("value"); + PrometheusUtil.DataPoint dataPoint = startTimeList.get(0).getValue(); Integer time = null; - if (array != null && array.size() > 1) { - Object value = array.get(1); + if (dataPoint != null) { + Object value = dataPoint.getValue(); if (value instanceof String) { time = Double.valueOf((String) value).intValue(); } else if (value instanceof Number) { @@ -487,21 +495,21 @@ public class SysServerServiceImpl extends ServiceImpl queryResults = + prometheusUtil.queryInstantFull(promql, null); + for (int i = 0; i < queryResults.size(); i++) { + Map item = queryResults.get(i).getMetrics(); + String mountpoint = item.get("mountpoint"); + PrometheusUtil.DataPoint dataPoint = queryResults.get(i).getValue(); + Double val = dataPoint.getValue(); xData.add(mountpoint); yData.add(NumberUtil.round(val, 1).doubleValue()); } @@ -705,23 +701,6 @@ public class SysServerServiceImpl extends ServiceImpl