修改告警系统监控项,

This commit is contained in:
duwenyuan 2026-05-12 15:21:27 +08:00
parent 75929c4c8a
commit 59e337065b
11 changed files with 1306 additions and 198 deletions

View File

@ -0,0 +1,214 @@
package org.jeecg.modules.Util;
import org.jeecg.modules.base.entity.monitor.Item;
import org.jeecg.modules.entity.MonitorItem;
import org.jeecg.modules.entity.ValueType;
import java.util.*;
import java.util.stream.Collectors;
/**
* 监控项配置类 - 包含所有预定义的监控项
*/
public class MonitorConfig {
// 默认主机ID
public static final String DEFAULT_HOST_ID = "host-001";
// 默认状态
public static final String DEFAULT_STATUS = "active";
// 按类别分组存储监控项
private static final Map<String, List<Item>> CATEGORY_ITEMS = new HashMap<>();
// ID生成器
private static int nextId = 1001;
static {
initializeAllItems();
}
/**
* 初始化所有监控项
*/
private static void initializeAllItems() {
// 生成ID列表
Map<String, Integer> idMap = new HashMap<>();
// CPU相关监控项
List<Item> cpuItems = Arrays.asList(
createItem(1001, "cpu_usage_percent", "CPU使用率百分比反映服务器计算资源占用情况", "%", ValueType.FLOAT),
createItem(1002, "cpuUtilization", "CPU utilization in %", "%", ValueType.FLOAT),
createItem(1003, "system_load_average", "系统平均负载1分钟内等待运行的进程平均数", "load", ValueType.FLOAT)
);
// 内存相关监控项
List<Item> memoryItems = Arrays.asList(
createItem(1101, "memory_used_percent", "内存使用率,服务器物理内存占用比例", "%", ValueType.FLOAT),
createItem(1102, "memoryUtilization", "Memory used percentage is calculated as (100-pavailable)", "%", ValueType.FLOAT),
createItem(1103, "memAvailable", "服务器物理内存可用空间", "B", ValueType.LONG),
createItem(1104, "swapUtilization", "服务器交换内存使用率", "%", ValueType.FLOAT),
createItem(1105, "swapTotalSize", "The total space of swap volume/file in bytes", "B", ValueType.LONG),
createItem(1106, "freeSwapSpace", "The free space of swap volume/file in bytes", "B", ValueType.LONG),
createItem(1107, "swap_used_percent", "交换分区(Swap)使用率,内存不足时的虚拟内存占用", "%", ValueType.FLOAT)
);
// 磁盘相关监控项
List<Item> diskItems = Arrays.asList(
createItem(1201, "disk_io_read_bytes", "磁盘读取吞吐量,每秒读取的数据量", "bytes/s", ValueType.FLOAT),
createItem(1202, "disk_io_write_bytes", "磁盘写入吞吐量,每秒写入的数据量", "bytes/s", ValueType.FLOAT)
);
// 网络相关监控项
List<Item> networkItems = Arrays.asList(
createItem(1301, "network_receive_bytes", "网络接收流量,网卡入站数据传输速率", "bytes/s", ValueType.FLOAT),
createItem(1302, "network_transmit_bytes", "网络发送流量,网卡出站数据传输速率", "bytes/s", ValueType.FLOAT),
createItem(1303, "throughput", "网卡吞吐量", "Kb/s", ValueType.FLOAT),
createItem(1304, "latency_ms", "响应延迟(毫秒)", "ms", ValueType.FLOAT),
createItem(1305, "latency_s", "响应时间(秒)", "s", ValueType.FLOAT),
createItem(1306, "tcp_established_connections", "当前建立的TCP连接数反映网络连接活跃状态", "count", ValueType.INT)
);
// 进程相关监控项
List<Item> processItems = Arrays.asList(
createItem(1401, "process_count", "当前运行中的进程总数", "count", ValueType.INT)
);
// 数据库相关监控项
List<Item> databaseItems = Arrays.asList(
createItem(1501, "dbMemory", "数据库内存", "Kb", ValueType.LONG),
createItem(1502, "dblSize", "数据库文件占用空间", "B", ValueType.LONG)
);
// 应用相关监控项
List<Item> appItems = Arrays.asList(
createItem(1601, "responseSuccessRate", "服务器响应成功率", "%", ValueType.FLOAT),
createItem(1602, "login", "登录数", "", ValueType.INT),
createItem(1603, "connections", "连接数", "", ValueType.INT)
);
// 日志相关监控项
List<Item> logItems = Arrays.asList(
createItem(1701, "logRemainingSize", "日志剩余空间", "B", ValueType.LONG)
);
// 系统负载
List<Item> systemItems = Arrays.asList(
createItem(1801, "load", "系统负载", "", ValueType.FLOAT)
);
// 添加到类别映射
CATEGORY_ITEMS.put("cpu", cpuItems);
CATEGORY_ITEMS.put("memory", memoryItems);
CATEGORY_ITEMS.put("swap", memoryItems.stream()
.filter(item -> item.getName().contains("swap") || item.getName().equals("swap"))
.collect(Collectors.toList()));
CATEGORY_ITEMS.put("disk", diskItems);
CATEGORY_ITEMS.put("network", networkItems);
CATEGORY_ITEMS.put("process", processItems);
CATEGORY_ITEMS.put("database", databaseItems);
CATEGORY_ITEMS.put("application", appItems);
CATEGORY_ITEMS.put("log", logItems);
CATEGORY_ITEMS.put("system", systemItems);
}
/**
* 创建监控项
*/
private static MonitorItem createItem(int id, String name, String description, String units, ValueType valueType) {
return new MonitorItem(String.valueOf(id), name, DEFAULT_HOST_ID, description, units, DEFAULT_STATUS, valueType);
}
/**
* 获取所有监控项
*/
public static List<Item> getAllItems() {
return CATEGORY_ITEMS.values().stream()
.flatMap(List::stream)
.collect(Collectors.toList());
}
/**
* 按类别获取监控项
*/
public static List<Item> getItemsByCategory(String category) {
return CATEGORY_ITEMS.getOrDefault(category.toLowerCase(), Collections.emptyList());
}
/**
* 通过名称查找监控项
*/
public static Item findByName(String name) {
return getAllItems().stream()
.filter(item -> item.getName().equals(name))
.findFirst()
.orElse(null);
}
/**
* 通过ID查找监控项
*/
public static Item findById(String itemId) {
return getAllItems().stream()
.filter(item -> item.getItemId().equals(itemId))
.findFirst()
.orElse(null);
}
/**
* 通过数字ID查找监控项
*/
public static Item findById(int itemId) {
return findById(String.valueOf(itemId));
}
/**
* 获取指定主机的所有监控项
*/
public static List<Item> getItemsByHost(String hostId) {
return getAllItems().stream()
.filter(item -> item.getHostId().equals(hostId))
.collect(Collectors.toList());
}
/**
* 验证监控项是否存在
*/
public static boolean contains(String name) {
return findByName(name) != null;
}
/**
* 获取所有监控项名称
*/
public static List<String> getAllItemNames() {
return getAllItems().stream()
.map(Item::getName)
.collect(Collectors.toList());
}
/**
* 获取所有类别
*/
public static List<String> getAllCategories() {
return new ArrayList<>(CATEGORY_ITEMS.keySet());
}
/**
* 获取监控项统计信息
*/
public static Map<String, Object> getStatistics() {
Map<String, Object> stats = new HashMap<>();
stats.put("totalItems", getAllItems().size());
stats.put("categories", getAllCategories());
Map<String, Integer> categoryCounts = new HashMap<>();
for (Map.Entry<String, List<Item>> entry : CATEGORY_ITEMS.entrySet()) {
categoryCounts.put(entry.getKey(), entry.getValue().size());
}
stats.put("categoryCounts", categoryCounts);
return stats;
}
}

View File

@ -0,0 +1,682 @@
package org.jeecg.modules.Util;
import cn.hutool.core.io.FileUtil;
import cn.hutool.core.util.StrUtil;
import com.jcraft.jsch.*;
import lombok.extern.slf4j.Slf4j;
import org.jeecg.modules.base.entity.Rule;
import org.jeecg.modules.base.entity.postgre.AlarmRule;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;
import java.io.*;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
/**
* Prometheus告警规则管理器
* Prometheus服务器管理告警规则
*/
@Slf4j
@Component
public class PrometheusAlertManager {
private Session session;
private ChannelExec channelExec;
private ChannelSftp channelSftp;
// 连接配置
@Value("${prometheus.ssh.host:127.0.0.1}")
private String host;
@Value("${prometheus.ssh.userName:rmsops}")
private String username;
@Value("${prometheus.ssh.password:cnndc66367220}")
private String password;
@Value("${prometheus.ssh.port:22}")
private int port;
// 本地路径缓存
private final Map<String, String> localPathCache = new ConcurrentHashMap<>();
// 优化的超时配置
private int connectionTimeout = 5000; // 本地5秒远程30秒
private int commandTimeout = 10000; // 本地10秒远程60秒
private int sftpTimeout = 8000; // 本地8秒远程30秒
// Prometheus配置
@Value("${prometheus.path.home:/opt/prometheus/}")
private String prometheusHome;
@Value("${prometheus.path.configPath:/opt/prometheus/conf/}")
private String prometheusConfigPath;
@Value("${prometheus.path.alertRulesPath:/opt/prometheus/rules/}")
private String alertRulesPath;
@Value("${prometheus.path.promtoolPath:/opt/prometheus/bin/}")
private String promtoolPath;
@Value("${prometheus.url:http://172.21.170.11}")
private String prometheusHost;
@Value("${prometheus.prometheusPort:9090}")
private int prometheusPort;
// 本地模式标识
@Value("${prometheus.ssh.isLocalMode:true}")
private boolean isLocalMode;
/**
* 构造函数
*/
public PrometheusAlertManager() {
if (isLocalMode) {
adjustLocalTimeouts();
log.info("运行在本地模式");
} else {
log.info("运行在远程模式");
}
}
/**
* 本地模式下调整超时时间
*/
private void adjustLocalTimeouts() {
connectionTimeout = 5000;
commandTimeout = 10000;
sftpTimeout = 8000;
}
/**
* 获取SSH会话
*/
private Session getOrCreateSession() throws JSchException {
if (session == null || !session.isConnected()) {
JSch jsch = new JSch();
session = jsch.getSession(username, host, port);
session.setPassword(password);
Properties config = new Properties();
config.put("StrictHostKeyChecking", "no");
session.setConfig(config);
session.connect(connectionTimeout);
}
return session;
}
/**
* 执行命令 - 本地模式优化
*/
private String executeCommand(String command) throws Exception {
if (isLocalMode) {
return executeLocalCommand(command);
} else {
return executeRemoteCommand(command);
}
}
/**
* 本地命令执行
*/
private String executeLocalCommand(String command) throws Exception {
ProcessBuilder processBuilder = new ProcessBuilder("/bin/bash", "-c", command);
Process process = processBuilder.start();
StringBuilder output = new StringBuilder();
try (BufferedReader reader = new BufferedReader(
new InputStreamReader(process.getInputStream()))) {
String line;
while ((line = reader.readLine()) != null) {
output.append(line).append("\n");
}
}
int exitCode = process.waitFor();
if (exitCode != 0) {
throw new RuntimeException("Command failed with exit code: " + exitCode + ", Output: " +
output.toString());
}
return output.toString().trim();
}
/**
* 远程命令执行
*/
private String executeRemoteCommand(String command) throws Exception {
channelExec = (ChannelExec) getOrCreateSession().openChannel("exec");
channelExec.setCommand(command);
channelExec.connect(commandTimeout);
StringBuilder output = new StringBuilder();
try (BufferedReader reader = new BufferedReader(
new InputStreamReader(channelExec.getInputStream()))) {
String line;
while ((line = reader.readLine()) != null) {
output.append(line).append("\n");
}
}
int exitCode = channelExec.getExitStatus();
channelExec.disconnect();
if (exitCode != 0) {
throw new RuntimeException("Command failed with exit code: " + exitCode + ", Output: " +
output.toString());
}
return output.toString().trim();
}
/**
* 获取SFTP通道
*/
private ChannelSftp getSftpChannel() throws JSchException {
if (channelSftp == null || channelSftp.isClosed()) {
channelSftp = (ChannelSftp) getOrCreateSession().openChannel("sftp");
channelSftp.connect(sftpTimeout);
}
return channelSftp;
}
/**
* 文件上传 - 本地模式优化
*/
public void uploadFile(String localFilePath, String remoteFilePath) throws Exception {
if (isLocalMode) {
// 本地模式直接复制文件
copyLocalFile(localFilePath, remoteFilePath);
} else {
// 远程模式使用SFTP
ChannelSftp sftp = getSftpChannel();
sftp.put(localFilePath, remoteFilePath);
}
log.info("文件上传完成: {} -> {}", localFilePath, remoteFilePath);
}
/**
* 本地文件复制
*/
private void copyLocalFile(String source, String destination) throws IOException {
File srcFile = new File(source);
File destFile = new File(destination);
// 确保目标目录存在
File destDir = destFile.getParentFile();
if (destDir != null && !destDir.exists()) {
destDir.mkdirs();
}
try (FileInputStream fis = new FileInputStream(srcFile);
FileOutputStream fos = new FileOutputStream(destFile)) {
byte[] buffer = new byte[8192];
int length;
while ((length = fis.read(buffer)) > 0) {
fos.write(buffer, 0, length);
}
}
}
/**
* 列出目录内容 - 本地模式优化
*/
public List<String> listDirectory(String directory) throws Exception {
if (isLocalMode) {
return listLocalDirectory(directory);
} else {
return listRemoteDirectory(directory);
}
}
/**
* 本地目录列出
*/
private List<String> listLocalDirectory(String directory) {
List<String> files = new ArrayList<>();
File dir = new File(directory);
if (dir.exists() && dir.isDirectory()) {
File[] fileList = dir.listFiles();
if (fileList != null) {
for (File file : fileList) {
if (!file.getName().startsWith(".")) { // 排除隐藏文件
files.add(file.getName());
}
}
}
}
return files;
}
/**
* 远程目录列出
*/
private List<String> listRemoteDirectory(String directory) throws SftpException, JSchException {
ChannelSftp sftp = getSftpChannel();
Vector<ChannelSftp.LsEntry> entries = sftp.ls(directory);
List<String> files = new ArrayList<>();
for (ChannelSftp.LsEntry entry : entries) {
String filename = entry.getFilename();
if (!filename.startsWith(".")) { // 排除隐藏文件
files.add(filename);
}
}
return files;
}
/**
* 文件是否存在 - 本地模式优化
*/
public boolean fileExists(String filePath) throws Exception {
if (isLocalMode) {
return new File(filePath).exists();
} else {
try {
ChannelSftp sftp = getSftpChannel();
sftp.stat(filePath);
return true;
} catch (SftpException e) {
return false;
}
}
}
/**
* 删除文件 - 本地模式优化
*/
public void deleteFile(String filePath) throws Exception {
if (isLocalMode) {
File file = new File(filePath);
if (file.exists()) {
file.delete();
}
} else {
ChannelSftp sftp = getSftpChannel();
sftp.rm(filePath);
}
log.info("文件删除完成: {}", filePath);
}
/**
* 创建目录 - 本地模式优化
*/
public void createDirectory(String directory) throws Exception {
if (isLocalMode) {
File dir = new File(directory);
if (!dir.exists()) {
dir.mkdirs();
}
} else {
ChannelSftp sftp = getSftpChannel();
try {
sftp.mkdir(directory);
} catch (SftpException e) {
// 目录可能已存在
log.debug("目录创建失败,可能已存在: {}", directory);
}
}
log.info("目录操作完成: {}", directory);
}
/**
* 根据 AlarmRule 实体保存或更新 Prometheus 告警规则
*/
public boolean saveOrUpdateAlarmRule(AlarmRule alarmRule) throws Exception {
if (alarmRule.getEnabled() == null || alarmRule.getEnabled() != 1) {
log.warn("告警规则 [{}] 未启用,跳过保存", alarmRule.getName());
return false;
}
// 1. 生成 YAML 内容
String ruleContent = generatePrometheusAlertRule(alarmRule);
// 2. 使用 ID 作为文件名确保唯一性
String fileName = alarmRule.getId() + ".yml";
String fullPath = FileUtil.normalize(alertRulesPath + "/" + fileName);
// 3. 写入临时文件并上传
String tempPath = System.getProperty("java.io.tmpdir")+ fileName;
writeTempFile(tempPath, ruleContent);
// 4. 如果原文件存在则备份
if (fileExists(fullPath)) {
backupOriginalFile(fullPath);
}
uploadFile(tempPath, fullPath);
// 5. 验证规则语法
if (!validateAlertRules(fullPath)) {
log.error("告警规则 [{}] 验证失败,回滚更改", alarmRule.getName());
restoreBackupFile(fullPath);
throw new Exception("告警规则语法验证失败");
}
// 6. 重载 Prometheus
reloadPrometheus();
log.info("告警规则 [{}] 已成功同步至 Prometheus", alarmRule.getName());
return true;
}
/**
* 校验告警规则文件的语法 (调用 promtool check rules)
*/
private boolean validateAlertRules(String filePath) throws Exception {
String promtoolPath = prometheusHome + "/promtool";
String command = promtoolPath + " check rules " + filePath;
try {
String result = executeCommand(command);
return result.toLowerCase().contains("success") || result.toLowerCase().contains("check passed");
} catch (Exception e) {
log.error("规则文件验证异常: {}", e.getMessage());
return false;
}
}
/**
* AlarmRule 实体转换为 Prometheus YAML 字符串
*/
private String generatePrometheusAlertRule(AlarmRule alarmRule) {
StringBuilder sb = new StringBuilder();
sb.append("groups:\n");
sb.append("- name: ").append(alarmRule.getName()).append("_group\n");
sb.append(" rules:\n");
sb.append(" - alert: ").append(sanitizeMetricName(alarmRule.getName())).append("\n");
// 生成 PromQL 表达式
String expr = buildExpression(alarmRule);
sb.append(" expr: ").append(expr).append("\n");
// 沉默周期/持续时间
if (alarmRule.getSilenceCycle() != null) {
sb.append(" for: ").append(convertSecondsToDuration(alarmRule.getSilenceCycle())).append("\n");
}
// 标签
sb.append(" labels:\n");
sb.append(" severity: ").append(getSeverityLevel(alarmRule)).append("\n");
sb.append(" source_type: \"").append(defaultString(alarmRule.getSourceType())).append("\"\n");
sb.append(" resource_id: \"").append(defaultString(alarmRule.getSourceId())).append("\"\n");
sb.append(" contact_group: \"").append(defaultString(alarmRule.getContactId())).append("\"\n");
// 注解
Rule ruleObj = alarmRule.getRule();
String conditionDesc = (ruleObj != null) ? ruleObj.joint() : "指标异常";
sb.append(" annotations:\n");
sb.append(" summary: \"").append(alarmRule.getName()).append(" - ").append(conditionDesc).append("\"\n");
sb.append(" description: \"告警名称: ").append(alarmRule.getName())
.append("\\n资源ID: ").append(defaultString(alarmRule.getSourceId()))
.append("\\n监控条件: ").append(conditionDesc).append("\"\n");
return sb.toString();
}
/**
* 构建 PromQL 表达式
*/
private String buildExpression(AlarmRule alarmRule) {
Rule rule = alarmRule.getRule();
if (rule != null && StrUtil.isNotBlank(rule.getName()) && rule.getThreshold() != null) {
// 先使用 Rule 实体中的定义
return rule.getName() + " " + rule.getOperator() + " " + rule.getThreshold();
}
// 最后使用 itemId 和默认阈值
String metric = StrUtil.isNotBlank(alarmRule.getItemId()) ? alarmRule.getItemId() : "unknown_metric";
return metric + " > 80";
}
private String convertSecondsToDuration(Long seconds) {
if (seconds >= 86400) return (seconds / 86400) + "d";
if (seconds >= 3600) return (seconds / 3600) + "h";
if (seconds >= 60) return (seconds / 60) + "m";
return seconds + "s";
}
private String getSeverityLevel(AlarmRule alarmRule) {
// 可以根据 notification 或其他字段映射严重级别
return "warning";
}
private String sanitizeMetricName(String name) {
// 将中文或特殊字符替换为下划线保证符合 Prometheus 命名规范
return name.replaceAll("[^a-zA-Z0-9_]", "_");
}
private String defaultString(String str) {
return StrUtil.isBlank(str) ? "unknown" : str;
}
/**
* 写入临时文件
*/
private void writeTempFile(String filePath, String content) throws IOException {
File file = new File(filePath);
File parentDir = file.getParentFile();
if (parentDir != null && !parentDir.exists()) parentDir.mkdirs();
try (FileWriter writer = new FileWriter(file)) {
writer.write(content);
}
}
private void backupOriginalFile(String filePath) throws Exception {
String backupPath = filePath + ".backup";
if (isLocalMode) {
File original = new File(filePath);
File backup = new File(backupPath);
java.nio.file.Files.copy(original.toPath(), backup.toPath(), java.nio.file.StandardCopyOption.REPLACE_EXISTING);
} else {
ChannelSftp sftp = getSftpChannel();
sftp.rename(filePath, backupPath);
}
log.info("已备份文件: {} -> {}", filePath, backupPath);
}
private void restoreBackupFile(String filePath) throws Exception {
String backupPath = filePath + ".backup";
if (fileExists(backupPath)) {
if (isLocalMode) {
File backup = new File(backupPath);
File original = new File(filePath);
java.nio.file.Files.copy(backup.toPath(), original.toPath(), java.nio.file.StandardCopyOption.REPLACE_EXISTING);
backup.delete();
} else {
ChannelSftp sftp = getSftpChannel();
sftp.rename(backupPath, filePath);
}
log.info("已从备份恢复文件: {}", filePath);
}
}
/**
* 验证Prometheus配置
*/
public boolean validatePrometheusConfig() throws Exception {
String command = promtoolPath + " check config " + prometheusConfigPath;
String result = executeCommand(command);
log.info("Prometheus配置验证结果: {}", result);
// 检查输出中是否包含"SUCCESS"或无错误信息
return result.toLowerCase().contains("success") ||
(!result.toLowerCase().contains("error") &&
!result.toLowerCase().contains("failed"));
}
/**
* 重新加载Prometheus
*/
public void reloadPrometheus() throws Exception {
String command =
"curl -X POST http://" + prometheusHost + ":" + prometheusPort + "/-/reload";
String result = executeCommand(command);
log.info("Prometheus重载结果: {}", result);
}
/**
* 保存告警规则到文件
*/
public void saveAlertRuleToFile(String ruleName, String ruleContent) throws Exception {
String tempRulePath = "/tmp/" + ruleName + ".yml";
String finalRulePath = alertRulesPath + "/" + ruleName + ".yml";
// 写入临时文件
writeTempFile(tempRulePath, ruleContent);
// 上传到目标位置
uploadFile(tempRulePath, finalRulePath);
// 验证配置
if (!validatePrometheusConfig()) {
// 如果验证失败回滚更改
deleteFile(finalRulePath);
throw new Exception("告警规则配置验证失败");
}
// 重新加载Prometheus
reloadPrometheus();
log.info("告警规则保存成功: {}", ruleName);
}
/**
* 获取所有告警规则
*/
public List<String> getAllAlertRules() throws Exception {
List<String> rules = new ArrayList<>();
// 获取alert_rules目录下的所有YAML文件
List<String> files = listDirectory(alertRulesPath);
for (String file : files) {
if (file.toLowerCase().endsWith(".yml") || file.toLowerCase().endsWith(".yaml")) {
rules.add(file);
}
}
return rules;
}
/**
* 获取告警规则内容
*/
public String getAlertRuleContent(String ruleName) throws Exception {
String rulePath = alertRulesPath + "/" + ruleName;
if (isLocalMode) {
// 本地模式直接读取文件
try (BufferedReader reader = new BufferedReader(new FileReader(rulePath))) {
StringBuilder content = new StringBuilder();
String line;
while ((line = reader.readLine()) != null) {
content.append(line).append("\n");
}
return content.toString();
}
} else {
// 远程模式使用SFTP下载
ChannelSftp sftp = getSftpChannel();
try (InputStream inputStream = sftp.get(rulePath);
BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream))) {
StringBuilder content = new StringBuilder();
String line;
while ((line = reader.readLine()) != null) {
content.append(line).append("\n");
}
return content.toString();
}
}
}
/**
* 删除告警规则
*/
public void deleteAlertRule(String ruleName) throws Exception {
String rulePath = alertRulesPath + "/" + ruleName;
// 删除规则文件
deleteFile(rulePath);
// 验证配置
if (!validatePrometheusConfig()) {
throw new Exception("删除告警规则后配置验证失败");
}
// 重新加载Prometheus
reloadPrometheus();
log.info("告警规则删除成功: {}", ruleName);
}
/**
* 更新告警规则
*/
public void updateAlertRule(String ruleName, String newRuleContent) throws Exception {
String tempRulePath = "/tmp/" + ruleName + ".yml";
String finalRulePath = alertRulesPath + "/" + ruleName;
// 写入临时文件
writeTempFile(tempRulePath, newRuleContent);
// 上传到目标位置
uploadFile(tempRulePath, finalRulePath);
// 验证配置
if (!validatePrometheusConfig()) {
throw new Exception("更新告警规则后配置验证失败");
}
// 重新加载Prometheus
reloadPrometheus();
log.info("告警规则更新成功: {}", ruleName);
}
/**
* 关闭所有连接
*/
public void close() {
if (channelExec != null && channelExec.isConnected()) {
channelExec.disconnect();
}
if (channelSftp != null && channelSftp.isConnected()) {
channelSftp.disconnect();
}
if (session != null && session.isConnected()) {
session.disconnect();
}
log.info("Prometheus Alert Manager 已关闭");
}
// Getters
public String getHost() {
return host;
}
public String getAlertRulesPath() {
return alertRulesPath;
}
public boolean isLocalMode() {
return isLocalMode;
}
// Setters
public void setLocalMode(boolean localMode) {
isLocalMode = localMode;
if (localMode) {
adjustLocalTimeouts();
}
}
}

View File

@ -6,6 +6,7 @@ import lombok.Data;
import lombok.extern.slf4j.Slf4j;
import org.jeecg.modules.base.entity.monitor.History;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.cloud.context.config.annotation.RefreshScope;
import org.springframework.http.HttpEntity;
import org.springframework.http.HttpMethod;
import org.springframework.http.ResponseEntity;
@ -20,10 +21,13 @@ import java.util.*;
import java.util.stream.Collectors;
@Component
@RefreshScope
@Slf4j
public class PrometheusUtil {
@Value("${prometheus.url:http://172.21.170.11:9090}")
@Value("${prometheus.url:http://172.21.170.11}")
private String prometheusUrl;
@Value("${prometheus.prometheusPort:9090}")
private int prometheusPort;
@Value("${prometheus.connect-timeout:30}")
private int connectTimeout;
@ -112,7 +116,7 @@ public class PrometheusUtil {
private URI buildQueryUri(String path, String promql, Long start, Long end, Long step) {
String encodedQuery = URLEncoder.encode(promql);
StringBuilder url = new StringBuilder();
url.append(prometheusUrl).append(path)
url.append(prometheusUrl + ":" + prometheusPort).append(path)
.append("?query=").append(encodedQuery);
if (start != null) {
@ -126,6 +130,7 @@ public class PrometheusUtil {
}
try {
log.info("构建URI: " + url);
return new URI(url.toString());
} catch (Exception e) {
throw new RuntimeException("构建URI失败: " + url, e);
@ -207,7 +212,7 @@ public class PrometheusUtil {
//region 健康检查
public boolean healrhChenck() {
try {
String url = prometheusUrl + "/-/healthy";
String url = prometheusUrl + ":" + prometheusPort + "/-/healthy";
restTemplate.getForObject(url, String.class);
return true;
} catch (Exception e) {
@ -222,7 +227,7 @@ public class PrometheusUtil {
* @return
*/
public JSONObject buildInfo() {
String url = prometheusUrl + "/api/v1/status/buildinfo";
String url = prometheusUrl + ":" + prometheusPort + "/api/v1/status/buildinfo";
return get(url).getJSONObject("data");
}
//endregion
@ -239,7 +244,7 @@ public class PrometheusUtil {
public List<QueryResult> queryInstantFull(String promql, Long time) {
StringBuilder url = new StringBuilder();
url.append(prometheusUrl).append("/api/v1/query?query=")
url.append(prometheusUrl+":"+prometheusPort).append("/api/v1/query?query=")
.append(URLEncoder.encode(promql));
if (time != null) {
@ -402,7 +407,7 @@ public class PrometheusUtil {
*/
public List<Map<String, String>> series(List<String> selectors, long start, long end) {
StringBuilder url = new StringBuilder();
url.append(prometheusUrl).append("/api/v1/series?");
url.append(prometheusUrl+":"+prometheusPort).append("/api/v1/series?");
for (String selector : selectors) {
url.append("match[]=").append(URLEncoder.encode(selector))
.append("&");
@ -426,13 +431,11 @@ public class PrometheusUtil {
}
/**
* 获取标签值列表
*/
public Set<String> labelValues(String labelName) {
String url = String.format("%s/api/v1/label/%s/values", prometheusUrl,
String url = String.format("%s/api/v1/label/%s/values", prometheusUrl+":"+prometheusPort,
URLEncoder.encode(labelName));
JSONObject json = get(url);
@ -469,7 +472,7 @@ public class PrometheusUtil {
* 获取元数据
*/
public List<MetricMetadata> metadata(String metric) {
String url = prometheusUrl + "/api/v1/metadata";
String url = prometheusUrl+":"+prometheusPort + "/api/v1/metadata";
if (metric != null) {
url = url + "&metric=" + URLEncoder.encode(metric);
}
@ -500,7 +503,7 @@ public class PrometheusUtil {
* @return List<TargetInfo>
*/
public List<TargetInfo> targets() {
String url = prometheusUrl + "/api/v1/targets";
String url = prometheusUrl+":"+prometheusPort + "/api/v1/targets";
JSONObject json = get(url);
JSONArray activeTargets = json.getJSONObject("data").getJSONArray("activeTargets");
List<TargetInfo> result = new ArrayList<>();
@ -541,7 +544,7 @@ public class PrometheusUtil {
* @return
*/
public List<AlertRule> alertRules() {
String url = prometheusUrl + "/api/v1/alertRules";
String url = prometheusUrl+":"+prometheusPort + "/api/v1/alertRules";
JSONObject json = get(url);
JSONArray groups = json.getJSONObject("data").getJSONArray("groups");
List<AlertRule> result = new ArrayList<>();
@ -664,7 +667,34 @@ public class PrometheusUtil {
}
/**
* 查询数据库挂载的磁盘容量信息
*/
public DiskMetrics queryDiskMetrics(String mountpoint) {
// 构建 PromQL 查询
String promql = String.format(
"node_filesystem_size_bytes{mountpoint=\"%s\"} or " +
"node_filesystem_avail_bytes{mountpoint=\"%s\"} or " +
"node_filesystem_free_bytes{mountpoint=\"%s\"}",
mountpoint, mountpoint, mountpoint
);
// 调用 Prometheus API
List<QueryResult> results = queryInstantFull(promql, null);
return new DiskMetrics();
}
//endregion
}
@Data
class DiskMetrics {
private Double totalBytes; // 总字节数
private Double availableBytes; // 可用字节数
private Double freeBytes; // 空闲字节数
}

View File

@ -22,6 +22,7 @@ import org.jeecg.common.constant.SymbolConstant;
import org.jeecg.common.util.EmailUtil;
import org.jeecg.common.util.JDBCUtil;
import org.jeecg.common.util.RedisUtil;
import org.jeecg.modules.Util.MonitorConfig;
import org.jeecg.modules.Util.PrometheusUtil;
import org.jeecg.modules.base.dto.NameValue;
import org.jeecg.modules.base.entity.monitor.Host;
@ -31,6 +32,7 @@ import org.jeecg.modules.base.entity.postgre.SysDatabase;
import org.jeecg.modules.base.entity.postgre.SysEmail;
import org.jeecg.modules.base.entity.postgre.SysServer;
import org.jeecg.modules.base.enums.ServerStatus;
import org.jeecg.modules.entity.MonitorItem;
import org.jeecg.modules.feignclient.ManageUtil;
import org.jeecg.modules.feignclient.MonitorAlarm;
import org.jeecg.modules.qiyeEmail.base.InstanceSDK;
@ -120,14 +122,14 @@ public class StatusAspect {
List<Host> hosts = new ArrayList<>();
//通过prometheus获取type=db的数据 up{type="db"}
List<PrometheusUtil.QueryResult> hostResults =
prometheusUtil.queryInstantFull("max by (name) (up{type=\"db\"})", null);
prometheusUtil.queryInstantFull("up{type=\"db\"}", null);
for (PrometheusUtil.QueryResult result : hostResults) {
Host host = new Host();
String hostName = result.getMetrics().get("name");
String job = result.getMetrics().get("job");
String instance = result.getMetrics().get("instance");
Double status = result.getValue().getValue();
host.setCode(job);
int status = (int) result.getValue().getValue();
host.setCode(hostName);
host.setHostId(instance);
host.setName(hostName);
host.setStatus(String.valueOf(status));
@ -206,7 +208,7 @@ public class StatusAspect {
List<PrometheusUtil.QueryResult> hostResults =
prometheusUtil.queryInstantFull(
"node_boot_time_seconds{instance=\"" + name + "\"}", null);
"node_boot_time_seconds{instance=~\"" + name + ".*\"}", null);
if (ObjectUtil.isNull(hostResults) || CollUtil.isEmpty(hostResults)) {
redisUtil.hset(key, id, new NameValue(name, online));
@ -217,7 +219,8 @@ public class StatusAspect {
Host host = new Host();
String hostName = result.getMetrics().get("__name__");
String instance = result.getMetrics().get("instance");
Double status = 1.0;
int status = 1;
host.setCode(instance);
host.setHostId(instance);
host.setName(hostName);
host.setStatus(String.valueOf(status));
@ -240,24 +243,30 @@ public class StatusAspect {
ServerStatus.WARN.getValue()), status);
redisUtil.hset(key, id, new NameValue(name, online));
// 更新该服务器的HostId
server.setHostId(host.getHostId());
String hostId = host.getHostId();
server.setHostId(hostId);
serverService.updateById(server);
// 同步服务器监控项 获取所有指标
Set<String> metric = prometheusUtil.metricNames();
List<String> metricNames =new ArrayList<>();
metricNames.add("instance=\"172.21.170.11:9090\"");
long end = System.currentTimeMillis() / 1000;
long start = end - 3600; // 过去1小时
prometheusUtil.series(metricNames,start, end);
Map<String, Item> itemMap = host.getItems();
if (MapUtil.isEmpty(itemMap) || CollUtil.isEmpty(itemMap.values())) {
return;
}
Collection<Item> items = itemMap.values();
Collection<Item> monitorItems = MonitorConfig.getAllItems();
//Set<String> metric = prometheusUtil.metricNames();
//List<String> metricNames =new ArrayList<>();
//metricNames.add("instance=\""+host.getHostId()+"\"");
//long end = System.currentTimeMillis() / 1000;
//long start = end - 3600; // 过去1小时
//prometheusUtil.series(metricNames,start, end);
// Map<String, Item> itemMap = host.getItems();
// if (MapUtil.isEmpty(itemMap) || CollUtil.isEmpty(itemMap.values())) {
// return;
// }
//Collection<Item> items = itemMap.values();
List<AlarmItem> alarmItems = new ArrayList<>();
items.forEach(item -> {
monitorItems.forEach(item -> {
AlarmItem alarmItem = BeanUtil.copyProperties(item, AlarmItem.class);
alarmItem.setId(item.getItemId());
alarmItem.setHostId(hostId);
alarmItems.add(alarmItem);
});
alarmItemService.saveOrUpdateBatch(alarmItems);

View File

@ -12,32 +12,102 @@ public class MetricConfig {
public static final Map<String, MetricConfig> METRIC_MAP = new HashMap<>();
static {
// 根据你提供的 getItems 返回结果进行 1:1 映射
METRIC_MAP.put("37494", new MetricConfig("cpuUtilization",
// 根据之前讨论的固定ID进行映射
// CPU相关 (1001-1003)
METRIC_MAP.put("1001", new MetricConfig("cpu_usage_percent",
"(1 - avg(irate(node_cpu_seconds_total{instance='%s',mode='idle'}[5m]))) * 100",
"%"));
METRIC_MAP.put("1002", new MetricConfig("cpuUtilization",
"(1 - avg(irate(node_cpu_seconds_total{instance='%s',mode='idle'}[1m]))) * 100",
"%"));
METRIC_MAP.put("37445",
new MetricConfig("swapTotalSize", "node_memory_SwapTotal_bytes{instance='%s'}",
"B"));
METRIC_MAP.put("37469", new MetricConfig("load", "node_load1{instance='%s'}", ""));
METRIC_MAP.put("37452", new MetricConfig("swapUtilization",
"(1 - (node_memory_SwapFree_bytes{instance='%s'} / node_memory_SwapTotal_bytes{instance='%s'})) * 100",
"%"));
METRIC_MAP.put("37443",
new MetricConfig("freeSwapSpace", "node_memory_SwapFree_bytes{instance='%s'}",
"B"));
METRIC_MAP.put("37460", new MetricConfig("latency",
"irate(node_disk_read_time_seconds_total{instance='%s'}[1m]) * 1000", "ms"));
METRIC_MAP.put("37449", new MetricConfig("memoryUtilization",
METRIC_MAP.put("1003", new MetricConfig("system_load_average",
"node_load1{instance='%s'}",
"load"));
// 内存相关 (1101-1107)
METRIC_MAP.put("1101", new MetricConfig("memory_used_percent",
"(1 - (node_memory_MemAvailable_bytes{instance='%s'} / node_memory_MemTotal_bytes{instance='%s'})) * 100",
"%"));
METRIC_MAP.put("37660", new MetricConfig("throughput",
"sum(irate(node_network_receive_bytes_total{instance='%s',device!~'lo'}[1m]))",
METRIC_MAP.put("1102", new MetricConfig("memoryUtilization",
"(1 - (node_memory_MemAvailable_bytes{instance='%s'} / node_memory_MemTotal_bytes{instance='%s'})) * 100",
"%"));
METRIC_MAP.put("1103", new MetricConfig("memAvailable",
"node_memory_MemAvailable_bytes{instance='%s'}",
"B"));
METRIC_MAP.put("37493", new MetricConfig("responseSuccessRate", "vector(100)", "%"));
METRIC_MAP.put("37454",
new MetricConfig("memAvailable", "node_memory_MemAvailable_bytes{instance='%s'}",
"B"));
METRIC_MAP.put("1104", new MetricConfig("swapUtilization",
"(1 - (node_memory_SwapFree_bytes{instance='%s'} / node_memory_SwapTotal_bytes{instance='%s'})) * 100",
"%"));
METRIC_MAP.put("1105", new MetricConfig("swapTotalSize",
"node_memory_SwapTotal_bytes{instance='%s'}",
"B"));
METRIC_MAP.put("1106", new MetricConfig("freeSwapSpace",
"node_memory_SwapFree_bytes{instance='%s'}",
"B"));
METRIC_MAP.put("1107", new MetricConfig("swap_used_percent",
"(1 - (node_memory_SwapFree_bytes{instance='%s'} / node_memory_SwapTotal_bytes{instance='%s'})) * 100",
"%"));
// 磁盘相关 (1201-1202)
METRIC_MAP.put("1201", new MetricConfig("disk_io_read_bytes",
"sum(irate(node_disk_read_bytes_total{instance='%s'}[5m]))",
"bytes/s"));
METRIC_MAP.put("1202", new MetricConfig("disk_io_write_bytes",
"irate(node_disk_written_bytes_total{instance='%s'}[5m])",
"bytes/s"));
// 网络相关 (1301-1306)
METRIC_MAP.put("1301", new MetricConfig("network_receive_bytes",
"irate(node_network_receive_bytes_total{instance='%s',device!='lo'}[5m])",
"bytes/s"));
METRIC_MAP.put("1302", new MetricConfig("network_transmit_bytes",
"irate(node_network_transmit_bytes_total{instance='%s',device!='lo'}[5m])",
"bytes/s"));
METRIC_MAP.put("1303", new MetricConfig("throughput",
"sum(irate(node_network_receive_bytes_total{instance='%s',device!~'lo'}[1m]))",
"Kb/s"));
METRIC_MAP.put("1304", new MetricConfig("latency_ms",
"avg(irate(node_network_receive_bytes_total{instance='%s'}[5m])) > 0",
"ms"));
METRIC_MAP.put("1305", new MetricConfig("latency_s",
"histogram_quantile(0.95, sum(rate(http_request_duration_seconds_bucket{instance='%s'}[5m])) by (le))",
"s"));
METRIC_MAP.put("1306", new MetricConfig("tcp_established_connections",
"node_netstat_Tcp_CurrEstab{instance='%s'}",
"count"));
// 进程相关 (1401)
METRIC_MAP.put("1401", new MetricConfig("process_count",
"node_procs_running{instance='%s'}",
"count"));
// 数据库相关 (1501-1502)
METRIC_MAP.put("1501", new MetricConfig("dbMemory",
"pg_database_size_bytes{datname='armd',instance='%s'}/1024",
"Kb"));
METRIC_MAP.put("1502", new MetricConfig("dblSize",
"sum(pg_database_size_bytes{instance='%s'})",
"B"));
// 应用相关 (1601-1603)
METRIC_MAP.put("1601", new MetricConfig("responseSuccessRate",
"sum(rate(http_requests_total{instance='%s',status=~'2..'}[5m])) / sum(rate(http_requests_total{instance='%s'}[5m])) * 100",
"%"));
METRIC_MAP.put("1602", new MetricConfig("login",
"sum(increase(login_attempts_total{instance='%s'}[5m]))",
""));
METRIC_MAP.put("1603", new MetricConfig("connections",
"node_netstat_Tcp_CurrEstab{instance='%s'}",
""));
// 日志相关 (1701)
METRIC_MAP.put("1701", new MetricConfig("logRemainingSize",
"node_filesystem_avail_bytes{instance='%s',mountpoint=~'/var/log.*|/opt/log.*'}",
"B"));
// 系统负载 (1801)
METRIC_MAP.put("1801", new MetricConfig("load",
"node_load1{instance='%s'}",
""));
}
private String name;

View File

@ -0,0 +1,25 @@
package org.jeecg.modules.entity;
import lombok.Data;
import org.jeecg.modules.base.entity.monitor.Item;
@Data
public class MonitorItem extends Item {
// 构造方法
public MonitorItem() {
}
public MonitorItem(String itemId, String name, String hostId, String description,
String units, String status, org.jeecg.modules.entity.ValueType valueType) {
this.setItemId(itemId);
this.setName(name);
this.setHostId(hostId);
this.setDescription(description);
this.setUnits(units);
this.setStatus(status);
this.setValueType(String.valueOf(valueType.getCode()));
}
}

View File

@ -0,0 +1,83 @@
package org.jeecg.modules.entity;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
public enum ValueType {
// 使用枚举值和对应的数字代码
INT(1, "int", "整型"),
FLOAT(2, "float", "浮点型"),
DOUBLE(3, "double", "双精度浮点型"),
LONG(4, "long", "长整型"),
STRING(5, "string", "字符串类型"),
BOOLEAN(6, "boolean", "布尔类型");
private final int code; // 数字代码
private final String name; // 类型名称
private final String desc; // 描述
ValueType(int code, String name, String desc) {
this.code = code;
this.name = name;
this.desc = desc;
}
public int getCode() {
return code;
}
public String getName() {
return name;
}
public String getDesc() {
return desc;
}
/**
* 根据代码获取枚举
*/
public static ValueType fromCode(int code) {
for (ValueType type : values()) {
if (type.code == code) {
return type;
}
}
throw new IllegalArgumentException("无效的值类型代码: " + code);
}
/**
* 根据名称获取枚举
*/
public static ValueType fromName(String name) {
for (ValueType type : values()) {
if (type.name.equalsIgnoreCase(name)) {
return type;
}
}
throw new IllegalArgumentException("无效的值类型名称: " + name);
}
/**
* 验证代码是否有效
*/
public static boolean isValidCode(int code) {
for (ValueType type : values()) {
if (type.code == code) {
return true;
}
}
return false;
}
/**
* 获取所有代码列表
*/
public static List<Integer> getAllCodes() {
return Arrays.stream(values())
.map(ValueType::getCode)
.collect(Collectors.toList());
}
}

View File

@ -16,8 +16,7 @@ public interface IMonitorService {
void dbDetails(String start, String end, String hostId);
Result<?> queryCpuHistoryData(String itemId, Integer itemType, String start, String end,
String instance);
Result<?> queryCpuHistoryData(String itemId, Integer itemType, String start, String end);
Result<?> detail( String hostId, String pageName, String start, String end);

View File

@ -35,6 +35,7 @@ import org.jeecg.modules.base.entity.monitor.History;
import org.jeecg.modules.base.entity.monitor.Host;
import org.jeecg.modules.base.entity.monitor.Item;
import org.jeecg.modules.base.entity.monitor.ItemHistory;
import org.jeecg.modules.base.entity.postgre.AlarmItem;
import org.jeecg.modules.base.entity.postgre.SysDatabase;
import org.jeecg.modules.base.entity.postgre.SysServer;
import org.jeecg.modules.base.enums.ServerStatus;
@ -42,6 +43,7 @@ import org.jeecg.modules.entity.*;
import org.jeecg.modules.feignclient.ManageUtil;
import org.jeecg.modules.feignclient.MonitorAlarm;
import org.jeecg.modules.feignclient.SystemClient;
import org.jeecg.modules.mapper.AlarmItemMapper;
import org.jeecg.modules.mapper.SysServerMapper;
import org.jeecg.modules.service.IMonitorService;
import org.jeecg.modules.service.ISysDatabaseService;
@ -77,6 +79,8 @@ public class MonitorServiceImpl implements IMonitorService {
@Autowired
private SystemClient systemClient;
@Autowired
private AlarmItemMapper alarmItemMapper;
@Autowired
private ISysDatabaseService sysDatabaseService;
@ -86,41 +90,8 @@ public class MonitorServiceImpl implements IMonitorService {
private static final DateTimeFormatter FORMATTER =
DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
private static final String INSTANCE_9100 = "172.21.170.11:9100";
//private static final String INSTANCE_9256 = "172.21.170.11:9256";
//endregion
// region 数据库监控 PromQL
private static final String PG_INSTANCE = "172.21.170.11.*";
private static final String SHARED_BUFFERS =
"pg_settings_shared_buffers_bytes{instance=~\"" + PG_INSTANCE + "\"}";
private static final String ACTIVE_CONNECTIONS =
"sum(pg_stat_database_numbackends{instance=~\"" + PG_INSTANCE
+ "\",datname!~\"template[0-1]\"})";
private static final String CONNECTION_NUMBER = ACTIVE_CONNECTIONS;
private static final String BUFFER_HIT_RATIO =
"pg_stat_database_blks_hit{instance=~\"" + PG_INSTANCE + "\",datname=\"postgres\"}"
+ " / "
+ "(pg_stat_database_blks_hit{instance=~\"" + PG_INSTANCE
+ "\",datname=\"postgres\"}"
+ " + pg_stat_database_blks_read{instance=~\"" + PG_INSTANCE
+ "\",datname=\"postgres\"})"
+ " * 100";
private static final String WAL_SIZE =
"pg_settings_max_wal_size_bytes{instance=~\"" + PG_INSTANCE + "\"}";
private static final String DATABASE_SIZE =
"sum(pg_database_size_bytes{instance=~\"" + PG_INSTANCE + "\"})";
// endregion
//region 通用查询方法
/**
@ -159,8 +130,11 @@ public class MonitorServiceImpl implements IMonitorService {
//region CPU 监控
@Override
public Result<?> queryCpuHistoryData(String itemId, Integer itemType,
String start, String end, String instance) {
String start, String end) {
try {
//通过itemId查找监控项
AlarmItem alarmItem = alarmItemMapper.selectById(itemId);
String instance = alarmItem.getHostId();
long startTime = parseTimestamp(start);
long endTime = parseTimestamp(end);
@ -196,7 +170,7 @@ public class MonitorServiceImpl implements IMonitorService {
String promql = String.format(
"1 - avg(rate(node_cpu_seconds_total{instance='%s', mode='idle'}[1m]))",
INSTANCE_9100);
hostId);
List<PrometheusUtil.QueryResult> results = queryRange(promql, start, end, 60);
return parseToCpuResult(results, "cpuUtilization", "%", true);
@ -209,7 +183,7 @@ public class MonitorServiceImpl implements IMonitorService {
String promql = String.format(
"(node_filesystem_avail_bytes{instance='%s', mountpoint='/'} / "
+ "node_filesystem_size_bytes{instance='%s', mountpoint='/'}) * 100",
INSTANCE_9100, INSTANCE_9100);
hostId, hostId);
List<PrometheusUtil.QueryResult> results = queryRange(promql, start, end, 60);
return parseToCpuResult(results, "cpuIdleRate", "%", false);
@ -220,7 +194,7 @@ public class MonitorServiceImpl implements IMonitorService {
long end = parseToUnix(endStr);
String promql = String.format(
"irate(node_intr_total{instance='%s'}[1m])", INSTANCE_9100);
"irate(node_intr_total{instance='%s'}[1m])", hostId);
List<PrometheusUtil.QueryResult> results = queryRange(promql, start, end, 60);
return parseToCpuResult(results, "cpuInterrupt", "", false);
@ -231,7 +205,7 @@ public class MonitorServiceImpl implements IMonitorService {
long end = parseToUnix(endStr);
String promql = String.format(
"irate(node_context_switches_total{instance='%s'}[1m])", INSTANCE_9100);
"irate(node_context_switches_total{instance='%s'}[1m])", hostId);
List<PrometheusUtil.QueryResult> results = queryRange(promql, start, end, 60);
return parseToCpuResult(results, "cpuSwitch", "", false);
@ -243,7 +217,7 @@ public class MonitorServiceImpl implements IMonitorService {
String promql = String.format(
"sum by (cpu) (irate(node_cpu_seconds_total{instance='%s', mode!='idle'}[1m])) * 100",
INSTANCE_9100);
instance);
List<PrometheusUtil.QueryResult> results = queryRange(promql, start, end, 60);
List<CpuMonitorResult> coreResults = new ArrayList<>();
@ -306,7 +280,6 @@ public class MonitorServiceImpl implements IMonitorService {
}
//endregion
//region 数据库 监控
@Override
public void dbDetails(String start, String end, String hostId) {
@ -338,36 +311,44 @@ public class MonitorServiceImpl implements IMonitorService {
case "postgresql":
//postgres
result.add(buildItemFromQuery("Database memory state(Total memory)", "B",
SHARED_BUFFERS, startTime, endTime, step));
"pg_settings_shared_buffers_bytes{instance='"+hostId+"'}", startTime, endTime, step));
result.add(buildItemFromQuery("Database user status(Logins per second)", "",
ACTIVE_CONNECTIONS, startTime, endTime, step));
"sum(pg_stat_database_numbackends{instance=~\"" + hostId
+ "\",datname!~\"template[0-1]\"})", startTime, endTime, step));
result.add(buildItemFromQuery("Database connection number", "",
CONNECTION_NUMBER, startTime, endTime, step));
"sum(pg_stat_database_numbackends{instance=~\"" + hostId
+ "\",datname!~\"template[0-1]\"})", startTime, endTime, step));
result.add(buildItemFromQuery("Database connection response time", "",
BUFFER_HIT_RATIO, startTime, endTime, step));
"pg_stat_database_blks_hit{instance=~\"" + hostId + "\",datname=\"postgres\"}"
+ " / "
+ "(pg_stat_database_blks_hit{instance=~\"" + hostId
+ "\",datname=\"postgres\"}"
+ " + pg_stat_database_blks_read{instance=~\"" + hostId
+ "\",datname=\"postgres\"})"
+ " * 100", startTime, endTime, step));
result.add(buildItemFromQuery("Remaining space of the database log file", "B",
WAL_SIZE, startTime, endTime, step));
"pg_settings_max_wal_size_bytes{instance=~\"" + hostId + "\"}", startTime, endTime, step));
result.add(buildItemFromQuery("Database file size", "B",
DATABASE_SIZE, startTime, endTime, step));
"sum(pg_database_size_bytes{instance=~\"" + hostId + "\"})", startTime, endTime, step));
break;
case "Oracle":
//TODO 待修改优化promql语句
//Oracle
result.add(buildItemFromQuery("Database memory state(Total memory)", "B",
"oracledb_sysmetric_total_pga_allocated{instance=\"Oracle数据库(IP:172.21.170.10,实例:orcl实例)\"}",
"oracledb_sysmetric_total_pga_allocated{instance=\""+hostId+"\"}",
startTime, endTime, step));
result.add(buildItemFromQuery("Database user status(Logins per second)", "",
"oracledb_sysmetric_logons_per_sec", startTime, endTime, step));
result.add(buildItemFromQuery("Database connection number", "",
"oracledb_sysmetric_session_count", startTime, endTime, step));
result.add(buildItemFromQuery("Database connection response time", "",
"oracledb_sysmetric_sql_service_response_time{instance=\"Oracle数据库(IP:172.21.170.10,实例:orcl实例)\"}",
"oracledb_sysmetric_sql_service_response_time{instance=\""+hostId+"\"}",
startTime, endTime, step));
result.add(buildItemFromQuery("Remaining space of the database log file", "B",
"oracledb_env_redo_value{instance=\"Oracle数据库(IP:172.21.170.10,实例:orcl实例)\",name=\"log_size\"}",
"oracledb_env_redo_value{instance=\""+hostId+"\",name=\"log_size\"}",
startTime, endTime, step));
result.add(buildItemFromQuery("Database file size", "B",
"sum(oracledb_tablespace_bytes{instance=\"Oracle数据库(IP:172.21.170.10,实例:orcl实例)\"})",
"sum(oracledb_tablespace_bytes{instance=\""+hostId+"\"})",
startTime, endTime, step));
break;
}
@ -409,7 +390,6 @@ public class MonitorServiceImpl implements IMonitorService {
//region 服务器与进程
@Override
public Result<?> detail(String hostId, String pageName, String start, String end) {
hostId = "172.21.170.10";
switch (pageName) {
case "serviceAndProcess":
@ -426,19 +406,25 @@ public class MonitorServiceImpl implements IMonitorService {
private HostDetailsResult getHostDetails(String hostIp, String start, String end) {
long startTime = parseTimestamp(start);
long endTime = parseTimestamp(end);
long step = calculateStep(startTime, endTime);
String cpuQuery =
"topk(5,\n" +
" avg_over_time(\n" +
" sum by (groupname) (\n" +
" irate(namedprocess_namegroup_cpu_seconds_total[5m])\n" +
" )[5m:]\n" +
" )\n" +
")";
String cpuQuery = String.format(
"topk(5, sum by (groupname) (irate(namedprocess_namegroup_cpu_seconds_total{instance='%s'}[5m])))",
hostIp + ":9256");
String memQuery = String.format(
"topk(5,namedprocess_namegroup_memory_bytes{instance=~\"%s\", memtype=\"resident\"}"
+ "/scalar(max(node_memory_MemTotal_bytes{instance=~\"%s\"})))",
hostIp + ":9256", hostIp + ":9100");
hostIp, hostIp);
HostDetailsResult result = new HostDetailsResult();
result.setCpu(queryProcessMetrics(cpuQuery, startTime, endTime, true));
result.setMemory(queryProcessMetrics(memQuery, startTime, endTime, true));
result.setCpu(queryProcessMetrics(cpuQuery, startTime, endTime, true, step));
result.setMemory(queryProcessMetrics(memQuery, startTime, endTime, true, step));
return result;
}
@ -448,13 +434,18 @@ public class MonitorServiceImpl implements IMonitorService {
* @param isPercent 是否乘以 100 转百分比
*/
private List<HostDetailsResult.ProcessMetric> queryProcessMetrics(
String promql, long start, long end, boolean isPercent) {
String promql, long start, long end, boolean isPercent, long step) {
List<PrometheusUtil.QueryResult> results = queryRange(promql, start, end, 60);
List<PrometheusUtil.QueryResult> resultList = queryRange(promql, start, end, step);
List<PrometheusUtil.QueryResult> safeSorted = resultList.stream()
.sorted(Comparator.comparingInt(qr ->
qr.getValues() == null ? 0 : qr.getValues().size()
))
.collect(Collectors.toList());
List<HostDetailsResult.ProcessMetric> metrics = new ArrayList<>();
double factor = isPercent ? 100.0 : 1.0;
for (PrometheusUtil.QueryResult qr : results) {
for (PrometheusUtil.QueryResult qr : safeSorted) {
String name = qr.getMetrics() != null
? qr.getMetrics().getOrDefault("groupname", "unknown")
: "unknown";
@ -465,8 +456,8 @@ public class MonitorServiceImpl implements IMonitorService {
if (qr.getValues() != null) {
for (PrometheusUtil.DataPoint p : qr.getValues()) {
history.add(new HostDetailsResult.HistoryPoint(
p.getTimestamp(), p.getValue()));
stats.accept(p.getValue());
p.getTimestamp(), p.getValue() / 100));
stats.accept(p.getValue() / 100);
}
}
@ -520,17 +511,20 @@ public class MonitorServiceImpl implements IMonitorService {
@Override
public Result<?> item(String itemId, Integer itemType, String start, String end) {
MetricConfig config = MetricConfig.METRIC_MAP.get(itemId);
//通过itemId查找监控项
AlarmItem alarmItem = alarmItemMapper.selectById(itemId);
if (config == null) {
return null;
}
String instance = alarmItem.getHostId();
try {
long startTs = LocalDateTime.parse(start, FORMATTER)
.toEpochSecond(ZoneOffset.ofHours(8));
long endTs = LocalDateTime.parse(end, FORMATTER)
.toEpochSecond(ZoneOffset.ofHours(8));
String promql = String.format(config.getPromQl(), INSTANCE_9100, INSTANCE_9100);
String promql = String.format(config.getPromQl(), instance, instance);
List<PrometheusUtil.QueryResult> results = queryRange(promql, startTs, endTs, 60);
List<Map<String, Object>> historyPoints = new ArrayList<>();
@ -558,7 +552,7 @@ public class MonitorServiceImpl implements IMonitorService {
return Result.OK(finalResult);
} catch (Exception e) {
log.error("查询指标项失败: itemId={}", itemId, e);
log.error("查询指标项失败: instance={}", instance, e);
return null;
}
}

View File

@ -20,6 +20,7 @@ import org.jeecg.common.system.vo.DictModel;
import org.jeecg.common.util.JDBCUtil;
import org.jeecg.common.util.NumUtil;
import org.jeecg.common.util.RedisUtil;
import org.jeecg.modules.Util.PrometheusAlertManager;
import org.jeecg.modules.Util.PrometheusUtil;
import org.jeecg.modules.base.dto.*;
import org.jeecg.modules.base.entity.monitor.Host;
@ -44,6 +45,11 @@ import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import java.io.IOException;
import java.nio.file.FileStore;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.time.LocalDate;
import java.time.format.DateTimeFormatter;
import java.util.*;
@ -68,6 +74,8 @@ public class SysDatabaseServiceImpl extends ServiceImpl<SysDatabaseMapper, SysDa
@Autowired
private PrometheusUtil prometheusUtil;
@Autowired
private PrometheusAlertManager prometheusAlertManager;
@Override
public Result<?> findPage(QueryRequest query) {
@ -116,7 +124,9 @@ public class SysDatabaseServiceImpl extends ServiceImpl<SysDatabaseMapper, SysDa
online = prometheusUtil.isInstanceUp(instance);
databaseDto.setOnline(online);
// 如果数据库不在线 则不需要查询数据库的使用情况
if (!online) continue;
if (!online) {
continue;
}
// 获取数据库指标信息
// dbMemory kb -> GB
@ -374,6 +384,18 @@ public class SysDatabaseServiceImpl extends ServiceImpl<SysDatabaseMapper, SysDa
}
spaceInfos = template.query(DBSQL.SPACE_OR, mapper);
}
if (StrUtil.equals(dbType, POSTGRESQL.getType())) {
String dbUrl = sysDatabase.getDbUrl();
String dbDriver = sysDatabase.getDbDriver();
String dbUsername = sysDatabase.getDbUsername();
String dbPassword = sysDatabase.getDbPassword();
RowMapper<SpaceInfo> mapper = new SpaceRowMapper();
JdbcTemplate template = JDBCUtil.template(dbUrl, dbDriver, dbUsername, dbPassword);
if (ObjectUtil.isNull(template)) {
return spaceInfos;
}
spaceInfos = template.query(DBSQL.SPACE_PG, mapper);
}
return spaceInfos;
}
@ -541,7 +563,8 @@ public class SysDatabaseServiceImpl extends ServiceImpl<SysDatabaseMapper, SysDa
instance);
break;
case LOGREMAININGSIZE:
promql = String.format("pg_wal_size_bytes{instance=\"%s\"}", instance);
promql = String.format("pg_wal_size_bytes{instance=\"%s\"}/(1024 * 1024 )",
instance);
break;
}
} else if ("Oracle".equalsIgnoreCase(dbType)) {

View File

@ -396,48 +396,54 @@ public class SysServerServiceImpl extends ServiceImpl<SysServerMapper, SysServer
return Result.error("HostId" + Prompt.PARAM_REQUIRED);
}
//查询条件
sysServer.setIpAddress("172.21.170.11");
String instance = sysServer.getIpAddress() + ":9100";
String instance = sysServer.getIpAddress();
BasicInfo basicInfo = new BasicInfo();
// --- 静态元数据 ( node_uname_info 标签获取) ---
String osInfoquery = String.format("node_os_info{instance=\"%s\"}", instance);
JSONObject osInfo = queryPromSingleMetric(osInfoquery);
if (ObjectUtil.isNotNull(osInfo)) {
JSONObject labels = osInfo.getJSONObject("metric");
basicInfo.setOsName(labels.getString("name"));
basicInfo.setOsVersion(labels.getString("version"));
List<PrometheusUtil.QueryResult> queryResultList =
prometheusUtil.queryInstantFull(osInfoquery, null);
if (ObjectUtil.isNotNull(queryResultList) && !queryResultList.isEmpty()) {
Map<String, String> labels = queryResultList.get(0).getMetrics();
basicInfo.setOsName(labels.get("name"));
basicInfo.setOsVersion(labels.get("version"));
}
// 指标包含: nodename(hostname), release(osVersion), sysname(osName), machine(cpuType)
JSONObject unameMetric =
queryPromSingleMetric("node_uname_info{instance=\"" + instance + "\"}");
if (ObjectUtil.isNotNull(unameMetric)) {
JSONObject labels = unameMetric.getJSONObject("metric");
basicInfo.setHostName(labels.getString("nodename"));
basicInfo.setKernelVersion(labels.getString("release"));
String unameQuery = String.format("node_uname_info{instance=\"%s\"}", instance);
List<PrometheusUtil.QueryResult> unameQueryList =
prometheusUtil.queryInstantFull(unameQuery, null);
if (ObjectUtil.isNotNull(unameQueryList) && !unameQueryList.isEmpty()) {
Map<String, String> labels = unameQueryList.get(0).getMetrics();
basicInfo.setHostName(labels.get("nodename"));
basicInfo.setKernelVersion(labels.get("release"));
// basicInfo.setOsName(labels.getString("sysname"));
basicInfo.setCpuType(labels.getString("machine"));
basicInfo.setCpuType(labels.get("machine"));
}
// --- 服务器状态与运行时间 ---
Double upValue = queryPromSingleValue("up{instance='" + instance + "'}");
Double upValue =
prometheusUtil.queryInstant("up{instance='" + instance + "'}");
basicInfo.setRunningState(ObjectUtil.isNotNull(upValue) && upValue == 1.0);
Double bootTime = queryPromSingleValue(
"time() - node_boot_time_seconds{instance='" + instance + "'}");
Double bootTime =
prometheusUtil.queryInstant(
"time() - node_boot_time_seconds{instance='" + instance + "'}");
basicInfo.setRunTime(
ObjectUtil.isNull(bootTime) ? "--" : NumberUtil.round(bootTime / 3600.0, 1) + "h");
// --- 硬件规格 ---
Double memTotal =
queryPromSingleValue("node_memory_MemTotal_bytes{instance='" + instance + "'}");
Double memTotal = prometheusUtil.queryInstant(
"node_memory_MemTotal_bytes{instance='" + instance + "'}");
basicInfo.setRamSize(ObjectUtil.isNull(memTotal) ? "--" :
NumberUtil.round(memTotal / 1024 / 1024 / 1024, 1) + "GB");
Double cpuCores = queryPromSingleValue(
"count(node_cpu_seconds_total{instance='" + instance + "',mode='idle'})");
Double cpuCores =
prometheusUtil.queryInstant(
"count(node_cpu_seconds_total{instance='" + instance + "',mode='idle'})");
basicInfo.setCpuCores(
ObjectUtil.isNull(cpuCores) ? "--" : String.valueOf(cpuCores.intValue()));
@ -446,34 +452,36 @@ public class SysServerServiceImpl extends ServiceImpl<SysServerMapper, SysServer
"count(node_filesystem_size_bytes{instance=\"%s\", fstype!~\"tmpfs|squashfs|overlay|autofs\"})",
instance
);
Double diskUsed = queryPromSingleValue(query);
Double diskUsed = prometheusUtil.queryInstant(query);
basicInfo.setTotalDiskPar(
ObjectUtil.isNull(diskUsed) ? "--" : String.valueOf(diskUsed.intValue()));
// 查询语句
JSONObject dmiResult = getNodeDmiInfoMetric(
"node_dmi_info * on(instance) group_left(time_zone) node_time_zone_offset_seconds {instance=\"" +
instance + "\"}");
if (ObjectUtil.isNotNull(dmiResult)) {
String dmiQuery = String.format(
"node_dmi_info * on(instance) group_left(time_zone) node_time_zone_offset_seconds {instance=\"%s\"}",
instance);
List<PrometheusUtil.QueryResult> dmiResultList =
prometheusUtil.queryInstantFull(dmiQuery, null);
if (ObjectUtil.isNotNull(dmiResultList) && !dmiResultList.isEmpty()) {
JSONObject dmiMetric = dmiResult.getJSONObject("metric");
basicInfo.setBiosVersion(dmiMetric.getString("bios_version"));
basicInfo.setBiosSupplier(dmiMetric.getString("bios_vendor"));
basicInfo.setManufacturer(dmiMetric.getString("system_vendor"));
basicInfo.setModelNumber(dmiMetric.getString("product_name"));
String timeZone = dmiMetric.getString("time_zone");
Map<String, String> dmiMetric = dmiResultList.get(0).getMetrics();
basicInfo.setBiosVersion(dmiMetric.get("bios_version"));
basicInfo.setBiosSupplier(dmiMetric.get("bios_vendor"));
basicInfo.setManufacturer(dmiMetric.get("system_vendor"));
basicInfo.setModelNumber(dmiMetric.get("product_name"));
String timeZone = dmiMetric.get("time_zone");
basicInfo.setZone(timeZone);
}
//startTime
JSONObject startTime =
getNodeDmiInfoMetric("node_boot_time_seconds{instance=\"" + instance + "\"}");
if (ObjectUtil.isNotNull(startTime)) {
List<PrometheusUtil.QueryResult> startTimeList = prometheusUtil.queryInstantFull(
"node_boot_time_seconds{instance=\"" + instance + "\"}", null);
if (ObjectUtil.isNotNull(startTimeList) && !startTimeList.isEmpty()) {
// 解析
JSONArray array = startTime.getJSONArray("value");
PrometheusUtil.DataPoint dataPoint = startTimeList.get(0).getValue();
Integer time = null;
if (array != null && array.size() > 1) {
Object value = array.get(1);
if (dataPoint != null) {
Object value = dataPoint.getValue();
if (value instanceof String) {
time = Double.valueOf((String) value).intValue();
} else if (value instanceof Number) {
@ -487,21 +495,21 @@ public class SysServerServiceImpl extends ServiceImpl<SysServerMapper, SysServer
// --- 实时利用率 (仪表盘数据) ---
// CPU使用率 = 100 - idle
Double cpuUsedVal = queryPromSingleValue(
Double cpuUsedVal = prometheusUtil.queryInstant(
"100 - (avg(irate(node_cpu_seconds_total{instance='" + instance +
"',mode='idle'}[5m])) * 100)");
basicInfo.setCpuUsed(ObjectUtil.isNull(cpuUsedVal) ? 0.0 :
NumberUtil.round(cpuUsedVal, 1).doubleValue());
// 内存使用率 = (Total - Avail) / Total
Double memUsedVal = queryPromSingleValue(
Double memUsedVal = prometheusUtil.queryInstant(
"(1 - (node_memory_MemAvailable_bytes{instance='" + instance +
"'} / node_memory_MemTotal_bytes{instance='" + instance + "'})) * 100");
basicInfo.setMemoryUsed(
ObjectUtil.isNull(memUsedVal) ? 0.0 :
NumberUtil.round(memUsedVal, 1).doubleValue());
// 响应成功率 (Prometheus通常需要通过blackbox_exporter或ping这里演示默认值)
// 响应成功率 ()
basicInfo.setResponseSuccessRate(100.0);
// --- 磁盘使用情况 (多分区解析) ---
@ -511,7 +519,7 @@ public class SysServerServiceImpl extends ServiceImpl<SysServerMapper, SysServer
basicInfo.setIp(sysServer.getIpAddress());
basicInfo.setLocation("DataCenter");
double netWorks = queryPromSingleValue(
double netWorks = prometheusUtil.queryInstant(
"count(node_network_info{operstate=\"up\",instance=\"" + instance + "\"})");
basicInfo.setNetwork(String.valueOf(netWorks));
@ -674,26 +682,14 @@ public class SysServerServiceImpl extends ServiceImpl<SysServerMapper, SysServer
instance + "',fstype!~'tmpfs|udev|overlay'}) * 100";
try {
String url = PROM_URL + URLEncoder.encode(promql, StandardCharsets.UTF_8.name());
// 1. 手动对 query 参数进行编码
String encodedQuery = URLEncoder.encode(promql, StandardCharsets.UTF_8.name());
// 2. 拼接完整的 URL 字符串
String fullUrl = PROM_URL + encodedQuery;
// 3. 关键步骤 String 包装成 java.net.URI
// 这样 RestTemplate 就会原封不动地发送这个地址
java.net.URI uri = new java.net.URI(fullUrl);
String response = restTemplate.getForObject(uri, String.class);
JSONObject json = JSON.parseObject(response);
JSONArray resultArr = json.getJSONObject("data").getJSONArray("result");
for (int i = 0; i < resultArr.size(); i++) {
JSONObject item = resultArr.getJSONObject(i);
String mountpoint = item.getJSONObject("metric").getString("mountpoint");
Double val = item.getJSONArray("value").getDouble(1);
List<PrometheusUtil.QueryResult> queryResults =
prometheusUtil.queryInstantFull(promql, null);
for (int i = 0; i < queryResults.size(); i++) {
Map<String, String> item = queryResults.get(i).getMetrics();
String mountpoint = item.get("mountpoint");
PrometheusUtil.DataPoint dataPoint = queryResults.get(i).getValue();
Double val = dataPoint.getValue();
xData.add(mountpoint);
yData.add(NumberUtil.round(val, 1).doubleValue());
}
@ -705,23 +701,6 @@ public class SysServerServiceImpl extends ServiceImpl<SysServerMapper, SysServer
return diskUsedValue;
}
public JSONObject getNodeDmiInfoMetric(String query) {
try {
// 获取 Metric 标签的方法
JSONObject metricObj = queryPromSingleMetric(query);
if (ObjectUtil.isNotNull(metricObj)) {
return metricObj;
}
} catch (Exception e) {
log.error("获取 BIOS Version 失败: {}", e.getMessage());
}
return null;
}
/**
* 时区值映射