修改告警系统监控项,
This commit is contained in:
parent
75929c4c8a
commit
59e337065b
|
|
@ -0,0 +1,214 @@
|
||||||
|
package org.jeecg.modules.Util;
|
||||||
|
|
||||||
|
import org.jeecg.modules.base.entity.monitor.Item;
|
||||||
|
import org.jeecg.modules.entity.MonitorItem;
|
||||||
|
import org.jeecg.modules.entity.ValueType;
|
||||||
|
|
||||||
|
|
||||||
|
import java.util.*;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 监控项配置类 - 包含所有预定义的监控项
|
||||||
|
*/
|
||||||
|
public class MonitorConfig {
|
||||||
|
|
||||||
|
// 默认主机ID
|
||||||
|
public static final String DEFAULT_HOST_ID = "host-001";
|
||||||
|
|
||||||
|
// 默认状态
|
||||||
|
public static final String DEFAULT_STATUS = "active";
|
||||||
|
|
||||||
|
// 按类别分组存储监控项
|
||||||
|
private static final Map<String, List<Item>> CATEGORY_ITEMS = new HashMap<>();
|
||||||
|
// ID生成器
|
||||||
|
private static int nextId = 1001;
|
||||||
|
|
||||||
|
static {
|
||||||
|
initializeAllItems();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 初始化所有监控项
|
||||||
|
*/
|
||||||
|
private static void initializeAllItems() {
|
||||||
|
// 生成ID列表
|
||||||
|
Map<String, Integer> idMap = new HashMap<>();
|
||||||
|
|
||||||
|
// CPU相关监控项
|
||||||
|
List<Item> cpuItems = Arrays.asList(
|
||||||
|
createItem(1001, "cpu_usage_percent", "CPU使用率百分比,反映服务器计算资源占用情况", "%", ValueType.FLOAT),
|
||||||
|
createItem(1002, "cpuUtilization", "CPU utilization in %", "%", ValueType.FLOAT),
|
||||||
|
createItem(1003, "system_load_average", "系统平均负载,1分钟内等待运行的进程平均数", "load", ValueType.FLOAT)
|
||||||
|
);
|
||||||
|
|
||||||
|
// 内存相关监控项
|
||||||
|
List<Item> memoryItems = Arrays.asList(
|
||||||
|
createItem(1101, "memory_used_percent", "内存使用率,服务器物理内存占用比例", "%", ValueType.FLOAT),
|
||||||
|
createItem(1102, "memoryUtilization", "Memory used percentage is calculated as (100-pavailable)", "%", ValueType.FLOAT),
|
||||||
|
createItem(1103, "memAvailable", "服务器物理内存可用空间", "B", ValueType.LONG),
|
||||||
|
createItem(1104, "swapUtilization", "服务器交换内存使用率", "%", ValueType.FLOAT),
|
||||||
|
createItem(1105, "swapTotalSize", "The total space of swap volume/file in bytes", "B", ValueType.LONG),
|
||||||
|
createItem(1106, "freeSwapSpace", "The free space of swap volume/file in bytes", "B", ValueType.LONG),
|
||||||
|
createItem(1107, "swap_used_percent", "交换分区(Swap)使用率,内存不足时的虚拟内存占用", "%", ValueType.FLOAT)
|
||||||
|
);
|
||||||
|
|
||||||
|
// 磁盘相关监控项
|
||||||
|
List<Item> diskItems = Arrays.asList(
|
||||||
|
createItem(1201, "disk_io_read_bytes", "磁盘读取吞吐量,每秒读取的数据量", "bytes/s", ValueType.FLOAT),
|
||||||
|
createItem(1202, "disk_io_write_bytes", "磁盘写入吞吐量,每秒写入的数据量", "bytes/s", ValueType.FLOAT)
|
||||||
|
);
|
||||||
|
|
||||||
|
// 网络相关监控项
|
||||||
|
List<Item> networkItems = Arrays.asList(
|
||||||
|
createItem(1301, "network_receive_bytes", "网络接收流量,网卡入站数据传输速率", "bytes/s", ValueType.FLOAT),
|
||||||
|
createItem(1302, "network_transmit_bytes", "网络发送流量,网卡出站数据传输速率", "bytes/s", ValueType.FLOAT),
|
||||||
|
createItem(1303, "throughput", "网卡吞吐量", "Kb/s", ValueType.FLOAT),
|
||||||
|
createItem(1304, "latency_ms", "响应延迟(毫秒)", "ms", ValueType.FLOAT),
|
||||||
|
createItem(1305, "latency_s", "响应时间(秒)", "s", ValueType.FLOAT),
|
||||||
|
createItem(1306, "tcp_established_connections", "当前建立的TCP连接数,反映网络连接活跃状态", "count", ValueType.INT)
|
||||||
|
);
|
||||||
|
|
||||||
|
// 进程相关监控项
|
||||||
|
List<Item> processItems = Arrays.asList(
|
||||||
|
createItem(1401, "process_count", "当前运行中的进程总数", "count", ValueType.INT)
|
||||||
|
);
|
||||||
|
|
||||||
|
// 数据库相关监控项
|
||||||
|
List<Item> databaseItems = Arrays.asList(
|
||||||
|
createItem(1501, "dbMemory", "数据库内存", "Kb", ValueType.LONG),
|
||||||
|
createItem(1502, "dblSize", "数据库文件占用空间", "B", ValueType.LONG)
|
||||||
|
);
|
||||||
|
|
||||||
|
// 应用相关监控项
|
||||||
|
List<Item> appItems = Arrays.asList(
|
||||||
|
createItem(1601, "responseSuccessRate", "服务器响应成功率", "%", ValueType.FLOAT),
|
||||||
|
createItem(1602, "login", "登录数", "", ValueType.INT),
|
||||||
|
createItem(1603, "connections", "连接数", "", ValueType.INT)
|
||||||
|
);
|
||||||
|
|
||||||
|
// 日志相关监控项
|
||||||
|
List<Item> logItems = Arrays.asList(
|
||||||
|
createItem(1701, "logRemainingSize", "日志剩余空间", "B", ValueType.LONG)
|
||||||
|
);
|
||||||
|
|
||||||
|
// 系统负载
|
||||||
|
List<Item> systemItems = Arrays.asList(
|
||||||
|
createItem(1801, "load", "系统负载", "", ValueType.FLOAT)
|
||||||
|
);
|
||||||
|
|
||||||
|
// 添加到类别映射
|
||||||
|
CATEGORY_ITEMS.put("cpu", cpuItems);
|
||||||
|
CATEGORY_ITEMS.put("memory", memoryItems);
|
||||||
|
CATEGORY_ITEMS.put("swap", memoryItems.stream()
|
||||||
|
.filter(item -> item.getName().contains("swap") || item.getName().equals("swap"))
|
||||||
|
.collect(Collectors.toList()));
|
||||||
|
CATEGORY_ITEMS.put("disk", diskItems);
|
||||||
|
CATEGORY_ITEMS.put("network", networkItems);
|
||||||
|
CATEGORY_ITEMS.put("process", processItems);
|
||||||
|
CATEGORY_ITEMS.put("database", databaseItems);
|
||||||
|
CATEGORY_ITEMS.put("application", appItems);
|
||||||
|
CATEGORY_ITEMS.put("log", logItems);
|
||||||
|
CATEGORY_ITEMS.put("system", systemItems);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 创建监控项
|
||||||
|
*/
|
||||||
|
private static MonitorItem createItem(int id, String name, String description, String units, ValueType valueType) {
|
||||||
|
return new MonitorItem(String.valueOf(id), name, DEFAULT_HOST_ID, description, units, DEFAULT_STATUS, valueType);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 获取所有监控项
|
||||||
|
*/
|
||||||
|
public static List<Item> getAllItems() {
|
||||||
|
return CATEGORY_ITEMS.values().stream()
|
||||||
|
.flatMap(List::stream)
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 按类别获取监控项
|
||||||
|
*/
|
||||||
|
public static List<Item> getItemsByCategory(String category) {
|
||||||
|
return CATEGORY_ITEMS.getOrDefault(category.toLowerCase(), Collections.emptyList());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 通过名称查找监控项
|
||||||
|
*/
|
||||||
|
public static Item findByName(String name) {
|
||||||
|
return getAllItems().stream()
|
||||||
|
.filter(item -> item.getName().equals(name))
|
||||||
|
.findFirst()
|
||||||
|
.orElse(null);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 通过ID查找监控项
|
||||||
|
*/
|
||||||
|
public static Item findById(String itemId) {
|
||||||
|
return getAllItems().stream()
|
||||||
|
.filter(item -> item.getItemId().equals(itemId))
|
||||||
|
.findFirst()
|
||||||
|
.orElse(null);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 通过数字ID查找监控项
|
||||||
|
*/
|
||||||
|
public static Item findById(int itemId) {
|
||||||
|
return findById(String.valueOf(itemId));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 获取指定主机的所有监控项
|
||||||
|
*/
|
||||||
|
public static List<Item> getItemsByHost(String hostId) {
|
||||||
|
return getAllItems().stream()
|
||||||
|
.filter(item -> item.getHostId().equals(hostId))
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 验证监控项是否存在
|
||||||
|
*/
|
||||||
|
public static boolean contains(String name) {
|
||||||
|
return findByName(name) != null;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 获取所有监控项名称
|
||||||
|
*/
|
||||||
|
public static List<String> getAllItemNames() {
|
||||||
|
return getAllItems().stream()
|
||||||
|
.map(Item::getName)
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 获取所有类别
|
||||||
|
*/
|
||||||
|
public static List<String> getAllCategories() {
|
||||||
|
return new ArrayList<>(CATEGORY_ITEMS.keySet());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 获取监控项统计信息
|
||||||
|
*/
|
||||||
|
public static Map<String, Object> getStatistics() {
|
||||||
|
Map<String, Object> stats = new HashMap<>();
|
||||||
|
stats.put("totalItems", getAllItems().size());
|
||||||
|
stats.put("categories", getAllCategories());
|
||||||
|
|
||||||
|
Map<String, Integer> categoryCounts = new HashMap<>();
|
||||||
|
for (Map.Entry<String, List<Item>> entry : CATEGORY_ITEMS.entrySet()) {
|
||||||
|
categoryCounts.put(entry.getKey(), entry.getValue().size());
|
||||||
|
}
|
||||||
|
stats.put("categoryCounts", categoryCounts);
|
||||||
|
|
||||||
|
return stats;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,682 @@
|
||||||
|
package org.jeecg.modules.Util;
|
||||||
|
|
||||||
|
import cn.hutool.core.io.FileUtil;
|
||||||
|
import cn.hutool.core.util.StrUtil;
|
||||||
|
import com.jcraft.jsch.*;
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import org.jeecg.modules.base.entity.Rule;
|
||||||
|
import org.jeecg.modules.base.entity.postgre.AlarmRule;
|
||||||
|
import org.springframework.beans.factory.annotation.Value;
|
||||||
|
import org.springframework.stereotype.Component;
|
||||||
|
|
||||||
|
import java.io.*;
|
||||||
|
import java.util.*;
|
||||||
|
import java.util.concurrent.ConcurrentHashMap;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Prometheus告警规则管理器
|
||||||
|
* Prometheus服务器,管理告警规则
|
||||||
|
*/
|
||||||
|
@Slf4j
|
||||||
|
@Component
|
||||||
|
public class PrometheusAlertManager {
|
||||||
|
|
||||||
|
private Session session;
|
||||||
|
private ChannelExec channelExec;
|
||||||
|
private ChannelSftp channelSftp;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// 连接配置
|
||||||
|
@Value("${prometheus.ssh.host:127.0.0.1}")
|
||||||
|
private String host;
|
||||||
|
@Value("${prometheus.ssh.userName:rmsops}")
|
||||||
|
private String username;
|
||||||
|
@Value("${prometheus.ssh.password:cnndc66367220}")
|
||||||
|
private String password;
|
||||||
|
@Value("${prometheus.ssh.port:22}")
|
||||||
|
private int port;
|
||||||
|
|
||||||
|
// 本地路径缓存
|
||||||
|
private final Map<String, String> localPathCache = new ConcurrentHashMap<>();
|
||||||
|
|
||||||
|
// 优化的超时配置
|
||||||
|
private int connectionTimeout = 5000; // 本地5秒,远程30秒
|
||||||
|
private int commandTimeout = 10000; // 本地10秒,远程60秒
|
||||||
|
private int sftpTimeout = 8000; // 本地8秒,远程30秒
|
||||||
|
|
||||||
|
// Prometheus配置
|
||||||
|
@Value("${prometheus.path.home:/opt/prometheus/}")
|
||||||
|
private String prometheusHome;
|
||||||
|
@Value("${prometheus.path.configPath:/opt/prometheus/conf/}")
|
||||||
|
private String prometheusConfigPath;
|
||||||
|
@Value("${prometheus.path.alertRulesPath:/opt/prometheus/rules/}")
|
||||||
|
private String alertRulesPath;
|
||||||
|
@Value("${prometheus.path.promtoolPath:/opt/prometheus/bin/}")
|
||||||
|
private String promtoolPath;
|
||||||
|
@Value("${prometheus.url:http://172.21.170.11}")
|
||||||
|
private String prometheusHost;
|
||||||
|
@Value("${prometheus.prometheusPort:9090}")
|
||||||
|
private int prometheusPort;
|
||||||
|
// 本地模式标识
|
||||||
|
@Value("${prometheus.ssh.isLocalMode:true}")
|
||||||
|
private boolean isLocalMode;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 构造函数
|
||||||
|
*/
|
||||||
|
public PrometheusAlertManager() {
|
||||||
|
if (isLocalMode) {
|
||||||
|
adjustLocalTimeouts();
|
||||||
|
log.info("运行在本地模式");
|
||||||
|
} else {
|
||||||
|
log.info("运行在远程模式");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* 本地模式下调整超时时间
|
||||||
|
*/
|
||||||
|
private void adjustLocalTimeouts() {
|
||||||
|
connectionTimeout = 5000;
|
||||||
|
commandTimeout = 10000;
|
||||||
|
sftpTimeout = 8000;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 获取SSH会话
|
||||||
|
*/
|
||||||
|
private Session getOrCreateSession() throws JSchException {
|
||||||
|
if (session == null || !session.isConnected()) {
|
||||||
|
JSch jsch = new JSch();
|
||||||
|
session = jsch.getSession(username, host, port);
|
||||||
|
session.setPassword(password);
|
||||||
|
|
||||||
|
Properties config = new Properties();
|
||||||
|
config.put("StrictHostKeyChecking", "no");
|
||||||
|
session.setConfig(config);
|
||||||
|
session.connect(connectionTimeout);
|
||||||
|
}
|
||||||
|
return session;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 执行命令 - 本地模式优化
|
||||||
|
*/
|
||||||
|
private String executeCommand(String command) throws Exception {
|
||||||
|
if (isLocalMode) {
|
||||||
|
return executeLocalCommand(command);
|
||||||
|
} else {
|
||||||
|
return executeRemoteCommand(command);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 本地命令执行
|
||||||
|
*/
|
||||||
|
private String executeLocalCommand(String command) throws Exception {
|
||||||
|
ProcessBuilder processBuilder = new ProcessBuilder("/bin/bash", "-c", command);
|
||||||
|
Process process = processBuilder.start();
|
||||||
|
|
||||||
|
StringBuilder output = new StringBuilder();
|
||||||
|
try (BufferedReader reader = new BufferedReader(
|
||||||
|
new InputStreamReader(process.getInputStream()))) {
|
||||||
|
String line;
|
||||||
|
while ((line = reader.readLine()) != null) {
|
||||||
|
output.append(line).append("\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int exitCode = process.waitFor();
|
||||||
|
if (exitCode != 0) {
|
||||||
|
throw new RuntimeException("Command failed with exit code: " + exitCode + ", Output: " +
|
||||||
|
output.toString());
|
||||||
|
}
|
||||||
|
|
||||||
|
return output.toString().trim();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 远程命令执行
|
||||||
|
*/
|
||||||
|
private String executeRemoteCommand(String command) throws Exception {
|
||||||
|
channelExec = (ChannelExec) getOrCreateSession().openChannel("exec");
|
||||||
|
channelExec.setCommand(command);
|
||||||
|
channelExec.connect(commandTimeout);
|
||||||
|
|
||||||
|
StringBuilder output = new StringBuilder();
|
||||||
|
try (BufferedReader reader = new BufferedReader(
|
||||||
|
new InputStreamReader(channelExec.getInputStream()))) {
|
||||||
|
String line;
|
||||||
|
while ((line = reader.readLine()) != null) {
|
||||||
|
output.append(line).append("\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int exitCode = channelExec.getExitStatus();
|
||||||
|
channelExec.disconnect();
|
||||||
|
|
||||||
|
if (exitCode != 0) {
|
||||||
|
throw new RuntimeException("Command failed with exit code: " + exitCode + ", Output: " +
|
||||||
|
output.toString());
|
||||||
|
}
|
||||||
|
|
||||||
|
return output.toString().trim();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 获取SFTP通道
|
||||||
|
*/
|
||||||
|
private ChannelSftp getSftpChannel() throws JSchException {
|
||||||
|
if (channelSftp == null || channelSftp.isClosed()) {
|
||||||
|
channelSftp = (ChannelSftp) getOrCreateSession().openChannel("sftp");
|
||||||
|
channelSftp.connect(sftpTimeout);
|
||||||
|
}
|
||||||
|
return channelSftp;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 文件上传 - 本地模式优化
|
||||||
|
*/
|
||||||
|
public void uploadFile(String localFilePath, String remoteFilePath) throws Exception {
|
||||||
|
if (isLocalMode) {
|
||||||
|
// 本地模式:直接复制文件
|
||||||
|
copyLocalFile(localFilePath, remoteFilePath);
|
||||||
|
} else {
|
||||||
|
// 远程模式:使用SFTP
|
||||||
|
ChannelSftp sftp = getSftpChannel();
|
||||||
|
sftp.put(localFilePath, remoteFilePath);
|
||||||
|
}
|
||||||
|
log.info("文件上传完成: {} -> {}", localFilePath, remoteFilePath);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 本地文件复制
|
||||||
|
*/
|
||||||
|
private void copyLocalFile(String source, String destination) throws IOException {
|
||||||
|
File srcFile = new File(source);
|
||||||
|
File destFile = new File(destination);
|
||||||
|
|
||||||
|
// 确保目标目录存在
|
||||||
|
File destDir = destFile.getParentFile();
|
||||||
|
if (destDir != null && !destDir.exists()) {
|
||||||
|
destDir.mkdirs();
|
||||||
|
}
|
||||||
|
|
||||||
|
try (FileInputStream fis = new FileInputStream(srcFile);
|
||||||
|
FileOutputStream fos = new FileOutputStream(destFile)) {
|
||||||
|
byte[] buffer = new byte[8192];
|
||||||
|
int length;
|
||||||
|
while ((length = fis.read(buffer)) > 0) {
|
||||||
|
fos.write(buffer, 0, length);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 列出目录内容 - 本地模式优化
|
||||||
|
*/
|
||||||
|
public List<String> listDirectory(String directory) throws Exception {
|
||||||
|
if (isLocalMode) {
|
||||||
|
return listLocalDirectory(directory);
|
||||||
|
} else {
|
||||||
|
return listRemoteDirectory(directory);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 本地目录列出
|
||||||
|
*/
|
||||||
|
private List<String> listLocalDirectory(String directory) {
|
||||||
|
List<String> files = new ArrayList<>();
|
||||||
|
File dir = new File(directory);
|
||||||
|
|
||||||
|
if (dir.exists() && dir.isDirectory()) {
|
||||||
|
File[] fileList = dir.listFiles();
|
||||||
|
if (fileList != null) {
|
||||||
|
for (File file : fileList) {
|
||||||
|
if (!file.getName().startsWith(".")) { // 排除隐藏文件
|
||||||
|
files.add(file.getName());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return files;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 远程目录列出
|
||||||
|
*/
|
||||||
|
private List<String> listRemoteDirectory(String directory) throws SftpException, JSchException {
|
||||||
|
ChannelSftp sftp = getSftpChannel();
|
||||||
|
Vector<ChannelSftp.LsEntry> entries = sftp.ls(directory);
|
||||||
|
|
||||||
|
List<String> files = new ArrayList<>();
|
||||||
|
for (ChannelSftp.LsEntry entry : entries) {
|
||||||
|
String filename = entry.getFilename();
|
||||||
|
if (!filename.startsWith(".")) { // 排除隐藏文件
|
||||||
|
files.add(filename);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return files;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 文件是否存在 - 本地模式优化
|
||||||
|
*/
|
||||||
|
public boolean fileExists(String filePath) throws Exception {
|
||||||
|
if (isLocalMode) {
|
||||||
|
return new File(filePath).exists();
|
||||||
|
} else {
|
||||||
|
try {
|
||||||
|
ChannelSftp sftp = getSftpChannel();
|
||||||
|
sftp.stat(filePath);
|
||||||
|
return true;
|
||||||
|
} catch (SftpException e) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 删除文件 - 本地模式优化
|
||||||
|
*/
|
||||||
|
public void deleteFile(String filePath) throws Exception {
|
||||||
|
if (isLocalMode) {
|
||||||
|
File file = new File(filePath);
|
||||||
|
if (file.exists()) {
|
||||||
|
file.delete();
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
ChannelSftp sftp = getSftpChannel();
|
||||||
|
sftp.rm(filePath);
|
||||||
|
}
|
||||||
|
log.info("文件删除完成: {}", filePath);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 创建目录 - 本地模式优化
|
||||||
|
*/
|
||||||
|
public void createDirectory(String directory) throws Exception {
|
||||||
|
if (isLocalMode) {
|
||||||
|
File dir = new File(directory);
|
||||||
|
if (!dir.exists()) {
|
||||||
|
dir.mkdirs();
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
ChannelSftp sftp = getSftpChannel();
|
||||||
|
try {
|
||||||
|
sftp.mkdir(directory);
|
||||||
|
} catch (SftpException e) {
|
||||||
|
// 目录可能已存在
|
||||||
|
log.debug("目录创建失败,可能已存在: {}", directory);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
log.info("目录操作完成: {}", directory);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 根据 AlarmRule 实体保存或更新 Prometheus 告警规则
|
||||||
|
*/
|
||||||
|
public boolean saveOrUpdateAlarmRule(AlarmRule alarmRule) throws Exception {
|
||||||
|
if (alarmRule.getEnabled() == null || alarmRule.getEnabled() != 1) {
|
||||||
|
log.warn("告警规则 [{}] 未启用,跳过保存", alarmRule.getName());
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// 1. 生成 YAML 内容
|
||||||
|
String ruleContent = generatePrometheusAlertRule(alarmRule);
|
||||||
|
// 2. 使用 ID 作为文件名,确保唯一性
|
||||||
|
String fileName = alarmRule.getId() + ".yml";
|
||||||
|
String fullPath = FileUtil.normalize(alertRulesPath + "/" + fileName);
|
||||||
|
|
||||||
|
// 3. 写入临时文件并上传
|
||||||
|
String tempPath = System.getProperty("java.io.tmpdir")+ fileName;
|
||||||
|
writeTempFile(tempPath, ruleContent);
|
||||||
|
|
||||||
|
// 4. 如果原文件存在则备份
|
||||||
|
if (fileExists(fullPath)) {
|
||||||
|
backupOriginalFile(fullPath);
|
||||||
|
}
|
||||||
|
|
||||||
|
uploadFile(tempPath, fullPath);
|
||||||
|
|
||||||
|
// 5. 验证规则语法
|
||||||
|
if (!validateAlertRules(fullPath)) {
|
||||||
|
log.error("告警规则 [{}] 验证失败,回滚更改", alarmRule.getName());
|
||||||
|
restoreBackupFile(fullPath);
|
||||||
|
throw new Exception("告警规则语法验证失败");
|
||||||
|
}
|
||||||
|
|
||||||
|
// 6. 重载 Prometheus
|
||||||
|
reloadPrometheus();
|
||||||
|
log.info("告警规则 [{}] 已成功同步至 Prometheus", alarmRule.getName());
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* 校验告警规则文件的语法 (调用 promtool check rules)
|
||||||
|
*/
|
||||||
|
private boolean validateAlertRules(String filePath) throws Exception {
|
||||||
|
String promtoolPath = prometheusHome + "/promtool";
|
||||||
|
String command = promtoolPath + " check rules " + filePath;
|
||||||
|
try {
|
||||||
|
String result = executeCommand(command);
|
||||||
|
return result.toLowerCase().contains("success") || result.toLowerCase().contains("check passed");
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.error("规则文件验证异常: {}", e.getMessage());
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* 将 AlarmRule 实体转换为 Prometheus YAML 字符串
|
||||||
|
*/
|
||||||
|
private String generatePrometheusAlertRule(AlarmRule alarmRule) {
|
||||||
|
StringBuilder sb = new StringBuilder();
|
||||||
|
sb.append("groups:\n");
|
||||||
|
sb.append("- name: ").append(alarmRule.getName()).append("_group\n");
|
||||||
|
sb.append(" rules:\n");
|
||||||
|
sb.append(" - alert: ").append(sanitizeMetricName(alarmRule.getName())).append("\n");
|
||||||
|
|
||||||
|
// 生成 PromQL 表达式
|
||||||
|
String expr = buildExpression(alarmRule);
|
||||||
|
sb.append(" expr: ").append(expr).append("\n");
|
||||||
|
|
||||||
|
// 沉默周期/持续时间
|
||||||
|
if (alarmRule.getSilenceCycle() != null) {
|
||||||
|
sb.append(" for: ").append(convertSecondsToDuration(alarmRule.getSilenceCycle())).append("\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
// 标签
|
||||||
|
sb.append(" labels:\n");
|
||||||
|
sb.append(" severity: ").append(getSeverityLevel(alarmRule)).append("\n");
|
||||||
|
sb.append(" source_type: \"").append(defaultString(alarmRule.getSourceType())).append("\"\n");
|
||||||
|
sb.append(" resource_id: \"").append(defaultString(alarmRule.getSourceId())).append("\"\n");
|
||||||
|
sb.append(" contact_group: \"").append(defaultString(alarmRule.getContactId())).append("\"\n");
|
||||||
|
|
||||||
|
// 注解
|
||||||
|
Rule ruleObj = alarmRule.getRule();
|
||||||
|
String conditionDesc = (ruleObj != null) ? ruleObj.joint() : "指标异常";
|
||||||
|
|
||||||
|
sb.append(" annotations:\n");
|
||||||
|
sb.append(" summary: \"").append(alarmRule.getName()).append(" - ").append(conditionDesc).append("\"\n");
|
||||||
|
sb.append(" description: \"告警名称: ").append(alarmRule.getName())
|
||||||
|
.append("\\n资源ID: ").append(defaultString(alarmRule.getSourceId()))
|
||||||
|
.append("\\n监控条件: ").append(conditionDesc).append("\"\n");
|
||||||
|
|
||||||
|
return sb.toString();
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* 构建 PromQL 表达式
|
||||||
|
*/
|
||||||
|
private String buildExpression(AlarmRule alarmRule) {
|
||||||
|
Rule rule = alarmRule.getRule();
|
||||||
|
if (rule != null && StrUtil.isNotBlank(rule.getName()) && rule.getThreshold() != null) {
|
||||||
|
// 先使用 Rule 实体中的定义
|
||||||
|
return rule.getName() + " " + rule.getOperator() + " " + rule.getThreshold();
|
||||||
|
}
|
||||||
|
// 最后使用 itemId 和默认阈值
|
||||||
|
String metric = StrUtil.isNotBlank(alarmRule.getItemId()) ? alarmRule.getItemId() : "unknown_metric";
|
||||||
|
return metric + " > 80";
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
private String convertSecondsToDuration(Long seconds) {
|
||||||
|
if (seconds >= 86400) return (seconds / 86400) + "d";
|
||||||
|
if (seconds >= 3600) return (seconds / 3600) + "h";
|
||||||
|
if (seconds >= 60) return (seconds / 60) + "m";
|
||||||
|
return seconds + "s";
|
||||||
|
}
|
||||||
|
|
||||||
|
private String getSeverityLevel(AlarmRule alarmRule) {
|
||||||
|
// 可以根据 notification 或其他字段映射严重级别
|
||||||
|
return "warning";
|
||||||
|
}
|
||||||
|
|
||||||
|
private String sanitizeMetricName(String name) {
|
||||||
|
// 将中文或特殊字符替换为下划线,保证符合 Prometheus 命名规范
|
||||||
|
return name.replaceAll("[^a-zA-Z0-9_]", "_");
|
||||||
|
}
|
||||||
|
|
||||||
|
private String defaultString(String str) {
|
||||||
|
return StrUtil.isBlank(str) ? "unknown" : str;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 写入临时文件
|
||||||
|
*/
|
||||||
|
private void writeTempFile(String filePath, String content) throws IOException {
|
||||||
|
File file = new File(filePath);
|
||||||
|
File parentDir = file.getParentFile();
|
||||||
|
if (parentDir != null && !parentDir.exists()) parentDir.mkdirs();
|
||||||
|
try (FileWriter writer = new FileWriter(file)) {
|
||||||
|
writer.write(content);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void backupOriginalFile(String filePath) throws Exception {
|
||||||
|
String backupPath = filePath + ".backup";
|
||||||
|
if (isLocalMode) {
|
||||||
|
File original = new File(filePath);
|
||||||
|
File backup = new File(backupPath);
|
||||||
|
java.nio.file.Files.copy(original.toPath(), backup.toPath(), java.nio.file.StandardCopyOption.REPLACE_EXISTING);
|
||||||
|
} else {
|
||||||
|
ChannelSftp sftp = getSftpChannel();
|
||||||
|
sftp.rename(filePath, backupPath);
|
||||||
|
}
|
||||||
|
log.info("已备份文件: {} -> {}", filePath, backupPath);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void restoreBackupFile(String filePath) throws Exception {
|
||||||
|
String backupPath = filePath + ".backup";
|
||||||
|
if (fileExists(backupPath)) {
|
||||||
|
if (isLocalMode) {
|
||||||
|
File backup = new File(backupPath);
|
||||||
|
File original = new File(filePath);
|
||||||
|
java.nio.file.Files.copy(backup.toPath(), original.toPath(), java.nio.file.StandardCopyOption.REPLACE_EXISTING);
|
||||||
|
backup.delete();
|
||||||
|
} else {
|
||||||
|
ChannelSftp sftp = getSftpChannel();
|
||||||
|
sftp.rename(backupPath, filePath);
|
||||||
|
}
|
||||||
|
log.info("已从备份恢复文件: {}", filePath);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 验证Prometheus配置
|
||||||
|
*/
|
||||||
|
public boolean validatePrometheusConfig() throws Exception {
|
||||||
|
String command = promtoolPath + " check config " + prometheusConfigPath;
|
||||||
|
String result = executeCommand(command);
|
||||||
|
|
||||||
|
log.info("Prometheus配置验证结果: {}", result);
|
||||||
|
|
||||||
|
// 检查输出中是否包含"SUCCESS"或无错误信息
|
||||||
|
return result.toLowerCase().contains("success") ||
|
||||||
|
(!result.toLowerCase().contains("error") &&
|
||||||
|
!result.toLowerCase().contains("failed"));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 重新加载Prometheus
|
||||||
|
*/
|
||||||
|
public void reloadPrometheus() throws Exception {
|
||||||
|
String command =
|
||||||
|
"curl -X POST http://" + prometheusHost + ":" + prometheusPort + "/-/reload";
|
||||||
|
String result = executeCommand(command);
|
||||||
|
|
||||||
|
log.info("Prometheus重载结果: {}", result);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 保存告警规则到文件
|
||||||
|
*/
|
||||||
|
public void saveAlertRuleToFile(String ruleName, String ruleContent) throws Exception {
|
||||||
|
String tempRulePath = "/tmp/" + ruleName + ".yml";
|
||||||
|
String finalRulePath = alertRulesPath + "/" + ruleName + ".yml";
|
||||||
|
|
||||||
|
// 写入临时文件
|
||||||
|
writeTempFile(tempRulePath, ruleContent);
|
||||||
|
|
||||||
|
// 上传到目标位置
|
||||||
|
uploadFile(tempRulePath, finalRulePath);
|
||||||
|
|
||||||
|
// 验证配置
|
||||||
|
if (!validatePrometheusConfig()) {
|
||||||
|
// 如果验证失败,回滚更改
|
||||||
|
deleteFile(finalRulePath);
|
||||||
|
throw new Exception("告警规则配置验证失败");
|
||||||
|
}
|
||||||
|
|
||||||
|
// 重新加载Prometheus
|
||||||
|
reloadPrometheus();
|
||||||
|
|
||||||
|
log.info("告警规则保存成功: {}", ruleName);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 获取所有告警规则
|
||||||
|
*/
|
||||||
|
public List<String> getAllAlertRules() throws Exception {
|
||||||
|
List<String> rules = new ArrayList<>();
|
||||||
|
|
||||||
|
// 获取alert_rules目录下的所有YAML文件
|
||||||
|
List<String> files = listDirectory(alertRulesPath);
|
||||||
|
for (String file : files) {
|
||||||
|
if (file.toLowerCase().endsWith(".yml") || file.toLowerCase().endsWith(".yaml")) {
|
||||||
|
rules.add(file);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return rules;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 获取告警规则内容
|
||||||
|
*/
|
||||||
|
public String getAlertRuleContent(String ruleName) throws Exception {
|
||||||
|
String rulePath = alertRulesPath + "/" + ruleName;
|
||||||
|
|
||||||
|
if (isLocalMode) {
|
||||||
|
// 本地模式:直接读取文件
|
||||||
|
try (BufferedReader reader = new BufferedReader(new FileReader(rulePath))) {
|
||||||
|
StringBuilder content = new StringBuilder();
|
||||||
|
String line;
|
||||||
|
while ((line = reader.readLine()) != null) {
|
||||||
|
content.append(line).append("\n");
|
||||||
|
}
|
||||||
|
return content.toString();
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// 远程模式:使用SFTP下载
|
||||||
|
ChannelSftp sftp = getSftpChannel();
|
||||||
|
try (InputStream inputStream = sftp.get(rulePath);
|
||||||
|
BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream))) {
|
||||||
|
|
||||||
|
StringBuilder content = new StringBuilder();
|
||||||
|
String line;
|
||||||
|
while ((line = reader.readLine()) != null) {
|
||||||
|
content.append(line).append("\n");
|
||||||
|
}
|
||||||
|
return content.toString();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 删除告警规则
|
||||||
|
*/
|
||||||
|
public void deleteAlertRule(String ruleName) throws Exception {
|
||||||
|
String rulePath = alertRulesPath + "/" + ruleName;
|
||||||
|
|
||||||
|
// 删除规则文件
|
||||||
|
deleteFile(rulePath);
|
||||||
|
|
||||||
|
// 验证配置
|
||||||
|
if (!validatePrometheusConfig()) {
|
||||||
|
throw new Exception("删除告警规则后配置验证失败");
|
||||||
|
}
|
||||||
|
|
||||||
|
// 重新加载Prometheus
|
||||||
|
reloadPrometheus();
|
||||||
|
|
||||||
|
log.info("告警规则删除成功: {}", ruleName);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 更新告警规则
|
||||||
|
*/
|
||||||
|
public void updateAlertRule(String ruleName, String newRuleContent) throws Exception {
|
||||||
|
String tempRulePath = "/tmp/" + ruleName + ".yml";
|
||||||
|
String finalRulePath = alertRulesPath + "/" + ruleName;
|
||||||
|
|
||||||
|
// 写入临时文件
|
||||||
|
writeTempFile(tempRulePath, newRuleContent);
|
||||||
|
|
||||||
|
// 上传到目标位置
|
||||||
|
uploadFile(tempRulePath, finalRulePath);
|
||||||
|
|
||||||
|
// 验证配置
|
||||||
|
if (!validatePrometheusConfig()) {
|
||||||
|
throw new Exception("更新告警规则后配置验证失败");
|
||||||
|
}
|
||||||
|
|
||||||
|
// 重新加载Prometheus
|
||||||
|
reloadPrometheus();
|
||||||
|
|
||||||
|
log.info("告警规则更新成功: {}", ruleName);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 关闭所有连接
|
||||||
|
*/
|
||||||
|
public void close() {
|
||||||
|
if (channelExec != null && channelExec.isConnected()) {
|
||||||
|
channelExec.disconnect();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (channelSftp != null && channelSftp.isConnected()) {
|
||||||
|
channelSftp.disconnect();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (session != null && session.isConnected()) {
|
||||||
|
session.disconnect();
|
||||||
|
}
|
||||||
|
|
||||||
|
log.info("Prometheus Alert Manager 已关闭");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Getters
|
||||||
|
public String getHost() {
|
||||||
|
return host;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getAlertRulesPath() {
|
||||||
|
return alertRulesPath;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isLocalMode() {
|
||||||
|
return isLocalMode;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Setters
|
||||||
|
public void setLocalMode(boolean localMode) {
|
||||||
|
isLocalMode = localMode;
|
||||||
|
if (localMode) {
|
||||||
|
adjustLocalTimeouts();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -6,6 +6,7 @@ import lombok.Data;
|
||||||
import lombok.extern.slf4j.Slf4j;
|
import lombok.extern.slf4j.Slf4j;
|
||||||
import org.jeecg.modules.base.entity.monitor.History;
|
import org.jeecg.modules.base.entity.monitor.History;
|
||||||
import org.springframework.beans.factory.annotation.Value;
|
import org.springframework.beans.factory.annotation.Value;
|
||||||
|
import org.springframework.cloud.context.config.annotation.RefreshScope;
|
||||||
import org.springframework.http.HttpEntity;
|
import org.springframework.http.HttpEntity;
|
||||||
import org.springframework.http.HttpMethod;
|
import org.springframework.http.HttpMethod;
|
||||||
import org.springframework.http.ResponseEntity;
|
import org.springframework.http.ResponseEntity;
|
||||||
|
|
@ -20,10 +21,13 @@ import java.util.*;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
@Component
|
@Component
|
||||||
|
@RefreshScope
|
||||||
@Slf4j
|
@Slf4j
|
||||||
public class PrometheusUtil {
|
public class PrometheusUtil {
|
||||||
@Value("${prometheus.url:http://172.21.170.11:9090}")
|
@Value("${prometheus.url:http://172.21.170.11}")
|
||||||
private String prometheusUrl;
|
private String prometheusUrl;
|
||||||
|
@Value("${prometheus.prometheusPort:9090}")
|
||||||
|
private int prometheusPort;
|
||||||
|
|
||||||
@Value("${prometheus.connect-timeout:30}")
|
@Value("${prometheus.connect-timeout:30}")
|
||||||
private int connectTimeout;
|
private int connectTimeout;
|
||||||
|
|
@ -112,7 +116,7 @@ public class PrometheusUtil {
|
||||||
private URI buildQueryUri(String path, String promql, Long start, Long end, Long step) {
|
private URI buildQueryUri(String path, String promql, Long start, Long end, Long step) {
|
||||||
String encodedQuery = URLEncoder.encode(promql);
|
String encodedQuery = URLEncoder.encode(promql);
|
||||||
StringBuilder url = new StringBuilder();
|
StringBuilder url = new StringBuilder();
|
||||||
url.append(prometheusUrl).append(path)
|
url.append(prometheusUrl + ":" + prometheusPort).append(path)
|
||||||
.append("?query=").append(encodedQuery);
|
.append("?query=").append(encodedQuery);
|
||||||
|
|
||||||
if (start != null) {
|
if (start != null) {
|
||||||
|
|
@ -126,6 +130,7 @@ public class PrometheusUtil {
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
log.info("构建URI: " + url);
|
||||||
return new URI(url.toString());
|
return new URI(url.toString());
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
throw new RuntimeException("构建URI失败: " + url, e);
|
throw new RuntimeException("构建URI失败: " + url, e);
|
||||||
|
|
@ -207,7 +212,7 @@ public class PrometheusUtil {
|
||||||
//region 健康检查
|
//region 健康检查
|
||||||
public boolean healrhChenck() {
|
public boolean healrhChenck() {
|
||||||
try {
|
try {
|
||||||
String url = prometheusUrl + "/-/healthy";
|
String url = prometheusUrl + ":" + prometheusPort + "/-/healthy";
|
||||||
restTemplate.getForObject(url, String.class);
|
restTemplate.getForObject(url, String.class);
|
||||||
return true;
|
return true;
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
|
|
@ -222,7 +227,7 @@ public class PrometheusUtil {
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
public JSONObject buildInfo() {
|
public JSONObject buildInfo() {
|
||||||
String url = prometheusUrl + "/api/v1/status/buildinfo";
|
String url = prometheusUrl + ":" + prometheusPort + "/api/v1/status/buildinfo";
|
||||||
return get(url).getJSONObject("data");
|
return get(url).getJSONObject("data");
|
||||||
}
|
}
|
||||||
//endregion
|
//endregion
|
||||||
|
|
@ -239,7 +244,7 @@ public class PrometheusUtil {
|
||||||
public List<QueryResult> queryInstantFull(String promql, Long time) {
|
public List<QueryResult> queryInstantFull(String promql, Long time) {
|
||||||
|
|
||||||
StringBuilder url = new StringBuilder();
|
StringBuilder url = new StringBuilder();
|
||||||
url.append(prometheusUrl).append("/api/v1/query?query=")
|
url.append(prometheusUrl+":"+prometheusPort).append("/api/v1/query?query=")
|
||||||
.append(URLEncoder.encode(promql));
|
.append(URLEncoder.encode(promql));
|
||||||
|
|
||||||
if (time != null) {
|
if (time != null) {
|
||||||
|
|
@ -402,7 +407,7 @@ public class PrometheusUtil {
|
||||||
*/
|
*/
|
||||||
public List<Map<String, String>> series(List<String> selectors, long start, long end) {
|
public List<Map<String, String>> series(List<String> selectors, long start, long end) {
|
||||||
StringBuilder url = new StringBuilder();
|
StringBuilder url = new StringBuilder();
|
||||||
url.append(prometheusUrl).append("/api/v1/series?");
|
url.append(prometheusUrl+":"+prometheusPort).append("/api/v1/series?");
|
||||||
for (String selector : selectors) {
|
for (String selector : selectors) {
|
||||||
url.append("match[]=").append(URLEncoder.encode(selector))
|
url.append("match[]=").append(URLEncoder.encode(selector))
|
||||||
.append("&");
|
.append("&");
|
||||||
|
|
@ -426,13 +431,11 @@ public class PrometheusUtil {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 获取标签值列表
|
* 获取标签值列表
|
||||||
*/
|
*/
|
||||||
public Set<String> labelValues(String labelName) {
|
public Set<String> labelValues(String labelName) {
|
||||||
String url = String.format("%s/api/v1/label/%s/values", prometheusUrl,
|
String url = String.format("%s/api/v1/label/%s/values", prometheusUrl+":"+prometheusPort,
|
||||||
URLEncoder.encode(labelName));
|
URLEncoder.encode(labelName));
|
||||||
|
|
||||||
JSONObject json = get(url);
|
JSONObject json = get(url);
|
||||||
|
|
@ -469,7 +472,7 @@ public class PrometheusUtil {
|
||||||
* 获取元数据
|
* 获取元数据
|
||||||
*/
|
*/
|
||||||
public List<MetricMetadata> metadata(String metric) {
|
public List<MetricMetadata> metadata(String metric) {
|
||||||
String url = prometheusUrl + "/api/v1/metadata";
|
String url = prometheusUrl+":"+prometheusPort + "/api/v1/metadata";
|
||||||
if (metric != null) {
|
if (metric != null) {
|
||||||
url = url + "&metric=" + URLEncoder.encode(metric);
|
url = url + "&metric=" + URLEncoder.encode(metric);
|
||||||
}
|
}
|
||||||
|
|
@ -500,7 +503,7 @@ public class PrometheusUtil {
|
||||||
* @return List<TargetInfo>
|
* @return List<TargetInfo>
|
||||||
*/
|
*/
|
||||||
public List<TargetInfo> targets() {
|
public List<TargetInfo> targets() {
|
||||||
String url = prometheusUrl + "/api/v1/targets";
|
String url = prometheusUrl+":"+prometheusPort + "/api/v1/targets";
|
||||||
JSONObject json = get(url);
|
JSONObject json = get(url);
|
||||||
JSONArray activeTargets = json.getJSONObject("data").getJSONArray("activeTargets");
|
JSONArray activeTargets = json.getJSONObject("data").getJSONArray("activeTargets");
|
||||||
List<TargetInfo> result = new ArrayList<>();
|
List<TargetInfo> result = new ArrayList<>();
|
||||||
|
|
@ -541,7 +544,7 @@ public class PrometheusUtil {
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
public List<AlertRule> alertRules() {
|
public List<AlertRule> alertRules() {
|
||||||
String url = prometheusUrl + "/api/v1/alertRules";
|
String url = prometheusUrl+":"+prometheusPort + "/api/v1/alertRules";
|
||||||
JSONObject json = get(url);
|
JSONObject json = get(url);
|
||||||
JSONArray groups = json.getJSONObject("data").getJSONArray("groups");
|
JSONArray groups = json.getJSONObject("data").getJSONArray("groups");
|
||||||
List<AlertRule> result = new ArrayList<>();
|
List<AlertRule> result = new ArrayList<>();
|
||||||
|
|
@ -664,7 +667,34 @@ public class PrometheusUtil {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 查询数据库挂载的磁盘容量信息
|
||||||
|
*/
|
||||||
|
|
||||||
|
public DiskMetrics queryDiskMetrics(String mountpoint) {
|
||||||
|
// 构建 PromQL 查询
|
||||||
|
String promql = String.format(
|
||||||
|
"node_filesystem_size_bytes{mountpoint=\"%s\"} or " +
|
||||||
|
"node_filesystem_avail_bytes{mountpoint=\"%s\"} or " +
|
||||||
|
"node_filesystem_free_bytes{mountpoint=\"%s\"}",
|
||||||
|
mountpoint, mountpoint, mountpoint
|
||||||
|
);
|
||||||
|
|
||||||
|
// 调用 Prometheus API
|
||||||
|
List<QueryResult> results = queryInstantFull(promql, null);
|
||||||
|
|
||||||
|
return new DiskMetrics();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
//endregion
|
//endregion
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Data
|
||||||
|
class DiskMetrics {
|
||||||
|
private Double totalBytes; // 总字节数
|
||||||
|
private Double availableBytes; // 可用字节数
|
||||||
|
private Double freeBytes; // 空闲字节数
|
||||||
|
}
|
||||||
|
|
@ -22,6 +22,7 @@ import org.jeecg.common.constant.SymbolConstant;
|
||||||
import org.jeecg.common.util.EmailUtil;
|
import org.jeecg.common.util.EmailUtil;
|
||||||
import org.jeecg.common.util.JDBCUtil;
|
import org.jeecg.common.util.JDBCUtil;
|
||||||
import org.jeecg.common.util.RedisUtil;
|
import org.jeecg.common.util.RedisUtil;
|
||||||
|
import org.jeecg.modules.Util.MonitorConfig;
|
||||||
import org.jeecg.modules.Util.PrometheusUtil;
|
import org.jeecg.modules.Util.PrometheusUtil;
|
||||||
import org.jeecg.modules.base.dto.NameValue;
|
import org.jeecg.modules.base.dto.NameValue;
|
||||||
import org.jeecg.modules.base.entity.monitor.Host;
|
import org.jeecg.modules.base.entity.monitor.Host;
|
||||||
|
|
@ -31,6 +32,7 @@ import org.jeecg.modules.base.entity.postgre.SysDatabase;
|
||||||
import org.jeecg.modules.base.entity.postgre.SysEmail;
|
import org.jeecg.modules.base.entity.postgre.SysEmail;
|
||||||
import org.jeecg.modules.base.entity.postgre.SysServer;
|
import org.jeecg.modules.base.entity.postgre.SysServer;
|
||||||
import org.jeecg.modules.base.enums.ServerStatus;
|
import org.jeecg.modules.base.enums.ServerStatus;
|
||||||
|
import org.jeecg.modules.entity.MonitorItem;
|
||||||
import org.jeecg.modules.feignclient.ManageUtil;
|
import org.jeecg.modules.feignclient.ManageUtil;
|
||||||
import org.jeecg.modules.feignclient.MonitorAlarm;
|
import org.jeecg.modules.feignclient.MonitorAlarm;
|
||||||
import org.jeecg.modules.qiyeEmail.base.InstanceSDK;
|
import org.jeecg.modules.qiyeEmail.base.InstanceSDK;
|
||||||
|
|
@ -120,14 +122,14 @@ public class StatusAspect {
|
||||||
List<Host> hosts = new ArrayList<>();
|
List<Host> hosts = new ArrayList<>();
|
||||||
//通过prometheus获取type=db的数据 up{type="db"}
|
//通过prometheus获取type=db的数据 up{type="db"}
|
||||||
List<PrometheusUtil.QueryResult> hostResults =
|
List<PrometheusUtil.QueryResult> hostResults =
|
||||||
prometheusUtil.queryInstantFull("max by (name) (up{type=\"db\"})", null);
|
prometheusUtil.queryInstantFull("up{type=\"db\"}", null);
|
||||||
for (PrometheusUtil.QueryResult result : hostResults) {
|
for (PrometheusUtil.QueryResult result : hostResults) {
|
||||||
Host host = new Host();
|
Host host = new Host();
|
||||||
String hostName = result.getMetrics().get("name");
|
String hostName = result.getMetrics().get("name");
|
||||||
String job = result.getMetrics().get("job");
|
String job = result.getMetrics().get("job");
|
||||||
String instance = result.getMetrics().get("instance");
|
String instance = result.getMetrics().get("instance");
|
||||||
Double status = result.getValue().getValue();
|
int status = (int) result.getValue().getValue();
|
||||||
host.setCode(job);
|
host.setCode(hostName);
|
||||||
host.setHostId(instance);
|
host.setHostId(instance);
|
||||||
host.setName(hostName);
|
host.setName(hostName);
|
||||||
host.setStatus(String.valueOf(status));
|
host.setStatus(String.valueOf(status));
|
||||||
|
|
@ -206,7 +208,7 @@ public class StatusAspect {
|
||||||
|
|
||||||
List<PrometheusUtil.QueryResult> hostResults =
|
List<PrometheusUtil.QueryResult> hostResults =
|
||||||
prometheusUtil.queryInstantFull(
|
prometheusUtil.queryInstantFull(
|
||||||
"node_boot_time_seconds{instance=\"" + name + "\"}", null);
|
"node_boot_time_seconds{instance=~\"" + name + ".*\"}", null);
|
||||||
|
|
||||||
if (ObjectUtil.isNull(hostResults) || CollUtil.isEmpty(hostResults)) {
|
if (ObjectUtil.isNull(hostResults) || CollUtil.isEmpty(hostResults)) {
|
||||||
redisUtil.hset(key, id, new NameValue(name, online));
|
redisUtil.hset(key, id, new NameValue(name, online));
|
||||||
|
|
@ -217,7 +219,8 @@ public class StatusAspect {
|
||||||
Host host = new Host();
|
Host host = new Host();
|
||||||
String hostName = result.getMetrics().get("__name__");
|
String hostName = result.getMetrics().get("__name__");
|
||||||
String instance = result.getMetrics().get("instance");
|
String instance = result.getMetrics().get("instance");
|
||||||
Double status = 1.0;
|
int status = 1;
|
||||||
|
host.setCode(instance);
|
||||||
host.setHostId(instance);
|
host.setHostId(instance);
|
||||||
host.setName(hostName);
|
host.setName(hostName);
|
||||||
host.setStatus(String.valueOf(status));
|
host.setStatus(String.valueOf(status));
|
||||||
|
|
@ -240,24 +243,30 @@ public class StatusAspect {
|
||||||
ServerStatus.WARN.getValue()), status);
|
ServerStatus.WARN.getValue()), status);
|
||||||
redisUtil.hset(key, id, new NameValue(name, online));
|
redisUtil.hset(key, id, new NameValue(name, online));
|
||||||
// 更新该服务器的HostId
|
// 更新该服务器的HostId
|
||||||
server.setHostId(host.getHostId());
|
String hostId = host.getHostId();
|
||||||
|
server.setHostId(hostId);
|
||||||
serverService.updateById(server);
|
serverService.updateById(server);
|
||||||
// 同步服务器监控项 获取所有指标
|
// 同步服务器监控项 获取所有指标
|
||||||
Set<String> metric = prometheusUtil.metricNames();
|
Collection<Item> monitorItems = MonitorConfig.getAllItems();
|
||||||
List<String> metricNames =new ArrayList<>();
|
|
||||||
metricNames.add("instance=\"172.21.170.11:9090\"");
|
//Set<String> metric = prometheusUtil.metricNames();
|
||||||
long end = System.currentTimeMillis() / 1000;
|
//List<String> metricNames =new ArrayList<>();
|
||||||
long start = end - 3600; // 过去1小时
|
//metricNames.add("instance=\""+host.getHostId()+"\"");
|
||||||
prometheusUtil.series(metricNames,start, end);
|
//long end = System.currentTimeMillis() / 1000;
|
||||||
Map<String, Item> itemMap = host.getItems();
|
//long start = end - 3600; // 过去1小时
|
||||||
if (MapUtil.isEmpty(itemMap) || CollUtil.isEmpty(itemMap.values())) {
|
//prometheusUtil.series(metricNames,start, end);
|
||||||
return;
|
// Map<String, Item> itemMap = host.getItems();
|
||||||
}
|
// if (MapUtil.isEmpty(itemMap) || CollUtil.isEmpty(itemMap.values())) {
|
||||||
Collection<Item> items = itemMap.values();
|
// return;
|
||||||
|
// }
|
||||||
|
//Collection<Item> items = itemMap.values();
|
||||||
|
|
||||||
List<AlarmItem> alarmItems = new ArrayList<>();
|
List<AlarmItem> alarmItems = new ArrayList<>();
|
||||||
items.forEach(item -> {
|
|
||||||
|
monitorItems.forEach(item -> {
|
||||||
AlarmItem alarmItem = BeanUtil.copyProperties(item, AlarmItem.class);
|
AlarmItem alarmItem = BeanUtil.copyProperties(item, AlarmItem.class);
|
||||||
alarmItem.setId(item.getItemId());
|
alarmItem.setId(item.getItemId());
|
||||||
|
alarmItem.setHostId(hostId);
|
||||||
alarmItems.add(alarmItem);
|
alarmItems.add(alarmItem);
|
||||||
});
|
});
|
||||||
alarmItemService.saveOrUpdateBatch(alarmItems);
|
alarmItemService.saveOrUpdateBatch(alarmItems);
|
||||||
|
|
|
||||||
|
|
@ -12,32 +12,102 @@ public class MetricConfig {
|
||||||
public static final Map<String, MetricConfig> METRIC_MAP = new HashMap<>();
|
public static final Map<String, MetricConfig> METRIC_MAP = new HashMap<>();
|
||||||
|
|
||||||
static {
|
static {
|
||||||
// 根据你提供的 getItems 返回结果进行 1:1 映射
|
// 根据之前讨论的固定ID进行映射
|
||||||
METRIC_MAP.put("37494", new MetricConfig("cpuUtilization",
|
// CPU相关 (1001-1003)
|
||||||
|
METRIC_MAP.put("1001", new MetricConfig("cpu_usage_percent",
|
||||||
|
"(1 - avg(irate(node_cpu_seconds_total{instance='%s',mode='idle'}[5m]))) * 100",
|
||||||
|
"%"));
|
||||||
|
METRIC_MAP.put("1002", new MetricConfig("cpuUtilization",
|
||||||
"(1 - avg(irate(node_cpu_seconds_total{instance='%s',mode='idle'}[1m]))) * 100",
|
"(1 - avg(irate(node_cpu_seconds_total{instance='%s',mode='idle'}[1m]))) * 100",
|
||||||
"%"));
|
"%"));
|
||||||
METRIC_MAP.put("37445",
|
METRIC_MAP.put("1003", new MetricConfig("system_load_average",
|
||||||
new MetricConfig("swapTotalSize", "node_memory_SwapTotal_bytes{instance='%s'}",
|
"node_load1{instance='%s'}",
|
||||||
"B"));
|
"load"));
|
||||||
METRIC_MAP.put("37469", new MetricConfig("load", "node_load1{instance='%s'}", ""));
|
|
||||||
METRIC_MAP.put("37452", new MetricConfig("swapUtilization",
|
// 内存相关 (1101-1107)
|
||||||
"(1 - (node_memory_SwapFree_bytes{instance='%s'} / node_memory_SwapTotal_bytes{instance='%s'})) * 100",
|
METRIC_MAP.put("1101", new MetricConfig("memory_used_percent",
|
||||||
"%"));
|
|
||||||
METRIC_MAP.put("37443",
|
|
||||||
new MetricConfig("freeSwapSpace", "node_memory_SwapFree_bytes{instance='%s'}",
|
|
||||||
"B"));
|
|
||||||
METRIC_MAP.put("37460", new MetricConfig("latency",
|
|
||||||
"irate(node_disk_read_time_seconds_total{instance='%s'}[1m]) * 1000", "ms"));
|
|
||||||
METRIC_MAP.put("37449", new MetricConfig("memoryUtilization",
|
|
||||||
"(1 - (node_memory_MemAvailable_bytes{instance='%s'} / node_memory_MemTotal_bytes{instance='%s'})) * 100",
|
"(1 - (node_memory_MemAvailable_bytes{instance='%s'} / node_memory_MemTotal_bytes{instance='%s'})) * 100",
|
||||||
"%"));
|
"%"));
|
||||||
METRIC_MAP.put("37660", new MetricConfig("throughput",
|
METRIC_MAP.put("1102", new MetricConfig("memoryUtilization",
|
||||||
|
"(1 - (node_memory_MemAvailable_bytes{instance='%s'} / node_memory_MemTotal_bytes{instance='%s'})) * 100",
|
||||||
|
"%"));
|
||||||
|
METRIC_MAP.put("1103", new MetricConfig("memAvailable",
|
||||||
|
"node_memory_MemAvailable_bytes{instance='%s'}",
|
||||||
|
"B"));
|
||||||
|
METRIC_MAP.put("1104", new MetricConfig("swapUtilization",
|
||||||
|
"(1 - (node_memory_SwapFree_bytes{instance='%s'} / node_memory_SwapTotal_bytes{instance='%s'})) * 100",
|
||||||
|
"%"));
|
||||||
|
METRIC_MAP.put("1105", new MetricConfig("swapTotalSize",
|
||||||
|
"node_memory_SwapTotal_bytes{instance='%s'}",
|
||||||
|
"B"));
|
||||||
|
METRIC_MAP.put("1106", new MetricConfig("freeSwapSpace",
|
||||||
|
"node_memory_SwapFree_bytes{instance='%s'}",
|
||||||
|
"B"));
|
||||||
|
METRIC_MAP.put("1107", new MetricConfig("swap_used_percent",
|
||||||
|
"(1 - (node_memory_SwapFree_bytes{instance='%s'} / node_memory_SwapTotal_bytes{instance='%s'})) * 100",
|
||||||
|
"%"));
|
||||||
|
|
||||||
|
// 磁盘相关 (1201-1202)
|
||||||
|
METRIC_MAP.put("1201", new MetricConfig("disk_io_read_bytes",
|
||||||
|
"sum(irate(node_disk_read_bytes_total{instance='%s'}[5m]))",
|
||||||
|
"bytes/s"));
|
||||||
|
METRIC_MAP.put("1202", new MetricConfig("disk_io_write_bytes",
|
||||||
|
"irate(node_disk_written_bytes_total{instance='%s'}[5m])",
|
||||||
|
"bytes/s"));
|
||||||
|
|
||||||
|
// 网络相关 (1301-1306)
|
||||||
|
METRIC_MAP.put("1301", new MetricConfig("network_receive_bytes",
|
||||||
|
"irate(node_network_receive_bytes_total{instance='%s',device!='lo'}[5m])",
|
||||||
|
"bytes/s"));
|
||||||
|
METRIC_MAP.put("1302", new MetricConfig("network_transmit_bytes",
|
||||||
|
"irate(node_network_transmit_bytes_total{instance='%s',device!='lo'}[5m])",
|
||||||
|
"bytes/s"));
|
||||||
|
METRIC_MAP.put("1303", new MetricConfig("throughput",
|
||||||
"sum(irate(node_network_receive_bytes_total{instance='%s',device!~'lo'}[1m]))",
|
"sum(irate(node_network_receive_bytes_total{instance='%s',device!~'lo'}[1m]))",
|
||||||
|
"Kb/s"));
|
||||||
|
METRIC_MAP.put("1304", new MetricConfig("latency_ms",
|
||||||
|
"avg(irate(node_network_receive_bytes_total{instance='%s'}[5m])) > 0",
|
||||||
|
"ms"));
|
||||||
|
METRIC_MAP.put("1305", new MetricConfig("latency_s",
|
||||||
|
"histogram_quantile(0.95, sum(rate(http_request_duration_seconds_bucket{instance='%s'}[5m])) by (le))",
|
||||||
|
"s"));
|
||||||
|
METRIC_MAP.put("1306", new MetricConfig("tcp_established_connections",
|
||||||
|
"node_netstat_Tcp_CurrEstab{instance='%s'}",
|
||||||
|
"count"));
|
||||||
|
|
||||||
|
// 进程相关 (1401)
|
||||||
|
METRIC_MAP.put("1401", new MetricConfig("process_count",
|
||||||
|
"node_procs_running{instance='%s'}",
|
||||||
|
"count"));
|
||||||
|
|
||||||
|
// 数据库相关 (1501-1502)
|
||||||
|
METRIC_MAP.put("1501", new MetricConfig("dbMemory",
|
||||||
|
"pg_database_size_bytes{datname='armd',instance='%s'}/1024",
|
||||||
|
"Kb"));
|
||||||
|
METRIC_MAP.put("1502", new MetricConfig("dblSize",
|
||||||
|
"sum(pg_database_size_bytes{instance='%s'})",
|
||||||
"B"));
|
"B"));
|
||||||
METRIC_MAP.put("37493", new MetricConfig("responseSuccessRate", "vector(100)", "%"));
|
|
||||||
METRIC_MAP.put("37454",
|
// 应用相关 (1601-1603)
|
||||||
new MetricConfig("memAvailable", "node_memory_MemAvailable_bytes{instance='%s'}",
|
METRIC_MAP.put("1601", new MetricConfig("responseSuccessRate",
|
||||||
|
"sum(rate(http_requests_total{instance='%s',status=~'2..'}[5m])) / sum(rate(http_requests_total{instance='%s'}[5m])) * 100",
|
||||||
|
"%"));
|
||||||
|
METRIC_MAP.put("1602", new MetricConfig("login",
|
||||||
|
"sum(increase(login_attempts_total{instance='%s'}[5m]))",
|
||||||
|
""));
|
||||||
|
METRIC_MAP.put("1603", new MetricConfig("connections",
|
||||||
|
"node_netstat_Tcp_CurrEstab{instance='%s'}",
|
||||||
|
""));
|
||||||
|
|
||||||
|
// 日志相关 (1701)
|
||||||
|
METRIC_MAP.put("1701", new MetricConfig("logRemainingSize",
|
||||||
|
"node_filesystem_avail_bytes{instance='%s',mountpoint=~'/var/log.*|/opt/log.*'}",
|
||||||
"B"));
|
"B"));
|
||||||
|
|
||||||
|
// 系统负载 (1801)
|
||||||
|
METRIC_MAP.put("1801", new MetricConfig("load",
|
||||||
|
"node_load1{instance='%s'}",
|
||||||
|
""));
|
||||||
}
|
}
|
||||||
|
|
||||||
private String name;
|
private String name;
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,25 @@
|
||||||
|
package org.jeecg.modules.entity;
|
||||||
|
|
||||||
|
import lombok.Data;
|
||||||
|
import org.jeecg.modules.base.entity.monitor.Item;
|
||||||
|
|
||||||
|
@Data
|
||||||
|
public class MonitorItem extends Item {
|
||||||
|
|
||||||
|
// 构造方法
|
||||||
|
public MonitorItem() {
|
||||||
|
}
|
||||||
|
|
||||||
|
public MonitorItem(String itemId, String name, String hostId, String description,
|
||||||
|
String units, String status, org.jeecg.modules.entity.ValueType valueType) {
|
||||||
|
|
||||||
|
this.setItemId(itemId);
|
||||||
|
this.setName(name);
|
||||||
|
this.setHostId(hostId);
|
||||||
|
this.setDescription(description);
|
||||||
|
this.setUnits(units);
|
||||||
|
this.setStatus(status);
|
||||||
|
this.setValueType(String.valueOf(valueType.getCode()));
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,83 @@
|
||||||
|
package org.jeecg.modules.entity;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
public enum ValueType {
|
||||||
|
|
||||||
|
// 使用枚举值和对应的数字代码
|
||||||
|
INT(1, "int", "整型"),
|
||||||
|
FLOAT(2, "float", "浮点型"),
|
||||||
|
DOUBLE(3, "double", "双精度浮点型"),
|
||||||
|
LONG(4, "long", "长整型"),
|
||||||
|
STRING(5, "string", "字符串类型"),
|
||||||
|
BOOLEAN(6, "boolean", "布尔类型");
|
||||||
|
|
||||||
|
private final int code; // 数字代码
|
||||||
|
private final String name; // 类型名称
|
||||||
|
private final String desc; // 描述
|
||||||
|
|
||||||
|
ValueType(int code, String name, String desc) {
|
||||||
|
this.code = code;
|
||||||
|
this.name = name;
|
||||||
|
this.desc = desc;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getCode() {
|
||||||
|
return code;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getName() {
|
||||||
|
return name;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getDesc() {
|
||||||
|
return desc;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 根据代码获取枚举
|
||||||
|
*/
|
||||||
|
public static ValueType fromCode(int code) {
|
||||||
|
for (ValueType type : values()) {
|
||||||
|
if (type.code == code) {
|
||||||
|
return type;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
throw new IllegalArgumentException("无效的值类型代码: " + code);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 根据名称获取枚举
|
||||||
|
*/
|
||||||
|
public static ValueType fromName(String name) {
|
||||||
|
for (ValueType type : values()) {
|
||||||
|
if (type.name.equalsIgnoreCase(name)) {
|
||||||
|
return type;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
throw new IllegalArgumentException("无效的值类型名称: " + name);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 验证代码是否有效
|
||||||
|
*/
|
||||||
|
public static boolean isValidCode(int code) {
|
||||||
|
for (ValueType type : values()) {
|
||||||
|
if (type.code == code) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 获取所有代码列表
|
||||||
|
*/
|
||||||
|
public static List<Integer> getAllCodes() {
|
||||||
|
return Arrays.stream(values())
|
||||||
|
.map(ValueType::getCode)
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -16,8 +16,7 @@ public interface IMonitorService {
|
||||||
|
|
||||||
void dbDetails(String start, String end, String hostId);
|
void dbDetails(String start, String end, String hostId);
|
||||||
|
|
||||||
Result<?> queryCpuHistoryData(String itemId, Integer itemType, String start, String end,
|
Result<?> queryCpuHistoryData(String itemId, Integer itemType, String start, String end);
|
||||||
String instance);
|
|
||||||
|
|
||||||
Result<?> detail( String hostId, String pageName, String start, String end);
|
Result<?> detail( String hostId, String pageName, String start, String end);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -35,6 +35,7 @@ import org.jeecg.modules.base.entity.monitor.History;
|
||||||
import org.jeecg.modules.base.entity.monitor.Host;
|
import org.jeecg.modules.base.entity.monitor.Host;
|
||||||
import org.jeecg.modules.base.entity.monitor.Item;
|
import org.jeecg.modules.base.entity.monitor.Item;
|
||||||
import org.jeecg.modules.base.entity.monitor.ItemHistory;
|
import org.jeecg.modules.base.entity.monitor.ItemHistory;
|
||||||
|
import org.jeecg.modules.base.entity.postgre.AlarmItem;
|
||||||
import org.jeecg.modules.base.entity.postgre.SysDatabase;
|
import org.jeecg.modules.base.entity.postgre.SysDatabase;
|
||||||
import org.jeecg.modules.base.entity.postgre.SysServer;
|
import org.jeecg.modules.base.entity.postgre.SysServer;
|
||||||
import org.jeecg.modules.base.enums.ServerStatus;
|
import org.jeecg.modules.base.enums.ServerStatus;
|
||||||
|
|
@ -42,6 +43,7 @@ import org.jeecg.modules.entity.*;
|
||||||
import org.jeecg.modules.feignclient.ManageUtil;
|
import org.jeecg.modules.feignclient.ManageUtil;
|
||||||
import org.jeecg.modules.feignclient.MonitorAlarm;
|
import org.jeecg.modules.feignclient.MonitorAlarm;
|
||||||
import org.jeecg.modules.feignclient.SystemClient;
|
import org.jeecg.modules.feignclient.SystemClient;
|
||||||
|
import org.jeecg.modules.mapper.AlarmItemMapper;
|
||||||
import org.jeecg.modules.mapper.SysServerMapper;
|
import org.jeecg.modules.mapper.SysServerMapper;
|
||||||
import org.jeecg.modules.service.IMonitorService;
|
import org.jeecg.modules.service.IMonitorService;
|
||||||
import org.jeecg.modules.service.ISysDatabaseService;
|
import org.jeecg.modules.service.ISysDatabaseService;
|
||||||
|
|
@ -77,6 +79,8 @@ public class MonitorServiceImpl implements IMonitorService {
|
||||||
|
|
||||||
@Autowired
|
@Autowired
|
||||||
private SystemClient systemClient;
|
private SystemClient systemClient;
|
||||||
|
@Autowired
|
||||||
|
private AlarmItemMapper alarmItemMapper;
|
||||||
|
|
||||||
@Autowired
|
@Autowired
|
||||||
private ISysDatabaseService sysDatabaseService;
|
private ISysDatabaseService sysDatabaseService;
|
||||||
|
|
@ -86,41 +90,8 @@ public class MonitorServiceImpl implements IMonitorService {
|
||||||
private static final DateTimeFormatter FORMATTER =
|
private static final DateTimeFormatter FORMATTER =
|
||||||
DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
|
DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
|
||||||
|
|
||||||
private static final String INSTANCE_9100 = "172.21.170.11:9100";
|
|
||||||
//private static final String INSTANCE_9256 = "172.21.170.11:9256";
|
|
||||||
//endregion
|
//endregion
|
||||||
|
|
||||||
// region 数据库监控 PromQL
|
|
||||||
|
|
||||||
private static final String PG_INSTANCE = "172.21.170.11.*";
|
|
||||||
|
|
||||||
private static final String SHARED_BUFFERS =
|
|
||||||
"pg_settings_shared_buffers_bytes{instance=~\"" + PG_INSTANCE + "\"}";
|
|
||||||
|
|
||||||
private static final String ACTIVE_CONNECTIONS =
|
|
||||||
"sum(pg_stat_database_numbackends{instance=~\"" + PG_INSTANCE
|
|
||||||
+ "\",datname!~\"template[0-1]\"})";
|
|
||||||
|
|
||||||
private static final String CONNECTION_NUMBER = ACTIVE_CONNECTIONS;
|
|
||||||
|
|
||||||
private static final String BUFFER_HIT_RATIO =
|
|
||||||
"pg_stat_database_blks_hit{instance=~\"" + PG_INSTANCE + "\",datname=\"postgres\"}"
|
|
||||||
+ " / "
|
|
||||||
+ "(pg_stat_database_blks_hit{instance=~\"" + PG_INSTANCE
|
|
||||||
+ "\",datname=\"postgres\"}"
|
|
||||||
+ " + pg_stat_database_blks_read{instance=~\"" + PG_INSTANCE
|
|
||||||
+ "\",datname=\"postgres\"})"
|
|
||||||
+ " * 100";
|
|
||||||
|
|
||||||
private static final String WAL_SIZE =
|
|
||||||
"pg_settings_max_wal_size_bytes{instance=~\"" + PG_INSTANCE + "\"}";
|
|
||||||
|
|
||||||
private static final String DATABASE_SIZE =
|
|
||||||
"sum(pg_database_size_bytes{instance=~\"" + PG_INSTANCE + "\"})";
|
|
||||||
|
|
||||||
// endregion
|
|
||||||
|
|
||||||
|
|
||||||
//region 通用查询方法
|
//region 通用查询方法
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -159,8 +130,11 @@ public class MonitorServiceImpl implements IMonitorService {
|
||||||
//region CPU 监控
|
//region CPU 监控
|
||||||
@Override
|
@Override
|
||||||
public Result<?> queryCpuHistoryData(String itemId, Integer itemType,
|
public Result<?> queryCpuHistoryData(String itemId, Integer itemType,
|
||||||
String start, String end, String instance) {
|
String start, String end) {
|
||||||
try {
|
try {
|
||||||
|
//通过itemId查找监控项
|
||||||
|
AlarmItem alarmItem = alarmItemMapper.selectById(itemId);
|
||||||
|
String instance = alarmItem.getHostId();
|
||||||
long startTime = parseTimestamp(start);
|
long startTime = parseTimestamp(start);
|
||||||
long endTime = parseTimestamp(end);
|
long endTime = parseTimestamp(end);
|
||||||
|
|
||||||
|
|
@ -196,7 +170,7 @@ public class MonitorServiceImpl implements IMonitorService {
|
||||||
|
|
||||||
String promql = String.format(
|
String promql = String.format(
|
||||||
"1 - avg(rate(node_cpu_seconds_total{instance='%s', mode='idle'}[1m]))",
|
"1 - avg(rate(node_cpu_seconds_total{instance='%s', mode='idle'}[1m]))",
|
||||||
INSTANCE_9100);
|
hostId);
|
||||||
|
|
||||||
List<PrometheusUtil.QueryResult> results = queryRange(promql, start, end, 60);
|
List<PrometheusUtil.QueryResult> results = queryRange(promql, start, end, 60);
|
||||||
return parseToCpuResult(results, "cpuUtilization", "%", true);
|
return parseToCpuResult(results, "cpuUtilization", "%", true);
|
||||||
|
|
@ -209,7 +183,7 @@ public class MonitorServiceImpl implements IMonitorService {
|
||||||
String promql = String.format(
|
String promql = String.format(
|
||||||
"(node_filesystem_avail_bytes{instance='%s', mountpoint='/'} / "
|
"(node_filesystem_avail_bytes{instance='%s', mountpoint='/'} / "
|
||||||
+ "node_filesystem_size_bytes{instance='%s', mountpoint='/'}) * 100",
|
+ "node_filesystem_size_bytes{instance='%s', mountpoint='/'}) * 100",
|
||||||
INSTANCE_9100, INSTANCE_9100);
|
hostId, hostId);
|
||||||
|
|
||||||
List<PrometheusUtil.QueryResult> results = queryRange(promql, start, end, 60);
|
List<PrometheusUtil.QueryResult> results = queryRange(promql, start, end, 60);
|
||||||
return parseToCpuResult(results, "cpuIdleRate", "%", false);
|
return parseToCpuResult(results, "cpuIdleRate", "%", false);
|
||||||
|
|
@ -220,7 +194,7 @@ public class MonitorServiceImpl implements IMonitorService {
|
||||||
long end = parseToUnix(endStr);
|
long end = parseToUnix(endStr);
|
||||||
|
|
||||||
String promql = String.format(
|
String promql = String.format(
|
||||||
"irate(node_intr_total{instance='%s'}[1m])", INSTANCE_9100);
|
"irate(node_intr_total{instance='%s'}[1m])", hostId);
|
||||||
|
|
||||||
List<PrometheusUtil.QueryResult> results = queryRange(promql, start, end, 60);
|
List<PrometheusUtil.QueryResult> results = queryRange(promql, start, end, 60);
|
||||||
return parseToCpuResult(results, "cpuInterrupt", "", false);
|
return parseToCpuResult(results, "cpuInterrupt", "", false);
|
||||||
|
|
@ -231,7 +205,7 @@ public class MonitorServiceImpl implements IMonitorService {
|
||||||
long end = parseToUnix(endStr);
|
long end = parseToUnix(endStr);
|
||||||
|
|
||||||
String promql = String.format(
|
String promql = String.format(
|
||||||
"irate(node_context_switches_total{instance='%s'}[1m])", INSTANCE_9100);
|
"irate(node_context_switches_total{instance='%s'}[1m])", hostId);
|
||||||
|
|
||||||
List<PrometheusUtil.QueryResult> results = queryRange(promql, start, end, 60);
|
List<PrometheusUtil.QueryResult> results = queryRange(promql, start, end, 60);
|
||||||
return parseToCpuResult(results, "cpuSwitch", "", false);
|
return parseToCpuResult(results, "cpuSwitch", "", false);
|
||||||
|
|
@ -243,7 +217,7 @@ public class MonitorServiceImpl implements IMonitorService {
|
||||||
|
|
||||||
String promql = String.format(
|
String promql = String.format(
|
||||||
"sum by (cpu) (irate(node_cpu_seconds_total{instance='%s', mode!='idle'}[1m])) * 100",
|
"sum by (cpu) (irate(node_cpu_seconds_total{instance='%s', mode!='idle'}[1m])) * 100",
|
||||||
INSTANCE_9100);
|
instance);
|
||||||
|
|
||||||
List<PrometheusUtil.QueryResult> results = queryRange(promql, start, end, 60);
|
List<PrometheusUtil.QueryResult> results = queryRange(promql, start, end, 60);
|
||||||
List<CpuMonitorResult> coreResults = new ArrayList<>();
|
List<CpuMonitorResult> coreResults = new ArrayList<>();
|
||||||
|
|
@ -306,7 +280,6 @@ public class MonitorServiceImpl implements IMonitorService {
|
||||||
}
|
}
|
||||||
//endregion
|
//endregion
|
||||||
|
|
||||||
|
|
||||||
//region 数据库 监控
|
//region 数据库 监控
|
||||||
@Override
|
@Override
|
||||||
public void dbDetails(String start, String end, String hostId) {
|
public void dbDetails(String start, String end, String hostId) {
|
||||||
|
|
@ -338,36 +311,44 @@ public class MonitorServiceImpl implements IMonitorService {
|
||||||
case "postgresql":
|
case "postgresql":
|
||||||
//postgres
|
//postgres
|
||||||
result.add(buildItemFromQuery("Database memory state(Total memory)", "B",
|
result.add(buildItemFromQuery("Database memory state(Total memory)", "B",
|
||||||
SHARED_BUFFERS, startTime, endTime, step));
|
"pg_settings_shared_buffers_bytes{instance='"+hostId+"'}", startTime, endTime, step));
|
||||||
result.add(buildItemFromQuery("Database user status(Logins per second)", "",
|
result.add(buildItemFromQuery("Database user status(Logins per second)", "",
|
||||||
ACTIVE_CONNECTIONS, startTime, endTime, step));
|
"sum(pg_stat_database_numbackends{instance=~\"" + hostId
|
||||||
|
+ "\",datname!~\"template[0-1]\"})", startTime, endTime, step));
|
||||||
result.add(buildItemFromQuery("Database connection number", "",
|
result.add(buildItemFromQuery("Database connection number", "",
|
||||||
CONNECTION_NUMBER, startTime, endTime, step));
|
"sum(pg_stat_database_numbackends{instance=~\"" + hostId
|
||||||
|
+ "\",datname!~\"template[0-1]\"})", startTime, endTime, step));
|
||||||
result.add(buildItemFromQuery("Database connection response time", "",
|
result.add(buildItemFromQuery("Database connection response time", "",
|
||||||
BUFFER_HIT_RATIO, startTime, endTime, step));
|
"pg_stat_database_blks_hit{instance=~\"" + hostId + "\",datname=\"postgres\"}"
|
||||||
|
+ " / "
|
||||||
|
+ "(pg_stat_database_blks_hit{instance=~\"" + hostId
|
||||||
|
+ "\",datname=\"postgres\"}"
|
||||||
|
+ " + pg_stat_database_blks_read{instance=~\"" + hostId
|
||||||
|
+ "\",datname=\"postgres\"})"
|
||||||
|
+ " * 100", startTime, endTime, step));
|
||||||
result.add(buildItemFromQuery("Remaining space of the database log file", "B",
|
result.add(buildItemFromQuery("Remaining space of the database log file", "B",
|
||||||
WAL_SIZE, startTime, endTime, step));
|
"pg_settings_max_wal_size_bytes{instance=~\"" + hostId + "\"}", startTime, endTime, step));
|
||||||
result.add(buildItemFromQuery("Database file size", "B",
|
result.add(buildItemFromQuery("Database file size", "B",
|
||||||
DATABASE_SIZE, startTime, endTime, step));
|
"sum(pg_database_size_bytes{instance=~\"" + hostId + "\"})", startTime, endTime, step));
|
||||||
break;
|
break;
|
||||||
case "Oracle":
|
case "Oracle":
|
||||||
//TODO 待修改,优化promql语句
|
//TODO 待修改,优化promql语句
|
||||||
//Oracle
|
//Oracle
|
||||||
result.add(buildItemFromQuery("Database memory state(Total memory)", "B",
|
result.add(buildItemFromQuery("Database memory state(Total memory)", "B",
|
||||||
"oracledb_sysmetric_total_pga_allocated{instance=\"Oracle数据库(IP:172.21.170.10,实例:orcl实例)\"}",
|
"oracledb_sysmetric_total_pga_allocated{instance=\""+hostId+"\"}",
|
||||||
startTime, endTime, step));
|
startTime, endTime, step));
|
||||||
result.add(buildItemFromQuery("Database user status(Logins per second)", "",
|
result.add(buildItemFromQuery("Database user status(Logins per second)", "",
|
||||||
"oracledb_sysmetric_logons_per_sec", startTime, endTime, step));
|
"oracledb_sysmetric_logons_per_sec", startTime, endTime, step));
|
||||||
result.add(buildItemFromQuery("Database connection number", "",
|
result.add(buildItemFromQuery("Database connection number", "",
|
||||||
"oracledb_sysmetric_session_count", startTime, endTime, step));
|
"oracledb_sysmetric_session_count", startTime, endTime, step));
|
||||||
result.add(buildItemFromQuery("Database connection response time", "",
|
result.add(buildItemFromQuery("Database connection response time", "",
|
||||||
"oracledb_sysmetric_sql_service_response_time{instance=\"Oracle数据库(IP:172.21.170.10,实例:orcl实例)\"}",
|
"oracledb_sysmetric_sql_service_response_time{instance=\""+hostId+"\"}",
|
||||||
startTime, endTime, step));
|
startTime, endTime, step));
|
||||||
result.add(buildItemFromQuery("Remaining space of the database log file", "B",
|
result.add(buildItemFromQuery("Remaining space of the database log file", "B",
|
||||||
"oracledb_env_redo_value{instance=\"Oracle数据库(IP:172.21.170.10,实例:orcl实例)\",name=\"log_size\"}",
|
"oracledb_env_redo_value{instance=\""+hostId+"\",name=\"log_size\"}",
|
||||||
startTime, endTime, step));
|
startTime, endTime, step));
|
||||||
result.add(buildItemFromQuery("Database file size", "B",
|
result.add(buildItemFromQuery("Database file size", "B",
|
||||||
"sum(oracledb_tablespace_bytes{instance=\"Oracle数据库(IP:172.21.170.10,实例:orcl实例)\"})",
|
"sum(oracledb_tablespace_bytes{instance=\""+hostId+"\"})",
|
||||||
startTime, endTime, step));
|
startTime, endTime, step));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
@ -409,7 +390,6 @@ public class MonitorServiceImpl implements IMonitorService {
|
||||||
//region 服务器与进程
|
//region 服务器与进程
|
||||||
@Override
|
@Override
|
||||||
public Result<?> detail(String hostId, String pageName, String start, String end) {
|
public Result<?> detail(String hostId, String pageName, String start, String end) {
|
||||||
hostId = "172.21.170.10";
|
|
||||||
|
|
||||||
switch (pageName) {
|
switch (pageName) {
|
||||||
case "serviceAndProcess":
|
case "serviceAndProcess":
|
||||||
|
|
@ -426,19 +406,25 @@ public class MonitorServiceImpl implements IMonitorService {
|
||||||
private HostDetailsResult getHostDetails(String hostIp, String start, String end) {
|
private HostDetailsResult getHostDetails(String hostIp, String start, String end) {
|
||||||
long startTime = parseTimestamp(start);
|
long startTime = parseTimestamp(start);
|
||||||
long endTime = parseTimestamp(end);
|
long endTime = parseTimestamp(end);
|
||||||
|
long step = calculateStep(startTime, endTime);
|
||||||
|
String cpuQuery =
|
||||||
|
"topk(5,\n" +
|
||||||
|
" avg_over_time(\n" +
|
||||||
|
" sum by (groupname) (\n" +
|
||||||
|
" irate(namedprocess_namegroup_cpu_seconds_total[5m])\n" +
|
||||||
|
" )[5m:]\n" +
|
||||||
|
" )\n" +
|
||||||
|
")";
|
||||||
|
|
||||||
String cpuQuery = String.format(
|
|
||||||
"topk(5, sum by (groupname) (irate(namedprocess_namegroup_cpu_seconds_total{instance='%s'}[5m])))",
|
|
||||||
hostIp + ":9256");
|
|
||||||
|
|
||||||
String memQuery = String.format(
|
String memQuery = String.format(
|
||||||
"topk(5,namedprocess_namegroup_memory_bytes{instance=~\"%s\", memtype=\"resident\"}"
|
"topk(5,namedprocess_namegroup_memory_bytes{instance=~\"%s\", memtype=\"resident\"}"
|
||||||
+ "/scalar(max(node_memory_MemTotal_bytes{instance=~\"%s\"})))",
|
+ "/scalar(max(node_memory_MemTotal_bytes{instance=~\"%s\"})))",
|
||||||
hostIp + ":9256", hostIp + ":9100");
|
hostIp, hostIp);
|
||||||
|
|
||||||
HostDetailsResult result = new HostDetailsResult();
|
HostDetailsResult result = new HostDetailsResult();
|
||||||
result.setCpu(queryProcessMetrics(cpuQuery, startTime, endTime, true));
|
result.setCpu(queryProcessMetrics(cpuQuery, startTime, endTime, true, step));
|
||||||
result.setMemory(queryProcessMetrics(memQuery, startTime, endTime, true));
|
result.setMemory(queryProcessMetrics(memQuery, startTime, endTime, true, step));
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -448,13 +434,18 @@ public class MonitorServiceImpl implements IMonitorService {
|
||||||
* @param isPercent 是否乘以 100 转百分比
|
* @param isPercent 是否乘以 100 转百分比
|
||||||
*/
|
*/
|
||||||
private List<HostDetailsResult.ProcessMetric> queryProcessMetrics(
|
private List<HostDetailsResult.ProcessMetric> queryProcessMetrics(
|
||||||
String promql, long start, long end, boolean isPercent) {
|
String promql, long start, long end, boolean isPercent, long step) {
|
||||||
|
|
||||||
List<PrometheusUtil.QueryResult> results = queryRange(promql, start, end, 60);
|
List<PrometheusUtil.QueryResult> resultList = queryRange(promql, start, end, step);
|
||||||
|
List<PrometheusUtil.QueryResult> safeSorted = resultList.stream()
|
||||||
|
.sorted(Comparator.comparingInt(qr ->
|
||||||
|
qr.getValues() == null ? 0 : qr.getValues().size()
|
||||||
|
))
|
||||||
|
.collect(Collectors.toList());
|
||||||
List<HostDetailsResult.ProcessMetric> metrics = new ArrayList<>();
|
List<HostDetailsResult.ProcessMetric> metrics = new ArrayList<>();
|
||||||
double factor = isPercent ? 100.0 : 1.0;
|
double factor = isPercent ? 100.0 : 1.0;
|
||||||
|
|
||||||
for (PrometheusUtil.QueryResult qr : results) {
|
for (PrometheusUtil.QueryResult qr : safeSorted) {
|
||||||
String name = qr.getMetrics() != null
|
String name = qr.getMetrics() != null
|
||||||
? qr.getMetrics().getOrDefault("groupname", "unknown")
|
? qr.getMetrics().getOrDefault("groupname", "unknown")
|
||||||
: "unknown";
|
: "unknown";
|
||||||
|
|
@ -465,8 +456,8 @@ public class MonitorServiceImpl implements IMonitorService {
|
||||||
if (qr.getValues() != null) {
|
if (qr.getValues() != null) {
|
||||||
for (PrometheusUtil.DataPoint p : qr.getValues()) {
|
for (PrometheusUtil.DataPoint p : qr.getValues()) {
|
||||||
history.add(new HostDetailsResult.HistoryPoint(
|
history.add(new HostDetailsResult.HistoryPoint(
|
||||||
p.getTimestamp(), p.getValue()));
|
p.getTimestamp(), p.getValue() / 100));
|
||||||
stats.accept(p.getValue());
|
stats.accept(p.getValue() / 100);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -520,17 +511,20 @@ public class MonitorServiceImpl implements IMonitorService {
|
||||||
@Override
|
@Override
|
||||||
public Result<?> item(String itemId, Integer itemType, String start, String end) {
|
public Result<?> item(String itemId, Integer itemType, String start, String end) {
|
||||||
MetricConfig config = MetricConfig.METRIC_MAP.get(itemId);
|
MetricConfig config = MetricConfig.METRIC_MAP.get(itemId);
|
||||||
|
//通过itemId查找监控项
|
||||||
|
AlarmItem alarmItem = alarmItemMapper.selectById(itemId);
|
||||||
|
|
||||||
if (config == null) {
|
if (config == null) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
String instance = alarmItem.getHostId();
|
||||||
try {
|
try {
|
||||||
long startTs = LocalDateTime.parse(start, FORMATTER)
|
long startTs = LocalDateTime.parse(start, FORMATTER)
|
||||||
.toEpochSecond(ZoneOffset.ofHours(8));
|
.toEpochSecond(ZoneOffset.ofHours(8));
|
||||||
long endTs = LocalDateTime.parse(end, FORMATTER)
|
long endTs = LocalDateTime.parse(end, FORMATTER)
|
||||||
.toEpochSecond(ZoneOffset.ofHours(8));
|
.toEpochSecond(ZoneOffset.ofHours(8));
|
||||||
|
|
||||||
String promql = String.format(config.getPromQl(), INSTANCE_9100, INSTANCE_9100);
|
String promql = String.format(config.getPromQl(), instance, instance);
|
||||||
List<PrometheusUtil.QueryResult> results = queryRange(promql, startTs, endTs, 60);
|
List<PrometheusUtil.QueryResult> results = queryRange(promql, startTs, endTs, 60);
|
||||||
|
|
||||||
List<Map<String, Object>> historyPoints = new ArrayList<>();
|
List<Map<String, Object>> historyPoints = new ArrayList<>();
|
||||||
|
|
@ -558,7 +552,7 @@ public class MonitorServiceImpl implements IMonitorService {
|
||||||
return Result.OK(finalResult);
|
return Result.OK(finalResult);
|
||||||
|
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
log.error("查询指标项失败: itemId={}", itemId, e);
|
log.error("查询指标项失败: instance={}", instance, e);
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -20,6 +20,7 @@ import org.jeecg.common.system.vo.DictModel;
|
||||||
import org.jeecg.common.util.JDBCUtil;
|
import org.jeecg.common.util.JDBCUtil;
|
||||||
import org.jeecg.common.util.NumUtil;
|
import org.jeecg.common.util.NumUtil;
|
||||||
import org.jeecg.common.util.RedisUtil;
|
import org.jeecg.common.util.RedisUtil;
|
||||||
|
import org.jeecg.modules.Util.PrometheusAlertManager;
|
||||||
import org.jeecg.modules.Util.PrometheusUtil;
|
import org.jeecg.modules.Util.PrometheusUtil;
|
||||||
import org.jeecg.modules.base.dto.*;
|
import org.jeecg.modules.base.dto.*;
|
||||||
import org.jeecg.modules.base.entity.monitor.Host;
|
import org.jeecg.modules.base.entity.monitor.Host;
|
||||||
|
|
@ -44,6 +45,11 @@ import org.springframework.scheduling.annotation.Scheduled;
|
||||||
import org.springframework.stereotype.Service;
|
import org.springframework.stereotype.Service;
|
||||||
import org.springframework.transaction.annotation.Transactional;
|
import org.springframework.transaction.annotation.Transactional;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.nio.file.FileStore;
|
||||||
|
import java.nio.file.Files;
|
||||||
|
import java.nio.file.Path;
|
||||||
|
import java.nio.file.Paths;
|
||||||
import java.time.LocalDate;
|
import java.time.LocalDate;
|
||||||
import java.time.format.DateTimeFormatter;
|
import java.time.format.DateTimeFormatter;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
@ -68,6 +74,8 @@ public class SysDatabaseServiceImpl extends ServiceImpl<SysDatabaseMapper, SysDa
|
||||||
|
|
||||||
@Autowired
|
@Autowired
|
||||||
private PrometheusUtil prometheusUtil;
|
private PrometheusUtil prometheusUtil;
|
||||||
|
@Autowired
|
||||||
|
private PrometheusAlertManager prometheusAlertManager;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Result<?> findPage(QueryRequest query) {
|
public Result<?> findPage(QueryRequest query) {
|
||||||
|
|
@ -116,7 +124,9 @@ public class SysDatabaseServiceImpl extends ServiceImpl<SysDatabaseMapper, SysDa
|
||||||
online = prometheusUtil.isInstanceUp(instance);
|
online = prometheusUtil.isInstanceUp(instance);
|
||||||
databaseDto.setOnline(online);
|
databaseDto.setOnline(online);
|
||||||
// 如果数据库不在线 则不需要查询数据库的使用情况
|
// 如果数据库不在线 则不需要查询数据库的使用情况
|
||||||
if (!online) continue;
|
if (!online) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
// 获取数据库指标信息
|
// 获取数据库指标信息
|
||||||
// dbMemory kb -> GB
|
// dbMemory kb -> GB
|
||||||
|
|
@ -374,6 +384,18 @@ public class SysDatabaseServiceImpl extends ServiceImpl<SysDatabaseMapper, SysDa
|
||||||
}
|
}
|
||||||
spaceInfos = template.query(DBSQL.SPACE_OR, mapper);
|
spaceInfos = template.query(DBSQL.SPACE_OR, mapper);
|
||||||
}
|
}
|
||||||
|
if (StrUtil.equals(dbType, POSTGRESQL.getType())) {
|
||||||
|
String dbUrl = sysDatabase.getDbUrl();
|
||||||
|
String dbDriver = sysDatabase.getDbDriver();
|
||||||
|
String dbUsername = sysDatabase.getDbUsername();
|
||||||
|
String dbPassword = sysDatabase.getDbPassword();
|
||||||
|
RowMapper<SpaceInfo> mapper = new SpaceRowMapper();
|
||||||
|
JdbcTemplate template = JDBCUtil.template(dbUrl, dbDriver, dbUsername, dbPassword);
|
||||||
|
if (ObjectUtil.isNull(template)) {
|
||||||
|
return spaceInfos;
|
||||||
|
}
|
||||||
|
spaceInfos = template.query(DBSQL.SPACE_PG, mapper);
|
||||||
|
}
|
||||||
return spaceInfos;
|
return spaceInfos;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -541,7 +563,8 @@ public class SysDatabaseServiceImpl extends ServiceImpl<SysDatabaseMapper, SysDa
|
||||||
instance);
|
instance);
|
||||||
break;
|
break;
|
||||||
case LOGREMAININGSIZE:
|
case LOGREMAININGSIZE:
|
||||||
promql = String.format("pg_wal_size_bytes{instance=\"%s\"}", instance);
|
promql = String.format("pg_wal_size_bytes{instance=\"%s\"}/(1024 * 1024 )",
|
||||||
|
instance);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
} else if ("Oracle".equalsIgnoreCase(dbType)) {
|
} else if ("Oracle".equalsIgnoreCase(dbType)) {
|
||||||
|
|
|
||||||
|
|
@ -396,47 +396,53 @@ public class SysServerServiceImpl extends ServiceImpl<SysServerMapper, SysServer
|
||||||
return Result.error("HostId" + Prompt.PARAM_REQUIRED);
|
return Result.error("HostId" + Prompt.PARAM_REQUIRED);
|
||||||
}
|
}
|
||||||
//查询条件
|
//查询条件
|
||||||
sysServer.setIpAddress("172.21.170.11");
|
String instance = sysServer.getIpAddress();
|
||||||
String instance = sysServer.getIpAddress() + ":9100";
|
|
||||||
BasicInfo basicInfo = new BasicInfo();
|
BasicInfo basicInfo = new BasicInfo();
|
||||||
|
|
||||||
|
|
||||||
// --- 静态元数据 (从 node_uname_info 标签获取) ---
|
// --- 静态元数据 (从 node_uname_info 标签获取) ---
|
||||||
String osInfoquery = String.format("node_os_info{instance=\"%s\"}", instance);
|
String osInfoquery = String.format("node_os_info{instance=\"%s\"}", instance);
|
||||||
JSONObject osInfo = queryPromSingleMetric(osInfoquery);
|
List<PrometheusUtil.QueryResult> queryResultList =
|
||||||
if (ObjectUtil.isNotNull(osInfo)) {
|
prometheusUtil.queryInstantFull(osInfoquery, null);
|
||||||
JSONObject labels = osInfo.getJSONObject("metric");
|
if (ObjectUtil.isNotNull(queryResultList) && !queryResultList.isEmpty()) {
|
||||||
basicInfo.setOsName(labels.getString("name"));
|
Map<String, String> labels = queryResultList.get(0).getMetrics();
|
||||||
basicInfo.setOsVersion(labels.getString("version"));
|
basicInfo.setOsName(labels.get("name"));
|
||||||
|
basicInfo.setOsVersion(labels.get("version"));
|
||||||
}
|
}
|
||||||
|
|
||||||
// 指标包含: nodename(hostname), release(osVersion), sysname(osName), machine(cpuType)
|
// 指标包含: nodename(hostname), release(osVersion), sysname(osName), machine(cpuType)
|
||||||
JSONObject unameMetric =
|
String unameQuery = String.format("node_uname_info{instance=\"%s\"}", instance);
|
||||||
queryPromSingleMetric("node_uname_info{instance=\"" + instance + "\"}");
|
List<PrometheusUtil.QueryResult> unameQueryList =
|
||||||
if (ObjectUtil.isNotNull(unameMetric)) {
|
prometheusUtil.queryInstantFull(unameQuery, null);
|
||||||
JSONObject labels = unameMetric.getJSONObject("metric");
|
|
||||||
basicInfo.setHostName(labels.getString("nodename"));
|
if (ObjectUtil.isNotNull(unameQueryList) && !unameQueryList.isEmpty()) {
|
||||||
basicInfo.setKernelVersion(labels.getString("release"));
|
Map<String, String> labels = unameQueryList.get(0).getMetrics();
|
||||||
|
|
||||||
|
basicInfo.setHostName(labels.get("nodename"));
|
||||||
|
basicInfo.setKernelVersion(labels.get("release"));
|
||||||
// basicInfo.setOsName(labels.getString("sysname"));
|
// basicInfo.setOsName(labels.getString("sysname"));
|
||||||
basicInfo.setCpuType(labels.getString("machine"));
|
basicInfo.setCpuType(labels.get("machine"));
|
||||||
}
|
}
|
||||||
|
|
||||||
// --- 服务器状态与运行时间 ---
|
// --- 服务器状态与运行时间 ---
|
||||||
Double upValue = queryPromSingleValue("up{instance='" + instance + "'}");
|
Double upValue =
|
||||||
|
prometheusUtil.queryInstant("up{instance='" + instance + "'}");
|
||||||
basicInfo.setRunningState(ObjectUtil.isNotNull(upValue) && upValue == 1.0);
|
basicInfo.setRunningState(ObjectUtil.isNotNull(upValue) && upValue == 1.0);
|
||||||
|
|
||||||
Double bootTime = queryPromSingleValue(
|
Double bootTime =
|
||||||
|
prometheusUtil.queryInstant(
|
||||||
"time() - node_boot_time_seconds{instance='" + instance + "'}");
|
"time() - node_boot_time_seconds{instance='" + instance + "'}");
|
||||||
basicInfo.setRunTime(
|
basicInfo.setRunTime(
|
||||||
ObjectUtil.isNull(bootTime) ? "--" : NumberUtil.round(bootTime / 3600.0, 1) + "h");
|
ObjectUtil.isNull(bootTime) ? "--" : NumberUtil.round(bootTime / 3600.0, 1) + "h");
|
||||||
|
|
||||||
// --- 硬件规格 ---
|
// --- 硬件规格 ---
|
||||||
Double memTotal =
|
Double memTotal = prometheusUtil.queryInstant(
|
||||||
queryPromSingleValue("node_memory_MemTotal_bytes{instance='" + instance + "'}");
|
"node_memory_MemTotal_bytes{instance='" + instance + "'}");
|
||||||
basicInfo.setRamSize(ObjectUtil.isNull(memTotal) ? "--" :
|
basicInfo.setRamSize(ObjectUtil.isNull(memTotal) ? "--" :
|
||||||
NumberUtil.round(memTotal / 1024 / 1024 / 1024, 1) + "GB");
|
NumberUtil.round(memTotal / 1024 / 1024 / 1024, 1) + "GB");
|
||||||
|
|
||||||
Double cpuCores = queryPromSingleValue(
|
Double cpuCores =
|
||||||
|
prometheusUtil.queryInstant(
|
||||||
"count(node_cpu_seconds_total{instance='" + instance + "',mode='idle'})");
|
"count(node_cpu_seconds_total{instance='" + instance + "',mode='idle'})");
|
||||||
basicInfo.setCpuCores(
|
basicInfo.setCpuCores(
|
||||||
ObjectUtil.isNull(cpuCores) ? "--" : String.valueOf(cpuCores.intValue()));
|
ObjectUtil.isNull(cpuCores) ? "--" : String.valueOf(cpuCores.intValue()));
|
||||||
|
|
@ -446,34 +452,36 @@ public class SysServerServiceImpl extends ServiceImpl<SysServerMapper, SysServer
|
||||||
"count(node_filesystem_size_bytes{instance=\"%s\", fstype!~\"tmpfs|squashfs|overlay|autofs\"})",
|
"count(node_filesystem_size_bytes{instance=\"%s\", fstype!~\"tmpfs|squashfs|overlay|autofs\"})",
|
||||||
instance
|
instance
|
||||||
);
|
);
|
||||||
Double diskUsed = queryPromSingleValue(query);
|
Double diskUsed = prometheusUtil.queryInstant(query);
|
||||||
basicInfo.setTotalDiskPar(
|
basicInfo.setTotalDiskPar(
|
||||||
ObjectUtil.isNull(diskUsed) ? "--" : String.valueOf(diskUsed.intValue()));
|
ObjectUtil.isNull(diskUsed) ? "--" : String.valueOf(diskUsed.intValue()));
|
||||||
// 查询语句
|
// 查询语句
|
||||||
JSONObject dmiResult = getNodeDmiInfoMetric(
|
String dmiQuery = String.format(
|
||||||
"node_dmi_info * on(instance) group_left(time_zone) node_time_zone_offset_seconds {instance=\"" +
|
"node_dmi_info * on(instance) group_left(time_zone) node_time_zone_offset_seconds {instance=\"%s\"}",
|
||||||
instance + "\"}");
|
instance);
|
||||||
if (ObjectUtil.isNotNull(dmiResult)) {
|
List<PrometheusUtil.QueryResult> dmiResultList =
|
||||||
|
prometheusUtil.queryInstantFull(dmiQuery, null);
|
||||||
|
if (ObjectUtil.isNotNull(dmiResultList) && !dmiResultList.isEmpty()) {
|
||||||
|
|
||||||
JSONObject dmiMetric = dmiResult.getJSONObject("metric");
|
Map<String, String> dmiMetric = dmiResultList.get(0).getMetrics();
|
||||||
basicInfo.setBiosVersion(dmiMetric.getString("bios_version"));
|
basicInfo.setBiosVersion(dmiMetric.get("bios_version"));
|
||||||
basicInfo.setBiosSupplier(dmiMetric.getString("bios_vendor"));
|
basicInfo.setBiosSupplier(dmiMetric.get("bios_vendor"));
|
||||||
basicInfo.setManufacturer(dmiMetric.getString("system_vendor"));
|
basicInfo.setManufacturer(dmiMetric.get("system_vendor"));
|
||||||
basicInfo.setModelNumber(dmiMetric.getString("product_name"));
|
basicInfo.setModelNumber(dmiMetric.get("product_name"));
|
||||||
String timeZone = dmiMetric.getString("time_zone");
|
String timeZone = dmiMetric.get("time_zone");
|
||||||
basicInfo.setZone(timeZone);
|
basicInfo.setZone(timeZone);
|
||||||
}
|
}
|
||||||
|
|
||||||
//startTime
|
//startTime
|
||||||
JSONObject startTime =
|
|
||||||
getNodeDmiInfoMetric("node_boot_time_seconds{instance=\"" + instance + "\"}");
|
|
||||||
|
|
||||||
if (ObjectUtil.isNotNull(startTime)) {
|
List<PrometheusUtil.QueryResult> startTimeList = prometheusUtil.queryInstantFull(
|
||||||
|
"node_boot_time_seconds{instance=\"" + instance + "\"}", null);
|
||||||
|
if (ObjectUtil.isNotNull(startTimeList) && !startTimeList.isEmpty()) {
|
||||||
// 解析
|
// 解析
|
||||||
JSONArray array = startTime.getJSONArray("value");
|
PrometheusUtil.DataPoint dataPoint = startTimeList.get(0).getValue();
|
||||||
Integer time = null;
|
Integer time = null;
|
||||||
if (array != null && array.size() > 1) {
|
if (dataPoint != null) {
|
||||||
Object value = array.get(1);
|
Object value = dataPoint.getValue();
|
||||||
if (value instanceof String) {
|
if (value instanceof String) {
|
||||||
time = Double.valueOf((String) value).intValue();
|
time = Double.valueOf((String) value).intValue();
|
||||||
} else if (value instanceof Number) {
|
} else if (value instanceof Number) {
|
||||||
|
|
@ -487,21 +495,21 @@ public class SysServerServiceImpl extends ServiceImpl<SysServerMapper, SysServer
|
||||||
|
|
||||||
// --- 实时利用率 (仪表盘数据) ---
|
// --- 实时利用率 (仪表盘数据) ---
|
||||||
// CPU使用率 = 100 - idle
|
// CPU使用率 = 100 - idle
|
||||||
Double cpuUsedVal = queryPromSingleValue(
|
Double cpuUsedVal = prometheusUtil.queryInstant(
|
||||||
"100 - (avg(irate(node_cpu_seconds_total{instance='" + instance +
|
"100 - (avg(irate(node_cpu_seconds_total{instance='" + instance +
|
||||||
"',mode='idle'}[5m])) * 100)");
|
"',mode='idle'}[5m])) * 100)");
|
||||||
basicInfo.setCpuUsed(ObjectUtil.isNull(cpuUsedVal) ? 0.0 :
|
basicInfo.setCpuUsed(ObjectUtil.isNull(cpuUsedVal) ? 0.0 :
|
||||||
NumberUtil.round(cpuUsedVal, 1).doubleValue());
|
NumberUtil.round(cpuUsedVal, 1).doubleValue());
|
||||||
|
|
||||||
// 内存使用率 = (Total - Avail) / Total
|
// 内存使用率 = (Total - Avail) / Total
|
||||||
Double memUsedVal = queryPromSingleValue(
|
Double memUsedVal = prometheusUtil.queryInstant(
|
||||||
"(1 - (node_memory_MemAvailable_bytes{instance='" + instance +
|
"(1 - (node_memory_MemAvailable_bytes{instance='" + instance +
|
||||||
"'} / node_memory_MemTotal_bytes{instance='" + instance + "'})) * 100");
|
"'} / node_memory_MemTotal_bytes{instance='" + instance + "'})) * 100");
|
||||||
basicInfo.setMemoryUsed(
|
basicInfo.setMemoryUsed(
|
||||||
ObjectUtil.isNull(memUsedVal) ? 0.0 :
|
ObjectUtil.isNull(memUsedVal) ? 0.0 :
|
||||||
NumberUtil.round(memUsedVal, 1).doubleValue());
|
NumberUtil.round(memUsedVal, 1).doubleValue());
|
||||||
|
|
||||||
// 响应成功率 (Prometheus通常需要通过blackbox_exporter或ping,这里演示默认值)
|
// 响应成功率 ()
|
||||||
basicInfo.setResponseSuccessRate(100.0);
|
basicInfo.setResponseSuccessRate(100.0);
|
||||||
|
|
||||||
// --- 磁盘使用情况 (多分区解析) ---
|
// --- 磁盘使用情况 (多分区解析) ---
|
||||||
|
|
@ -511,7 +519,7 @@ public class SysServerServiceImpl extends ServiceImpl<SysServerMapper, SysServer
|
||||||
basicInfo.setIp(sysServer.getIpAddress());
|
basicInfo.setIp(sysServer.getIpAddress());
|
||||||
basicInfo.setLocation("DataCenter");
|
basicInfo.setLocation("DataCenter");
|
||||||
|
|
||||||
double netWorks = queryPromSingleValue(
|
double netWorks = prometheusUtil.queryInstant(
|
||||||
"count(node_network_info{operstate=\"up\",instance=\"" + instance + "\"})");
|
"count(node_network_info{operstate=\"up\",instance=\"" + instance + "\"})");
|
||||||
basicInfo.setNetwork(String.valueOf(netWorks));
|
basicInfo.setNetwork(String.valueOf(netWorks));
|
||||||
|
|
||||||
|
|
@ -674,26 +682,14 @@ public class SysServerServiceImpl extends ServiceImpl<SysServerMapper, SysServer
|
||||||
instance + "',fstype!~'tmpfs|udev|overlay'}) * 100";
|
instance + "',fstype!~'tmpfs|udev|overlay'}) * 100";
|
||||||
|
|
||||||
try {
|
try {
|
||||||
String url = PROM_URL + URLEncoder.encode(promql, StandardCharsets.UTF_8.name());
|
List<PrometheusUtil.QueryResult> queryResults =
|
||||||
// 1. 手动对 query 参数进行编码
|
prometheusUtil.queryInstantFull(promql, null);
|
||||||
String encodedQuery = URLEncoder.encode(promql, StandardCharsets.UTF_8.name());
|
|
||||||
|
|
||||||
// 2. 拼接完整的 URL 字符串
|
|
||||||
String fullUrl = PROM_URL + encodedQuery;
|
|
||||||
|
|
||||||
// 3. 【关键步骤】将 String 包装成 java.net.URI
|
|
||||||
// 这样 RestTemplate 就会原封不动地发送这个地址
|
|
||||||
java.net.URI uri = new java.net.URI(fullUrl);
|
|
||||||
|
|
||||||
String response = restTemplate.getForObject(uri, String.class);
|
|
||||||
JSONObject json = JSON.parseObject(response);
|
|
||||||
JSONArray resultArr = json.getJSONObject("data").getJSONArray("result");
|
|
||||||
|
|
||||||
for (int i = 0; i < resultArr.size(); i++) {
|
|
||||||
JSONObject item = resultArr.getJSONObject(i);
|
|
||||||
String mountpoint = item.getJSONObject("metric").getString("mountpoint");
|
|
||||||
Double val = item.getJSONArray("value").getDouble(1);
|
|
||||||
|
|
||||||
|
for (int i = 0; i < queryResults.size(); i++) {
|
||||||
|
Map<String, String> item = queryResults.get(i).getMetrics();
|
||||||
|
String mountpoint = item.get("mountpoint");
|
||||||
|
PrometheusUtil.DataPoint dataPoint = queryResults.get(i).getValue();
|
||||||
|
Double val = dataPoint.getValue();
|
||||||
xData.add(mountpoint);
|
xData.add(mountpoint);
|
||||||
yData.add(NumberUtil.round(val, 1).doubleValue());
|
yData.add(NumberUtil.round(val, 1).doubleValue());
|
||||||
}
|
}
|
||||||
|
|
@ -705,23 +701,6 @@ public class SysServerServiceImpl extends ServiceImpl<SysServerMapper, SysServer
|
||||||
return diskUsedValue;
|
return diskUsedValue;
|
||||||
}
|
}
|
||||||
|
|
||||||
public JSONObject getNodeDmiInfoMetric(String query) {
|
|
||||||
|
|
||||||
try {
|
|
||||||
// 获取 Metric 标签的方法
|
|
||||||
JSONObject metricObj = queryPromSingleMetric(query);
|
|
||||||
|
|
||||||
if (ObjectUtil.isNotNull(metricObj)) {
|
|
||||||
|
|
||||||
|
|
||||||
return metricObj;
|
|
||||||
}
|
|
||||||
} catch (Exception e) {
|
|
||||||
log.error("获取 BIOS Version 失败: {}", e.getMessage());
|
|
||||||
}
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 时区值映射
|
* 时区值映射
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user