227 lines
8.8 KiB
C#
227 lines
8.8 KiB
C#
using Microsoft.Extensions.Diagnostics.HealthChecks;
|
|
using System.Diagnostics;
|
|
|
|
namespace QRRapidoApp.Services.HealthChecks
{
    /// <summary>
    /// Health check that samples the current process's CPU, memory, garbage-collection,
    /// thread and handle statistics and compares them against configurable thresholds.
    /// </summary>
    /// <remarks>
    /// Thresholds are read once, in the constructor, from the
    /// <c>HealthChecks:Resources</c> configuration section:
    /// <c>CpuThresholdPercent</c> (default 85.0), <c>MemoryThresholdMB</c> (default 600)
    /// and <c>GcPressureThreshold</c> (default 15, in collections per minute).
    /// Exceeding a threshold produces a warning (Degraded); exceeding it by the
    /// corresponding critical factor produces an issue (Unhealthy).
    /// </remarks>
    public class ResourceHealthCheck : IHealthCheck
    {
        private const long BytesPerMegabyte = 1024 * 1024;

        // Multipliers applied to the configured thresholds to obtain the "critical" level.
        private const double CpuCriticalFactor = 1.2;
        private const double MemoryCriticalFactor = 1.5;
        private const int GcCriticalFactor = 2;

        // The CPU estimate is clamped to this value, so no threshold above it can be exceeded.
        private const double MaxCpuPercent = 100.0;

        private const int ThreadCountWarningThreshold = 200;

        // Collections-per-minute boundaries for the textual GC pressure label.
        private const double GcHighCollectionsPerMinute = 20.0;
        private const double GcMediumCollectionsPerMinute = 10.0;

        // Length of the window over which CPU time is sampled.
        private static readonly TimeSpan CpuSampleInterval = TimeSpan.FromMilliseconds(100);

        private readonly ILogger<ResourceHealthCheck> _logger;

        private readonly double _cpuThresholdPercent;
        private readonly long _memoryThresholdMB;
        private readonly int _gcPressureThreshold;

        /// <summary>
        /// Creates the health check and reads its thresholds from configuration.
        /// </summary>
        /// <param name="configuration">Source of the <c>HealthChecks:Resources:*</c> settings.</param>
        /// <param name="logger">Logger used to report a failure of the check itself.</param>
        public ResourceHealthCheck(
            IConfiguration configuration,
            ILogger<ResourceHealthCheck> logger)
        {
            _logger = logger;

            _cpuThresholdPercent = configuration.GetValue<double>("HealthChecks:Resources:CpuThresholdPercent", 85.0);
            _memoryThresholdMB = configuration.GetValue<long>("HealthChecks:Resources:MemoryThresholdMB", 600);
            _gcPressureThreshold = configuration.GetValue<int>("HealthChecks:Resources:GcPressureThreshold", 15);
        }

        /// <summary>
        /// Samples process resource usage and maps it to a health status.
        /// </summary>
        /// <param name="context">Health check context (not used by this check).</param>
        /// <param name="cancellationToken">
        /// Cancels the CPU sampling delay. When cancellation is requested the resulting
        /// <see cref="OperationCanceledException"/> is propagated to the caller instead of
        /// being reported as an unhealthy result.
        /// </param>
        /// <returns>
        /// Unhealthy when any critical level is exceeded, Degraded when only warning levels
        /// are exceeded, Healthy otherwise. Any other exception raised while sampling is
        /// logged and reported as Unhealthy. The sampled figures are attached as result data
        /// whenever sampling itself succeeds.
        /// </returns>
        public async Task<HealthCheckResult> CheckHealthAsync(HealthCheckContext context, CancellationToken cancellationToken = default)
        {
            try
            {
                // Process is IDisposable; release it when the check completes.
                using var currentProcess = Process.GetCurrentProcess();

                // Memory usage
                var workingSetMB = currentProcess.WorkingSet64 / BytesPerMegabyte;
                var privateMemoryMB = currentProcess.PrivateMemorySize64 / BytesPerMegabyte;
                var virtualMemoryMB = currentProcess.VirtualMemorySize64 / BytesPerMegabyte;
                var gcTotalMemoryMB = GC.GetTotalMemory(false) / BytesPerMegabyte;

                // CPU usage estimation (sampled over a short window without blocking a thread)
                var cpuUsagePercent = await GetCpuUsageEstimateAsync(cancellationToken);

                // GC statistics
                var gen0Collections = GC.CollectionCount(0);
                var gen1Collections = GC.CollectionCount(1);
                var gen2Collections = GC.CollectionCount(2);

                // Thread and handle counts
                var threadCount = currentProcess.Threads.Count;
                var handleCount = currentProcess.HandleCount;

                // Process uptime. StartTime is expressed in local time, so convert it
                // before subtracting from a UTC timestamp.
                var uptime = DateTime.UtcNow - currentProcess.StartTime.ToUniversalTime();

                // Populate health check data
                var data = new Dictionary<string, object>
                {
                    ["cpu"] = $"{cpuUsagePercent:F1}%",
                    ["memory"] = $"{workingSetMB}MB",
                    ["memoryPercent"] = CalculateMemoryPercent(workingSetMB),
                    ["privateMemoryMB"] = privateMemoryMB,
                    ["virtualMemoryMB"] = virtualMemoryMB,
                    ["gcTotalMemoryMB"] = gcTotalMemoryMB,
                    ["gen0Collections"] = gen0Collections,
                    ["gen1Collections"] = gen1Collections,
                    ["gen2Collections"] = gen2Collections,
                    ["threadCount"] = threadCount,
                    ["handleCount"] = handleCount,
                    ["uptime"] = $"{uptime.Days}d {uptime.Hours}h {uptime.Minutes}m",
                    ["processId"] = currentProcess.Id,
                };

                // Estimate GC pressure (rough approximation: collections per minute of uptime)
                var totalCollections = gen0Collections + gen1Collections + gen2Collections;
                var gcPressureValue = CalculateGcPressureValue(totalCollections, uptime);
                var gcPressure = EstimateGcPressure(totalCollections, uptime);
                data["gcPressure"] = gcPressure;

                // Determine overall status
                var issues = new List<string>();
                var warnings = new List<string>();

                // Check CPU
                if (IsCpuCritical(cpuUsagePercent))
                {
                    issues.Add($"CPU usage critical ({cpuUsagePercent:F1}%)");
                }
                else if (cpuUsagePercent > _cpuThresholdPercent)
                {
                    warnings.Add($"CPU usage high ({cpuUsagePercent:F1}%)");
                }

                // Check Memory
                if (workingSetMB > _memoryThresholdMB * MemoryCriticalFactor)
                {
                    issues.Add($"Memory usage critical ({workingSetMB}MB)");
                }
                else if (workingSetMB > _memoryThresholdMB)
                {
                    warnings.Add($"Memory usage high ({workingSetMB}MB)");
                }

                // Check GC pressure
                if (gcPressureValue > _gcPressureThreshold * GcCriticalFactor)
                {
                    issues.Add($"GC pressure critical ({gcPressure})");
                }
                else if (gcPressureValue > _gcPressureThreshold)
                {
                    warnings.Add($"GC pressure high ({gcPressure})");
                }

                // Check thread count (basic heuristic)
                if (threadCount > ThreadCountWarningThreshold)
                {
                    warnings.Add($"High thread count ({threadCount})");
                }

                data["status"] = DetermineStatus(issues.Count, warnings.Count);

                // Return appropriate health status
                if (issues.Count > 0)
                {
                    return HealthCheckResult.Unhealthy($"Resource issues detected: {string.Join(", ", issues)}", data: data);
                }

                if (warnings.Count > 0)
                {
                    return HealthCheckResult.Degraded($"Resource warnings: {string.Join(", ", warnings)}", data: data);
                }

                return HealthCheckResult.Healthy($"Resource usage normal (CPU: {cpuUsagePercent:F1}%, Memory: {workingSetMB}MB)", data: data);
            }
            catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
            {
                // A cancelled check is not a resource problem; let the caller observe the cancellation.
                throw;
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Resource health check failed");
                return HealthCheckResult.Unhealthy($"Resource health check failed: {ex.Message}");
            }
        }

        /// <summary>
        /// Decides whether a CPU reading is at the critical level.
        /// </summary>
        /// <param name="cpuUsagePercent">CPU reading in the range 0-100.</param>
        /// <returns><c>true</c> when the reading should be reported as critical.</returns>
        private bool IsCpuCritical(double cpuUsagePercent)
        {
            var criticalThreshold = _cpuThresholdPercent * CpuCriticalFactor;

            if (criticalThreshold < MaxCpuPercent)
            {
                return cpuUsagePercent > criticalThreshold;
            }

            // The scaled threshold is above anything the clamped reading can reach
            // (e.g. 85 * 1.2 = 102). Treat full saturation as critical instead, but only
            // when the warning threshold itself is reachable; a threshold of 100 or more
            // effectively disables the CPU check and is left disabled.
            return _cpuThresholdPercent < MaxCpuPercent && cpuUsagePercent >= MaxCpuPercent;
        }

        /// <summary>
        /// Estimates this process's CPU usage as a percentage of total machine capacity
        /// by comparing consumed processor time with elapsed time over a short window.
        /// </summary>
        /// <param name="cancellationToken">Cancels the sampling delay.</param>
        /// <returns>
        /// A value between 0 and 100, or 0 when the measurement fails for any reason
        /// other than cancellation.
        /// </returns>
        private static async Task<double> GetCpuUsageEstimateAsync(CancellationToken cancellationToken)
        {
            try
            {
                using var process = Process.GetCurrentProcess();

                // Stopwatch is monotonic, so the interval is unaffected by clock adjustments.
                var stopwatch = Stopwatch.StartNew();
                var startCpuUsage = process.TotalProcessorTime;

                // Small delay to measure CPU usage
                await Task.Delay(CpuSampleInterval, cancellationToken);

                var endCpuUsage = process.TotalProcessorTime;
                stopwatch.Stop();

                var totalMsPassed = stopwatch.Elapsed.TotalMilliseconds;
                if (totalMsPassed <= 0)
                {
                    return 0.0;
                }

                var cpuUsedMs = (endCpuUsage - startCpuUsage).TotalMilliseconds;
                var cpuUsageTotal = cpuUsedMs / (Environment.ProcessorCount * totalMsPassed);

                return Math.Clamp(cpuUsageTotal * 100, 0.0, MaxCpuPercent);
            }
            catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
            {
                throw;
            }
            catch (Exception)
            {
                // Best effort: report a neutral value if CPU measurement fails.
                return 0.0;
            }
        }

        /// <summary>
        /// Expresses the working set as a percentage of the memory available to the runtime.
        /// </summary>
        /// <param name="workingSetMB">Process working set in megabytes.</param>
        /// <returns>
        /// A string such as <c>"12.3%"</c>, or <c>"unknown"</c> when the total cannot be determined.
        /// </returns>
        private string CalculateMemoryPercent(long workingSetMB)
        {
            try
            {
                // Total memory the GC reports as available to the process.
                var totalAvailableBytes = GC.GetGCMemoryInfo().TotalAvailableMemoryBytes;
                if (totalAvailableBytes <= 0)
                {
                    return "unknown";
                }

                var workingSetBytes = (double)workingSetMB * BytesPerMegabyte;
                var memoryPercent = workingSetBytes / totalAvailableBytes * 100;
                return $"{Math.Clamp(memoryPercent, 0.0, 100.0):F1}%";
            }
            catch
            {
                return "unknown";
            }
        }

        /// <summary>
        /// Computes the average number of garbage collections per minute of uptime.
        /// </summary>
        /// <param name="totalCollections">Collections across all generations since process start.</param>
        /// <param name="uptime">Time elapsed since process start.</param>
        /// <returns>Collections per minute, or 0 while uptime is below one minute.</returns>
        private double CalculateGcPressureValue(long totalCollections, TimeSpan uptime)
        {
            // Too little history for a meaningful rate (also avoids dividing by ~0).
            if (uptime.TotalMinutes < 1)
            {
                return 0.0;
            }

            return totalCollections / uptime.TotalMinutes;
        }

        /// <summary>
        /// Maps the collections-per-minute rate to a coarse label.
        /// </summary>
        /// <param name="totalCollections">Collections across all generations since process start.</param>
        /// <param name="uptime">Time elapsed since process start.</param>
        /// <returns><c>"high"</c> above 20 per minute, <c>"medium"</c> above 10, otherwise <c>"low"</c>.</returns>
        private string EstimateGcPressure(long totalCollections, TimeSpan uptime) =>
            CalculateGcPressureValue(totalCollections, uptime) switch
            {
                > GcHighCollectionsPerMinute => "high",
                > GcMediumCollectionsPerMinute => "medium",
                _ => "low",
            };

        /// <summary>
        /// Summarises the number of findings as a status label for the result data.
        /// </summary>
        /// <param name="issueCount">Number of critical findings.</param>
        /// <param name="warningCount">Number of warning findings.</param>
        /// <returns><c>"critical"</c>, <c>"warning"</c> or <c>"ok"</c>.</returns>
        private string DetermineStatus(int issueCount, int warningCount)
        {
            if (issueCount > 0)
            {
                return "critical";
            }

            return warningCount > 0 ? "warning" : "ok";
        }
    }
}