QrRapido/Services/HealthChecks/ResourceHealthCheck.cs
2025-07-28 18:22:47 -03:00

227 lines
8.8 KiB
C#

using Microsoft.Extensions.Diagnostics.HealthChecks;
using System.Diagnostics;
namespace QRRapidoApp.Services.HealthChecks
{
/// <summary>
/// Health check that samples the current process's CPU, memory, garbage-collection
/// and thread usage and maps the readings onto Healthy / Degraded / Unhealthy.
/// </summary>
/// <remarks>
/// Thresholds are read once, in the constructor, from the
/// <c>HealthChecks:Resources</c> configuration section:
/// <c>CpuThresholdPercent</c> (default 85), <c>MemoryThresholdMB</c> (default 600)
/// and <c>GcPressureThreshold</c> (default 15, in collections per minute).
/// </remarks>
public class ResourceHealthCheck : IHealthCheck
{
    private const long BytesPerMegabyte = 1024 * 1024;

    // A reading above threshold is a warning; above threshold * factor it is an issue.
    private const double CpuCriticalFactor = 1.2;
    private const double MemoryCriticalFactor = 1.5;
    private const int GcCriticalFactor = 2;

    private const int ThreadCountWarningThreshold = 200;

    // Fixed cut-offs (collections per minute) for the textual "gcPressure" label.
    private const double HighGcCollectionsPerMinute = 20;
    private const double MediumGcCollectionsPerMinute = 10;

    // Length of the window over which processor time is sampled.
    private static readonly TimeSpan CpuSampleWindow = TimeSpan.FromMilliseconds(100);

    private readonly IConfiguration _configuration;
    private readonly ILogger<ResourceHealthCheck> _logger;
    private readonly double _cpuThresholdPercent;
    private readonly long _memoryThresholdMB;
    private readonly int _gcPressureThreshold;

    /// <summary>
    /// Creates the health check and reads its thresholds from configuration.
    /// </summary>
    /// <param name="configuration">Application configuration holding the <c>HealthChecks:Resources</c> section.</param>
    /// <param name="logger">Logger used to report measurement failures.</param>
    /// <exception cref="ArgumentNullException">Either argument is <c>null</c>.</exception>
    public ResourceHealthCheck(
        IConfiguration configuration,
        ILogger<ResourceHealthCheck> logger)
    {
        _configuration = configuration ?? throw new ArgumentNullException(nameof(configuration));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));

        _cpuThresholdPercent = configuration.GetValue<double>("HealthChecks:Resources:CpuThresholdPercent", 85.0);
        _memoryThresholdMB = configuration.GetValue<long>("HealthChecks:Resources:MemoryThresholdMB", 600);
        _gcPressureThreshold = configuration.GetValue<int>("HealthChecks:Resources:GcPressureThreshold", 15);
    }

    /// <summary>
    /// Samples resource usage of the current process and reports the result.
    /// </summary>
    /// <param name="context">Health check context (not used by this check).</param>
    /// <param name="cancellationToken">Cancels the CPU sampling delay.</param>
    /// <returns>
    /// Unhealthy when any reading exceeds its critical limit or the check itself fails,
    /// Degraded when any reading exceeds its warning threshold, otherwise Healthy.
    /// Successful results carry the raw readings in their data dictionary.
    /// </returns>
    /// <exception cref="OperationCanceledException">
    /// <paramref name="cancellationToken"/> was cancelled while sampling.
    /// </exception>
    public async Task<HealthCheckResult> CheckHealthAsync(HealthCheckContext context, CancellationToken cancellationToken = default)
    {
        try
        {
            using var currentProcess = Process.GetCurrentProcess();
            var data = new Dictionary<string, object>();

            // Memory usage
            var workingSetBytes = currentProcess.WorkingSet64;
            var workingSetMB = workingSetBytes / BytesPerMegabyte;
            var privateMemoryMB = currentProcess.PrivateMemorySize64 / BytesPerMegabyte;
            var virtualMemoryMB = currentProcess.VirtualMemorySize64 / BytesPerMegabyte;
            var gcTotalMemoryMB = GC.GetTotalMemory(false) / BytesPerMegabyte;

            // CPU usage estimation (sampled over a short window without blocking a thread)
            var cpuUsagePercent = await GetCpuUsageEstimateAsync(cancellationToken);

            // GC statistics
            var gen0Collections = GC.CollectionCount(0);
            var gen1Collections = GC.CollectionCount(1);
            var gen2Collections = GC.CollectionCount(2);

            // Thread and handle counts
            var threadCount = currentProcess.Threads.Count;
            var handleCount = currentProcess.HandleCount;

            // Process uptime. Process.StartTime is expressed in local time, so it has to be
            // converted before subtracting from UtcNow; otherwise the result is skewed by
            // the machine's UTC offset.
            var uptime = DateTime.UtcNow - currentProcess.StartTime.ToUniversalTime();
            if (uptime < TimeSpan.Zero)
            {
                // Guards against clock adjustments producing a negative duration.
                uptime = TimeSpan.Zero;
            }

            // Populate health check data
            data["cpu"] = $"{cpuUsagePercent:F1}%";
            data["memory"] = $"{workingSetMB}MB";
            data["memoryPercent"] = CalculateMemoryPercent(workingSetBytes);
            data["privateMemoryMB"] = privateMemoryMB;
            data["virtualMemoryMB"] = virtualMemoryMB;
            data["gcTotalMemoryMB"] = gcTotalMemoryMB;
            data["gen0Collections"] = gen0Collections;
            data["gen1Collections"] = gen1Collections;
            data["gen2Collections"] = gen2Collections;
            data["threadCount"] = threadCount;
            data["handleCount"] = handleCount;
            data["uptime"] = $"{uptime.Days}d {uptime.Hours}h {uptime.Minutes}m";
            data["processId"] = currentProcess.Id;

            // Estimate GC pressure (rough approximation).
            // NOTE(review): a higher-generation collection presumably also counts towards the
            // younger generations, so this sum may count one GC more than once — confirm that
            // the configured threshold was chosen with that in mind.
            long totalCollections = gen0Collections + gen1Collections + gen2Collections;
            var gcPressureValue = CalculateGcPressureValue(totalCollections, uptime);
            var gcPressure = DescribeGcPressure(gcPressureValue);
            data["gcPressure"] = gcPressure;

            // Determine overall status
            var issues = new List<string>();
            var warnings = new List<string>();

            // Check CPU. The reading is clamped to 100, so the critical limit is capped at 100
            // and compared inclusively; otherwise thresholds above ~83% could never be critical.
            var cpuCriticalPercent = Math.Min(_cpuThresholdPercent * CpuCriticalFactor, 100.0);
            if (cpuUsagePercent >= cpuCriticalPercent)
            {
                issues.Add($"CPU usage critical ({cpuUsagePercent:F1}%)");
            }
            else if (cpuUsagePercent > _cpuThresholdPercent)
            {
                warnings.Add($"CPU usage high ({cpuUsagePercent:F1}%)");
            }

            // Check Memory
            if (workingSetMB > _memoryThresholdMB * MemoryCriticalFactor)
            {
                issues.Add($"Memory usage critical ({workingSetMB}MB)");
            }
            else if (workingSetMB > _memoryThresholdMB)
            {
                warnings.Add($"Memory usage high ({workingSetMB}MB)");
            }

            // Check GC pressure
            if (gcPressureValue > _gcPressureThreshold * GcCriticalFactor)
            {
                issues.Add($"GC pressure critical ({gcPressure})");
            }
            else if (gcPressureValue > _gcPressureThreshold)
            {
                warnings.Add($"GC pressure high ({gcPressure})");
            }

            // Check thread count (basic heuristic)
            if (threadCount > ThreadCountWarningThreshold)
            {
                warnings.Add($"High thread count ({threadCount})");
            }

            data["status"] = DetermineStatus(issues.Count, warnings.Count);

            // Return appropriate health status
            if (issues.Count > 0)
            {
                return HealthCheckResult.Unhealthy($"Resource issues detected: {string.Join(", ", issues)}", data: data);
            }

            if (warnings.Count > 0)
            {
                return HealthCheckResult.Degraded($"Resource warnings: {string.Join(", ", warnings)}", data: data);
            }

            return HealthCheckResult.Healthy($"Resource usage normal (CPU: {cpuUsagePercent:F1}%, Memory: {workingSetMB}MB)", data: data);
        }
        catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
        {
            // A cancelled check is not a resource problem; let the caller observe the cancellation.
            throw;
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Resource health check failed");
            return HealthCheckResult.Unhealthy($"Resource health check failed: {ex.Message}", exception: ex);
        }
    }

    /// <summary>
    /// Estimates this process's CPU usage as a percentage of all logical processors by
    /// comparing consumed processor time with elapsed wall-clock time over
    /// <see cref="CpuSampleWindow"/>.
    /// </summary>
    /// <param name="cancellationToken">Cancels the sampling delay.</param>
    /// <returns>A value clamped to 0–100; 0 when the measurement fails.</returns>
    private async Task<double> GetCpuUsageEstimateAsync(CancellationToken cancellationToken)
    {
        try
        {
            using var process = Process.GetCurrentProcess();

            var stopwatch = Stopwatch.StartNew();
            var startCpuUsage = process.TotalProcessorTime;

            // Small asynchronous delay to measure CPU usage over
            await Task.Delay(CpuSampleWindow, cancellationToken);

            // Discard any cached process information so the second reading is current.
            process.Refresh();
            var endCpuUsage = process.TotalProcessorTime;
            stopwatch.Stop();

            var totalMsPassed = stopwatch.Elapsed.TotalMilliseconds;
            if (totalMsPassed <= 0)
            {
                return 0.0;
            }

            var cpuUsedMs = (endCpuUsage - startCpuUsage).TotalMilliseconds;
            var cpuUsageTotal = cpuUsedMs / (Environment.ProcessorCount * totalMsPassed);

            return Math.Min(100.0, Math.Max(0.0, cpuUsageTotal * 100));
        }
        catch (OperationCanceledException)
        {
            throw;
        }
        catch (Exception ex)
        {
            // Best effort: report 0 rather than failing the whole check, but leave a trace.
            _logger.LogDebug(ex, "CPU usage measurement failed; reporting 0%");
            return 0.0;
        }
    }

    /// <summary>
    /// Expresses the working set as a percentage of the memory the runtime reports as
    /// available to the garbage collector.
    /// </summary>
    /// <param name="workingSetBytes">Working set of the process, in bytes.</param>
    /// <returns>
    /// A string such as <c>12.3%</c> (clamped to 0–100), or <c>unknown</c> when the total
    /// is not available.
    /// </returns>
    private string CalculateMemoryPercent(long workingSetBytes)
    {
        try
        {
            var totalAvailableBytes = GC.GetGCMemoryInfo().TotalAvailableMemoryBytes;
            if (totalAvailableBytes <= 0)
            {
                return "unknown";
            }

            var memoryPercent = (double)workingSetBytes / totalAvailableBytes * 100;
            return $"{Math.Min(100.0, Math.Max(0.0, memoryPercent)):F1}%";
        }
        catch (Exception ex)
        {
            _logger.LogDebug(ex, "Memory percentage calculation failed");
            return "unknown";
        }
    }

    /// <summary>
    /// Returns the average number of collections per minute of uptime, or 0 while the
    /// process has been running for less than a minute.
    /// </summary>
    private static double CalculateGcPressureValue(long totalCollections, TimeSpan uptime)
    {
        if (uptime.TotalMinutes < 1)
        {
            return 0.0;
        }

        return totalCollections / uptime.TotalMinutes;
    }

    /// <summary>
    /// Maps a collections-per-minute rate onto the label <c>low</c>, <c>medium</c> or <c>high</c>.
    /// </summary>
    private static string DescribeGcPressure(double collectionsPerMinute)
    {
        if (collectionsPerMinute > HighGcCollectionsPerMinute)
        {
            return "high";
        }

        if (collectionsPerMinute > MediumGcCollectionsPerMinute)
        {
            return "medium";
        }

        return "low";
    }

    /// <summary>
    /// Summarises the findings as <c>critical</c> (any issue), <c>warning</c> (any warning)
    /// or <c>ok</c>.
    /// </summary>
    private static string DetermineStatus(int issueCount, int warningCount)
    {
        if (issueCount > 0)
        {
            return "critical";
        }

        if (warningCount > 0)
        {
            return "warning";
        }

        return "ok";
    }
}
}