using Microsoft.Extensions.Diagnostics.HealthChecks;
using System.Diagnostics;

namespace QRRapidoApp.Services.HealthChecks
{
    /// <summary>
    /// Health check that reports CPU, memory, garbage-collection, thread and handle
    /// statistics for the current process and compares them with configurable thresholds.
    /// </summary>
    /// <remarks>
    /// Thresholds are read once, in the constructor, from the
    /// <c>HealthChecks:Resources</c> configuration section.
    /// </remarks>
    public class ResourceHealthCheck : IHealthCheck
    {
        private const long BytesPerMegabyte = 1024 * 1024;

        /// <summary>Length of the window over which CPU time is sampled.</summary>
        private const int CpuSampleWindowMs = 100;

        /// <summary>Thread count above which a warning is reported.</summary>
        private const int ThreadCountWarningThreshold = 200;

        /// <summary>Collections per minute above which GC pressure is labelled "high".</summary>
        private const double HighGcCollectionsPerMinute = 20;

        /// <summary>Collections per minute above which GC pressure is labelled "medium".</summary>
        private const double MediumGcCollectionsPerMinute = 10;

        private readonly IConfiguration _configuration;
        private readonly ILogger<ResourceHealthCheck> _logger;
        private readonly double _cpuThresholdPercent;
        private readonly long _memoryThresholdMB;
        private readonly int _gcPressureThreshold;

        /// <summary>
        /// Creates the health check and reads its thresholds from configuration.
        /// </summary>
        /// <param name="configuration">
        /// Source of <c>HealthChecks:Resources:CpuThresholdPercent</c> (default 85.0),
        /// <c>HealthChecks:Resources:MemoryThresholdMB</c> (default 600) and
        /// <c>HealthChecks:Resources:GcPressureThreshold</c> (default 15).
        /// </param>
        /// <param name="logger">Logger used when the check itself fails.</param>
        public ResourceHealthCheck(
            IConfiguration configuration,
            ILogger<ResourceHealthCheck> logger)
        {
            _configuration = configuration;
            _logger = logger;

            _cpuThresholdPercent = configuration.GetValue("HealthChecks:Resources:CpuThresholdPercent", 85.0);
            _memoryThresholdMB = configuration.GetValue("HealthChecks:Resources:MemoryThresholdMB", 600);
            _gcPressureThreshold = configuration.GetValue("HealthChecks:Resources:GcPressureThreshold", 15);
        }

        /// <summary>
        /// Collects resource statistics for the current process and maps them to a health status.
        /// </summary>
        /// <param name="context">Health check context; not used by this check.</param>
        /// <param name="cancellationToken">Token that cancels the CPU sampling delay.</param>
        /// <returns>
        /// <c>Unhealthy</c> when any critical limit is exceeded or the check itself fails,
        /// <c>Degraded</c> when only warning limits are exceeded, otherwise <c>Healthy</c>.
        /// The collected statistics are attached as result data except when the check fails.
        /// </returns>
        /// <exception cref="OperationCanceledException">
        /// <paramref name="cancellationToken"/> was cancelled while sampling CPU usage.
        /// </exception>
        public async Task<HealthCheckResult> CheckHealthAsync(HealthCheckContext context, CancellationToken cancellationToken = default)
        {
            try
            {
                // Process holds an OS handle, so use a single instance and dispose it.
                using var currentProcess = Process.GetCurrentProcess();

                // CPU usage estimation (simplified)
                var cpuUsagePercent = await GetCpuUsageEstimateAsync(currentProcess, cancellationToken);

                // Discard anything cached during sampling so the readings below are current.
                currentProcess.Refresh();

                var data = new Dictionary<string, object>();

                // Memory usage
                var workingSetBytes = currentProcess.WorkingSet64;
                var workingSetMB = workingSetBytes / BytesPerMegabyte;
                var privateMemoryMB = currentProcess.PrivateMemorySize64 / BytesPerMegabyte;
                var virtualMemoryMB = currentProcess.VirtualMemorySize64 / BytesPerMegabyte;
                var gcTotalMemoryMB = GC.GetTotalMemory(false) / BytesPerMegabyte;

                // GC statistics
                var gen0Collections = GC.CollectionCount(0);
                var gen1Collections = GC.CollectionCount(1);
                var gen2Collections = GC.CollectionCount(2);

                // Thread and handle counts
                var threadCount = currentProcess.Threads.Count;
                var handleCount = currentProcess.HandleCount;

                // Process uptime. StartTime is expressed in local time, so convert it
                // before subtracting from UtcNow; otherwise the result is off by the
                // machine's UTC offset.
                var uptime = DateTime.UtcNow - currentProcess.StartTime.ToUniversalTime();

                // Populate health check data
                data["cpu"] = $"{cpuUsagePercent:F1}%";
                data["memory"] = $"{workingSetMB}MB";
                data["memoryPercent"] = CalculateMemoryPercent(workingSetBytes);
                data["privateMemoryMB"] = privateMemoryMB;
                data["virtualMemoryMB"] = virtualMemoryMB;
                data["gcTotalMemoryMB"] = gcTotalMemoryMB;
                data["gen0Collections"] = gen0Collections;
                data["gen1Collections"] = gen1Collections;
                data["gen2Collections"] = gen2Collections;
                data["threadCount"] = threadCount;
                data["handleCount"] = handleCount;
                data["uptime"] = $"{uptime.Days}d {uptime.Hours}h {uptime.Minutes}m";
                data["processId"] = currentProcess.Id;

                // Estimate GC pressure (rough approximation): collections per minute of uptime.
                var totalCollections = gen0Collections + gen1Collections + gen2Collections;
                var gcPressureValue = CalculateGcPressureValue(totalCollections, uptime);
                var gcPressure = EstimateGcPressure(gcPressureValue);
                data["gcPressure"] = gcPressure;

                // Determine overall status
                var issues = new List<string>();
                var warnings = new List<string>();

                // Check CPU.
                // NOTE(review): the estimate is clamped to 0-100, so with the default
                // threshold of 85 the critical limit (102) can never be exceeded and CPU
                // only ever produces a warning. Confirm whether that is intended.
                if (cpuUsagePercent > _cpuThresholdPercent * 1.2)
                {
                    issues.Add($"CPU usage critical ({cpuUsagePercent:F1}%)");
                }
                else if (cpuUsagePercent > _cpuThresholdPercent)
                {
                    warnings.Add($"CPU usage high ({cpuUsagePercent:F1}%)");
                }

                // Check Memory
                if (workingSetMB > _memoryThresholdMB * 1.5)
                {
                    issues.Add($"Memory usage critical ({workingSetMB}MB)");
                }
                else if (workingSetMB > _memoryThresholdMB)
                {
                    warnings.Add($"Memory usage high ({workingSetMB}MB)");
                }

                // Check GC pressure
                if (gcPressureValue > _gcPressureThreshold * 2)
                {
                    issues.Add($"GC pressure critical ({gcPressure})");
                }
                else if (gcPressureValue > _gcPressureThreshold)
                {
                    warnings.Add($"GC pressure high ({gcPressure})");
                }

                // Check thread count (basic heuristic)
                if (threadCount > ThreadCountWarningThreshold)
                {
                    warnings.Add($"High thread count ({threadCount})");
                }

                data["status"] = DetermineStatus(issues.Count, warnings.Count);

                // Return appropriate health status
                if (issues.Count > 0)
                {
                    return HealthCheckResult.Unhealthy($"Resource issues detected: {string.Join(", ", issues)}", data: data);
                }

                if (warnings.Count > 0)
                {
                    return HealthCheckResult.Degraded($"Resource warnings: {string.Join(", ", warnings)}", data: data);
                }

                return HealthCheckResult.Healthy($"Resource usage normal (CPU: {cpuUsagePercent:F1}%, Memory: {workingSetMB}MB)", data: data);
            }
            catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
            {
                // Cancellation is not a resource problem; let the caller observe it.
                throw;
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Resource health check failed");
                return HealthCheckResult.Unhealthy($"Resource health check failed: {ex.Message}");
            }
        }

        /// <summary>
        /// Estimates this process's CPU usage by comparing processor time consumed with
        /// wall-clock time elapsed over a short sampling window, normalised by core count.
        /// </summary>
        /// <param name="process">The current process.</param>
        /// <param name="cancellationToken">Token that cancels the sampling delay.</param>
        /// <returns>
        /// Approximate CPU usage clamped to 0-100, or 0 when the measurement fails.
        /// </returns>
        private async Task<double> GetCpuUsageEstimateAsync(Process process, CancellationToken cancellationToken)
        {
            try
            {
                // Stopwatch is monotonic, unlike DateTime, so clock adjustments cannot
                // distort the interval.
                var stopwatch = Stopwatch.StartNew();
                var startCpuUsage = process.TotalProcessorTime;

                // Small asynchronous delay to measure CPU usage without blocking a thread.
                await Task.Delay(CpuSampleWindowMs, cancellationToken);

                process.Refresh();
                var endCpuUsage = process.TotalProcessorTime;
                stopwatch.Stop();

                var cpuUsedMs = (endCpuUsage - startCpuUsage).TotalMilliseconds;
                var totalMsPassed = stopwatch.Elapsed.TotalMilliseconds;
                if (totalMsPassed <= 0)
                {
                    return 0.0;
                }

                var cpuUsageTotal = cpuUsedMs / (Environment.ProcessorCount * totalMsPassed);
                return Math.Clamp(cpuUsageTotal * 100, 0.0, 100.0);
            }
            catch (Exception ex) when (ex is not OperationCanceledException)
            {
                // Best effort: report 0 if CPU measurement fails, but leave a trace.
                _logger.LogDebug(ex, "CPU usage sampling failed; reporting 0%");
                return 0.0;
            }
        }

        /// <summary>
        /// Expresses the working set as a percentage of the memory the runtime reports
        /// as available to the garbage collector.
        /// </summary>
        /// <param name="workingSetBytes">Working set of the process, in bytes.</param>
        /// <returns>
        /// A string such as <c>"12.3%"</c>, clamped to 0-100, or <c>"unknown"</c> when the
        /// runtime does not report a positive amount of available memory.
        /// </returns>
        private static string CalculateMemoryPercent(long workingSetBytes)
        {
            var totalAvailableBytes = GC.GetGCMemoryInfo().TotalAvailableMemoryBytes;
            if (totalAvailableBytes <= 0)
            {
                return "unknown";
            }

            var memoryPercent = (double)workingSetBytes / totalAvailableBytes * 100;
            return $"{Math.Clamp(memoryPercent, 0.0, 100.0):F1}%";
        }

        /// <summary>
        /// Computes the average number of garbage collections per minute of uptime.
        /// </summary>
        /// <param name="totalCollections">Total collections across all generations.</param>
        /// <param name="uptime">Time since the process started.</param>
        /// <returns>Collections per minute, or 0 when uptime is under one minute.</returns>
        private static double CalculateGcPressureValue(long totalCollections, TimeSpan uptime)
        {
            // Under a minute of uptime the rate is dominated by start-up collections.
            if (uptime.TotalMinutes < 1)
            {
                return 0.0;
            }

            return totalCollections / uptime.TotalMinutes;
        }

        /// <summary>
        /// Maps a collections-per-minute rate to a coarse label.
        /// </summary>
        /// <param name="collectionsPerMinute">Rate from <see cref="CalculateGcPressureValue"/>.</param>
        /// <returns><c>"high"</c> above 20, <c>"medium"</c> above 10, otherwise <c>"low"</c>.</returns>
        private static string EstimateGcPressure(double collectionsPerMinute)
        {
            if (collectionsPerMinute > HighGcCollectionsPerMinute)
            {
                return "high";
            }

            if (collectionsPerMinute > MediumGcCollectionsPerMinute)
            {
                return "medium";
            }

            return "low";
        }

        /// <summary>
        /// Summarises issue and warning counts as a single status label.
        /// </summary>
        /// <param name="issueCount">Number of critical issues found.</param>
        /// <param name="warningCount">Number of warnings found.</param>
        /// <returns><c>"critical"</c>, <c>"warning"</c> or <c>"ok"</c>.</returns>
        private static string DetermineStatus(int issueCount, int warningCount)
        {
            if (issueCount > 0)
            {
                return "critical";
            }

            if (warningCount > 0)
            {
                return "warning";
            }

            return "ok";
        }
    }
}