BCards/src/BCards.Web/HealthChecks/SystemResourcesHealthCheck.cs
Ricardo Carneiro 90cc01d7cf
All checks were successful
BCards Deployment Pipeline / Run Tests (push) Successful in 1s
BCards Deployment Pipeline / PR Validation (push) Has been skipped
BCards Deployment Pipeline / Build and Push Image (push) Successful in 15m39s
BCards Deployment Pipeline / Deploy to Production (ARM - OCI) (push) Successful in 1m17s
BCards Deployment Pipeline / Deploy to Staging (x86 - Local) (push) Has been skipped
BCards Deployment Pipeline / Cleanup Old Resources (push) Has been skipped
BCards Deployment Pipeline / Deployment Summary (push) Successful in 0s
feat: health checks, seq e logs
2025-08-24 20:00:53 -03:00

134 lines
4.9 KiB
C#

using Microsoft.Extensions.Diagnostics.HealthChecks;
using System.Diagnostics;
namespace BCards.Web.HealthChecks;
/// <summary>
/// Health check that reports a snapshot of the current process's resource usage
/// (managed memory, working set, GC collection counts, CPU time, threads, handles, uptime)
/// together with basic host information.
/// </summary>
/// <remarks>
/// The check reports <c>Degraded</c> when managed memory exceeds
/// <see cref="DegradedMemoryThresholdMb"/> MB or when the thread count exceeds
/// <see cref="DegradedThreadCountThreshold"/>, <c>Unhealthy</c> when collecting the
/// metrics throws, and <c>Healthy</c> otherwise.
/// NOTE(review): the payload includes the machine name and the OS user name; confirm the
/// health endpoint is not publicly reachable before relying on this in production.
/// </remarks>
public class SystemResourcesHealthCheck : IHealthCheck
{
    /// <summary>Managed-memory level (in MB) above which the check reports Degraded.</summary>
    private const double DegradedMemoryThresholdMb = 1000;

    /// <summary>Thread count above which the check reports Degraded.</summary>
    private const int DegradedThreadCountThreshold = 500;

    private readonly ILogger<SystemResourcesHealthCheck> _logger;

    // Fallback reference point for uptime. This is captured when the type is initialized
    // (typically on the first health probe), NOT at process start, so it is only used when
    // the real process start time cannot be read.
    private static readonly DateTime _startTime = DateTime.UtcNow;

    /// <summary>Creates the health check.</summary>
    /// <param name="logger">Logger used to record the outcome of each check.</param>
    public SystemResourcesHealthCheck(ILogger<SystemResourcesHealthCheck> logger)
    {
        _logger = logger;
    }

    /// <summary>
    /// Collects memory, process and system metrics and maps them to a health status.
    /// </summary>
    /// <param name="context">Health check context supplied by the framework (not read here).</param>
    /// <param name="cancellationToken">Cancellation token (not observed; the work is synchronous).</param>
    /// <returns>
    /// A completed task holding a Healthy, Degraded or Unhealthy result whose data dictionary
    /// carries the collected metrics.
    /// </returns>
    public Task<HealthCheckResult> CheckHealthAsync(
        HealthCheckContext context,
        CancellationToken cancellationToken = default)
    {
        var stopwatch = Stopwatch.StartNew();

        try
        {
            // Memory information (no collection is forced to obtain the figure).
            var totalMemory = GC.GetTotalMemory(false);
            var workingSet = Environment.WorkingSet;
            var memoryMb = ToMegabytes(totalMemory);

            // Current process information.
            using var currentProcess = Process.GetCurrentProcess();
            var cpuTimeMs = GetTotalProcessorTimeMs(currentProcess);

            // Uptime, measured from the real process start rather than from the first probe.
            var uptime = DateTime.UtcNow - GetProcessStartTimeUtc(currentProcess);
            if (uptime < TimeSpan.Zero)
            {
                // Guard against clock adjustments producing a negative duration.
                uptime = TimeSpan.Zero;
            }

            var uptimeString = FormatUptime(uptime);

            // Thread count.
            var threadCount = currentProcess.Threads.Count;

            stopwatch.Stop();
            var duration = stopwatch.ElapsedMilliseconds;

            var data = new Dictionary<string, object>
            {
                { "status", "healthy" },
                { "duration", $"{duration}ms" },
                { "memory", new Dictionary<string, object>
                    {
                        { "total_managed_mb", Math.Round(memoryMb, 2) },
                        { "working_set_mb", Math.Round(ToMegabytes(workingSet), 2) },
                        { "gc_generation_0", GC.CollectionCount(0) },
                        { "gc_generation_1", GC.CollectionCount(1) },
                        { "gc_generation_2", GC.CollectionCount(2) }
                    }
                },
                { "process", new Dictionary<string, object>
                    {
                        { "id", currentProcess.Id },
                        { "threads", threadCount },
                        { "handles", currentProcess.HandleCount },
                        // Cumulative CPU time consumed by the process; 0 when it cannot be read.
                        { "cpu_time_ms", cpuTimeMs },
                        { "uptime", uptimeString },
                        { "uptime_seconds", (int)uptime.TotalSeconds }
                    }
                },
                { "system", new Dictionary<string, object>
                    {
                        { "processor_count", Environment.ProcessorCount },
                        { "os_version", Environment.OSVersion.ToString() },
                        { "machine_name", Environment.MachineName },
                        { "user_name", Environment.UserName }
                    }
                }
            };

            _logger.LogInformation("System resources health check completed in {Duration}ms - Memory: {Memory}MB, Threads: {Threads}",
                duration, Math.Round(memoryMb, 1), threadCount);

            // Map the metrics to a status using the configured thresholds.
            if (memoryMb > DegradedMemoryThresholdMb)
            {
                data["status"] = "degraded";
                return Task.FromResult(HealthCheckResult.Degraded($"High memory usage: {memoryMb:F1}MB", data: data));
            }

            if (threadCount > DegradedThreadCountThreshold)
            {
                data["status"] = "degraded";
                return Task.FromResult(HealthCheckResult.Degraded($"High thread count: {threadCount}", data: data));
            }

            return Task.FromResult(HealthCheckResult.Healthy($"System resources normal (Memory: {memoryMb:F1}MB, Threads: {threadCount})", data: data));
        }
        catch (Exception ex)
        {
            stopwatch.Stop();
            var duration = stopwatch.ElapsedMilliseconds;

            _logger.LogError(ex, "System resources health check failed after {Duration}ms", duration);

            var data = new Dictionary<string, object>
            {
                { "status", "unhealthy" },
                { "duration", $"{duration}ms" },
                { "error", ex.Message }
            };

            return Task.FromResult(HealthCheckResult.Unhealthy($"System resources check failed: {ex.Message}", ex, data));
        }
    }

    /// <summary>Converts a byte count to megabytes (1 MB = 1024 * 1024 bytes).</summary>
    private static double ToMegabytes(long bytes) => bytes / 1024.0 / 1024.0;

    /// <summary>
    /// Returns the cumulative CPU time used by <paramref name="process"/> in milliseconds,
    /// or 0 when the value cannot be read.
    /// </summary>
    private static double GetTotalProcessorTimeMs(Process process)
    {
        try
        {
            return process.TotalProcessorTime.TotalMilliseconds;
        }
        catch
        {
            // Best effort: an unreadable CPU counter must not fail the whole health check.
            return 0;
        }
    }

    /// <summary>
    /// Returns the UTC start time of <paramref name="process"/>, falling back to the time this
    /// type was initialized when the start time cannot be read.
    /// </summary>
    private static DateTime GetProcessStartTimeUtc(Process process)
    {
        try
        {
            return process.StartTime.ToUniversalTime();
        }
        catch (Exception)
        {
            // Best effort: keep reporting an (approximate) uptime instead of failing the check.
            return _startTime;
        }
    }

    /// <summary>
    /// Formats an uptime as a short string using the two or three largest relevant units:
    /// "Nd Nh Nm", "Nh Nm", "Nm Ns" or "Ns".
    /// </summary>
    private static string FormatUptime(TimeSpan uptime)
    {
        if (uptime.TotalDays >= 1)
        {
            return $"{(int)uptime.TotalDays}d {uptime.Hours}h {uptime.Minutes}m";
        }

        if (uptime.TotalHours >= 1)
        {
            return $"{uptime.Hours}h {uptime.Minutes}m";
        }

        if (uptime.TotalMinutes >= 1)
        {
            return $"{uptime.Minutes}m {uptime.Seconds}s";
        }

        return $"{uptime.Seconds}s";
    }
}