BCards/src/BCards.Web/Services/OpenGraphService.cs
Ricardo Carneiro 27ae8b606e feat:
+login ms que permite contas corporativas ou não.
+Links para produtos de afiliados
2025-06-25 19:30:19 -03:00

299 lines
10 KiB
C#

using BCards.Web.Models;
using BCards.Web.Utils;
using Microsoft.Extensions.Caching.Memory;
using MongoDB.Driver;
using HtmlAgilityPack;
using System.Text.RegularExpressions;
using System.Security.Cryptography;
using System.Text;
namespace BCards.Web.Services;
public class OpenGraphService : IOpenGraphService
{
private readonly IMemoryCache _cache;
private readonly ILogger<OpenGraphService> _logger;
private readonly HttpClient _httpClient;
private readonly IMongoCollection<OpenGraphCache> _ogCache;
/// <summary>
/// Stores the injected dependencies, resolves the MongoDB collection used as the
/// persistent Open Graph cache, and configures the injected HTTP client.
/// </summary>
/// <param name="cache">In-memory cache; this class uses it for per-user rate-limit markers.</param>
/// <param name="logger">Logger for this service.</param>
/// <param name="httpClient">HTTP client used to download product pages. Its default headers and timeout are overwritten here.</param>
/// <param name="database">MongoDB database that holds the "openGraphCache" collection.</param>
public OpenGraphService(
    IMemoryCache cache,
    ILogger<OpenGraphService> logger,
    HttpClient httpClient,
    IMongoDatabase database)
{
    // Identify as Facebook's link-preview crawler.
    // NOTE(review): presumably chosen so sites return their Open Graph markup - confirm.
    const string crawlerUserAgent =
        "facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)";

    _ogCache = database.GetCollection<OpenGraphCache>("openGraphCache");
    _httpClient = httpClient;
    _logger = logger;
    _cache = cache;

    // Start from a clean header set so only the User-Agent below is sent by default.
    var defaultHeaders = _httpClient.DefaultRequestHeaders;
    defaultHeaders.Clear();
    defaultHeaders.Add("User-Agent", crawlerUserAgent);

    // Give up on pages that take longer than ten seconds.
    _httpClient.Timeout = TimeSpan.FromSeconds(10);
}
/// <summary>
/// Returns Open Graph data (title, description, image, price, currency) for a product URL,
/// serving it from the MongoDB cache when a live entry exists and scraping the page otherwise.
/// </summary>
/// <param name="url">Product page URL; must pass <c>AllowedDomains.IsAllowed</c>.</param>
/// <param name="userId">Identifier used to build the per-user rate-limit key.</param>
/// <returns>
/// The cached or freshly extracted data. Extraction failures are reported through
/// <c>IsValid = false</c> and <c>ErrorMessage</c> rather than an exception.
/// </returns>
/// <exception cref="InvalidOperationException">
/// The URL is rejected by the domain allow-list, or the user still has an active rate-limit marker.
/// </exception>
public async Task<OpenGraphData> ExtractDataAsync(string url, string userId)
{
    // 1. Validate the domain.
    if (!AllowedDomains.IsAllowed(url))
    {
        _logger.LogWarning("Tentativa de extração de domínio não permitido: {Url} pelo usuário {UserId}", url, userId);
        throw new InvalidOperationException("Domínio não permitido. Use apenas e-commerces conhecidos e seguros.");
    }

    // 2. Check the rate limit (one extraction per minute per user).
    var rateLimitKey = $"og_rate_{userId}";
    if (_cache.TryGetValue(rateLimitKey, out _))
    {
        _logger.LogWarning("Rate limit excedido para usuário {UserId}", userId);
        throw new InvalidOperationException("Aguarde 1 minuto antes de extrair dados de outro produto.");
    }

    // 3. Check the MongoDB cache. Cache hits do not consume the user's rate-limit slot.
    var cachedData = await GetCachedDataAsync(url);
    if (cachedData != null && cachedData.ExpiresAt > DateTime.UtcNow)
    {
        _logger.LogInformation("Retornando dados do cache MongoDB para URL: {Url}", url);
        return new OpenGraphData
        {
            Title = cachedData.Title,
            Description = cachedData.Description,
            Image = cachedData.Image,
            Price = cachedData.Price,
            Currency = cachedData.Currency,
            IsValid = cachedData.IsValid,
            ErrorMessage = cachedData.ErrorMessage
        };
    }

    // 4. Reserve the rate-limit slot BEFORE the network fetch. The fetch can take up to the
    //    HTTP timeout; writing the marker only afterwards let parallel requests from the same
    //    user all slip past the check in step 2.
    _cache.Set(rateLimitKey, true, TimeSpan.FromMinutes(1));

    // 5. Scrape the page. Failures come back as an invalid result, not as an exception.
    var extractedData = await ExtractFromUrlAsync(url);

    // 6. Persist the result (valid or not) in the MongoDB cache.
    await SaveToCacheAsync(url, GenerateUrlHash(url), extractedData);

    if (extractedData.IsValid)
    {
        _logger.LogInformation("Dados extraídos com sucesso para URL: {Url}", url);
    }
    else
    {
        // Do not report success when nothing usable was extracted.
        _logger.LogWarning("Extração concluída sem dados válidos para URL: {Url}", url);
    }

    return extractedData;
}
/// <summary>
/// Reports whether the given user currently has a rate-limit marker in the in-memory cache.
/// </summary>
/// <param name="userId">Identifier used to build the rate-limit cache key.</param>
/// <returns>A completed task holding <c>true</c> when a marker exists, otherwise <c>false</c>.</returns>
public Task<bool> IsRateLimitedAsync(string userId)
{
    var markerKey = $"og_rate_{userId}";

    // Only presence matters; the stored value is discarded.
    var hasMarker = _cache.TryGetValue(markerKey, out _);
    return Task.FromResult(hasMarker);
}
/// <summary>
/// Looks up the non-expired MongoDB cache entry for a URL.
/// </summary>
/// <param name="url">URL whose hash (see <c>GenerateUrlHash</c>) identifies the cache entry.</param>
/// <returns>The first entry with a matching hash whose <c>ExpiresAt</c> is still in the future, or <c>null</c>.</returns>
public async Task<OpenGraphCache?> GetCachedDataAsync(string url)
{
    var lookupHash = GenerateUrlHash(url);

    // Expired documents are filtered out by the query itself.
    var liveEntries = _ogCache.Find(
        entry => entry.UrlHash == lookupHash && entry.ExpiresAt > DateTime.UtcNow);

    var hit = await liveEntries.FirstOrDefaultAsync();
    return hit;
}
/// <summary>
/// Downloads the page at <paramref name="url"/> and extracts title, description, image,
/// price and currency from its meta tags, falling back to the &lt;title&gt; element and to
/// price heuristics over the HTML.
/// </summary>
/// <param name="url">Page to download.</param>
/// <returns>
/// The extracted data; <c>IsValid</c> is true when a non-empty title was found. Any exception
/// (network error, non-success status, timeout, parsing failure) is logged and turned into a
/// result with <c>IsValid = false</c> and an <c>ErrorMessage</c>.
/// </returns>
private async Task<OpenGraphData> ExtractFromUrlAsync(string url)
{
    try
    {
        _logger.LogInformation("Iniciando extração de dados para URL: {Url}", url);

        // Dispose the response so its content stream and connection are released promptly.
        using var response = await _httpClient.GetAsync(url);
        response.EnsureSuccessStatusCode();
        var html = await response.Content.ReadAsStringAsync();

        var doc = new HtmlDocument();
        doc.LoadHtml(html);

        var title = GetMetaContent(doc, "og:title", "title") ?? GetTitleFromHTML(doc);
        var description = GetMetaContent(doc, "og:description", "description");
        var image = GetMetaContent(doc, "og:image");
        var price = GetMetaContent(doc, "og:price:amount") ?? ExtractPriceFromHTML(html, doc);
        // Currency falls back to BRL when the page does not declare one.
        var currency = GetMetaContent(doc, "og:price:currency") ?? "BRL";

        // Clean up and validate the extracted values.
        title = CleanText(title);
        description = CleanText(description);
        price = CleanPrice(price);
        image = ValidateImageUrl(image, url);

        // A result is only considered usable when it has a title.
        var isValid = !string.IsNullOrEmpty(title);

        return new OpenGraphData
        {
            Title = title,
            Description = description,
            Image = image,
            Price = price,
            Currency = currency,
            IsValid = isValid
        };
    }
    catch (Exception ex)
    {
        // Best effort: report the failure to the caller as data instead of throwing.
        _logger.LogWarning(ex, "Falha ao extrair dados de {Url}", url);
        return new OpenGraphData
        {
            IsValid = false,
            ErrorMessage = $"Erro ao processar a página: {ex.Message}"
        };
    }
}
/// <summary>
/// Returns the <c>content</c> attribute of the first &lt;meta&gt; tag that matches one of the
/// given keys, trying the keys in the order supplied.
/// </summary>
/// <param name="doc">Parsed HTML document to search.</param>
/// <param name="properties">Keys compared against the tag's <c>property</c>, <c>name</c> and <c>itemprop</c> attributes.</param>
/// <returns>The first non-blank content value found, or <c>null</c> when no key yields one.</returns>
private string? GetMetaContent(HtmlDocument doc, params string[] properties)
{
    var root = doc.DocumentNode;

    for (var i = 0; i < properties.Length; i++)
    {
        var key = properties[i];
        var xpath = $"//meta[@property='{key}' or @name='{key}' or @itemprop='{key}']";

        var metaNode = root.SelectSingleNode(xpath);
        if (metaNode == null)
        {
            continue;
        }

        var value = metaNode.GetAttributeValue("content", null);
        if (string.IsNullOrWhiteSpace(value))
        {
            // Tag present but empty: fall through to the next key.
            continue;
        }

        return value;
    }

    return null;
}
/// <summary>
/// Reads the trimmed text of the document's first &lt;title&gt; element.
/// </summary>
/// <param name="doc">Parsed HTML document.</param>
/// <returns>The trimmed title text, or <c>null</c> when there is no &lt;title&gt; element.</returns>
private string? GetTitleFromHTML(HtmlDocument doc)
{
    var node = doc.DocumentNode.SelectSingleNode("//title");
    if (node == null)
    {
        return null;
    }

    return node.InnerText?.Trim();
}
/// <summary>
/// Heuristically locates a price in a page that does not expose one through meta tags.
/// First scans the raw HTML with regular expressions, then probes elements by
/// price-related class names or a <c>data-price</c> attribute.
/// </summary>
/// <param name="html">Raw HTML of the page.</param>
/// <param name="doc">The same page parsed into a document.</param>
/// <returns>The raw (uncleaned) price text of the first hit, or <c>null</c> when nothing looks like a price.</returns>
private string? ExtractPriceFromHTML(string html, HtmlDocument doc)
{
    // Regex patterns for prices in different formats. Every pattern requires at least one
    // digit so that punctuation-only text (e.g. "à vista, no boleto.") is not taken as a price.
    var pricePatterns = new[]
    {
        @"R\$\s*\d[\d\.,]*",
        @"BRL\s*\d[\d\.,]*",
        @"[\$]\s*\d[\d\.,]*",
        @"price[^>]*>([^<]*\d[^<]*)<",
        @"valor[^>]*>([^<]*\d[^<]*)<",
        @"preço[^>]*>([^<]*\d[^<]*)<"
    };

    foreach (var pattern in pricePatterns)
    {
        var match = Regex.Match(html, pattern, RegexOptions.IgnoreCase);
        if (!match.Success)
        {
            continue;
        }

        // Tag-based patterns capture the element text in group 1; returning the whole match
        // would drag the attribute/tag residue ("price-box\">...<") into the price.
        // Currency-prefixed patterns have no group, so the whole match is the price.
        var elementText = match.Groups[1];
        return (elementText.Success ? elementText.Value : match.Value).Trim();
    }

    // Fall back to well-known selectors: ".name" is a class-name fragment, "[name]" an attribute.
    var priceSelectors = new[]
    {
        ".price", ".valor", ".preco", "[data-price]", ".price-current",
        ".price-value", ".product-price", ".sale-price"
    };

    foreach (var selector in priceSelectors)
    {
        var isAttributeSelector = selector.StartsWith('[') && selector.EndsWith(']');
        var name = selector.Trim('.', '[', ']');

        // An attribute selector must test for the attribute; treating it as a class
        // fragment (contains(@class, '[data-price]')) can never match.
        var xpath = isAttributeSelector
            ? $"//*[@{name}]"
            : $"//*[contains(@class, '{name}')]";

        var priceNode = doc.DocumentNode.SelectSingleNode(xpath);
        if (priceNode == null)
        {
            continue;
        }

        // For attribute selectors prefer the attribute's value; otherwise use the element text.
        var priceText = isAttributeSelector
            ? priceNode.GetAttributeValue(name, string.Empty)
            : null;
        if (string.IsNullOrWhiteSpace(priceText))
        {
            priceText = priceNode.InnerText;
        }

        priceText = priceText?.Trim();
        if (!string.IsNullOrEmpty(priceText) && Regex.IsMatch(priceText, @"\d"))
        {
            return priceText;
        }
    }

    return null;
}
/// <summary>
/// Normalizes free text: trims it and collapses every run of whitespace into one space.
/// </summary>
/// <param name="text">Text to normalize; may be <c>null</c>.</param>
/// <returns>The normalized text, or an empty string for <c>null</c>/blank input.</returns>
private string CleanText(string? text)
{
    var hasContent = !string.IsNullOrWhiteSpace(text);
    if (hasContent)
    {
        var trimmed = text!.Trim();
        return Regex.Replace(trimmed, @"\s+", " ");
    }

    return string.Empty;
}
/// <summary>
/// Reduces raw price text to digits, the separators '.' and ',', and the currency
/// symbols "R$" / "$"; everything else becomes a single space.
/// </summary>
/// <param name="price">Raw price text; may be <c>null</c>.</param>
/// <returns>The cleaned price, or an empty string for <c>null</c>/blank input.</returns>
private string CleanPrice(string? price)
{
    if (string.IsNullOrWhiteSpace(price))
    {
        return string.Empty;
    }

    // Blank out every character that is not a digit, separator or currency symbol.
    // A capital 'R' is kept only when it is part of "R$"; keeping every 'R' turned
    // inputs such as "BRL 10,00" into "R 10,00".
    var cleanPrice = Regex.Replace(price, @"[^\d\.,R\$]|R(?!\$)", " ").Trim();

    // Collapse the gaps left behind by the removed characters.
    return Regex.Replace(cleanPrice, @"\s+", " ");
}
/// <summary>
/// Turns the image reference found on a page into an absolute http(s) URL.
/// </summary>
/// <param name="imageUrl">Image reference as found in the page: absolute, protocol-relative ("//host/x"), root-relative ("/x") or path-relative ("x"). May be <c>null</c>.</param>
/// <param name="baseUrl">URL of the page the reference came from; relative references are resolved against it.</param>
/// <returns>The absolute image URL, or an empty string when the reference is blank, cannot be resolved, or is not http/https.</returns>
private string ValidateImageUrl(string? imageUrl, string baseUrl)
{
    if (string.IsNullOrWhiteSpace(imageUrl))
    {
        return string.Empty;
    }

    Uri? resolved;
    if (Uri.TryCreate(baseUrl, UriKind.Absolute, out var baseUri))
    {
        // Standard URI resolution handles every relative form and keeps the base port.
        // Manual "scheme://host" + path concatenation broke protocol-relative references
        // ("//cdn/x" became "scheme://host//cdn/x") and dropped non-default ports.
        Uri.TryCreate(baseUri, imageUrl, out resolved);
    }
    else
    {
        // Without a usable base only an already-absolute reference can be accepted.
        Uri.TryCreate(imageUrl, UriKind.Absolute, out resolved);
    }

    if (resolved is null)
    {
        return string.Empty;
    }

    // Only web URLs are meaningful as a product image; reject javascript:, data:, file:, etc.
    var isWebUrl = resolved.Scheme == Uri.UriSchemeHttp || resolved.Scheme == Uri.UriSchemeHttps;
    return isWebUrl ? resolved.ToString() : string.Empty;
}
/// <summary>
/// Builds the cache key for a URL: the Base64-encoded SHA-256 digest of the
/// lower-cased (invariant culture) URL encoded as UTF-8.
/// </summary>
/// <param name="url">URL to hash.</param>
/// <returns>Base64 text of the 32-byte digest.</returns>
private string GenerateUrlHash(string url)
{
    // NOTE(review): the whole URL is lower-cased, so URLs differing only by letter case in the
    // path or query share one key - confirm this is intended. Hashes are persisted, so changing
    // the scheme would orphan existing cache entries.
    var normalizedBytes = Encoding.UTF8.GetBytes(url.ToLowerInvariant());
    var digest = SHA256.HashData(normalizedBytes);
    return Convert.ToBase64String(digest);
}
/// <summary>
/// Upserts an extraction result into the MongoDB cache, keyed by the URL hash.
/// Valid results are kept for 24 hours, invalid ones for 1 hour. Failures are
/// logged and swallowed.
/// </summary>
/// <param name="url">Original URL, stored with the entry and used in the error log.</param>
/// <param name="urlHash">Hash that identifies the cache document to replace or insert.</param>
/// <param name="data">Extraction result to persist.</param>
private async Task SaveToCacheAsync(string url, string urlHash, OpenGraphData data)
{
    try
    {
        // Valid results live a full day; failed extractions are retried after an hour.
        var lifetimeHours = data.IsValid ? 24 : 1;

        var entry = new OpenGraphCache
        {
            Url = url,
            UrlHash = urlHash,
            Title = data.Title,
            Description = data.Description,
            Image = data.Image,
            Price = data.Price,
            Currency = data.Currency,
            IsValid = data.IsValid,
            ErrorMessage = data.ErrorMessage,
            CachedAt = DateTime.UtcNow,
            ExpiresAt = DateTime.UtcNow.AddHours(lifetimeHours)
        };

        // Replace the existing document for this hash, or insert one if none exists.
        var upsert = new ReplaceOptions { IsUpsert = true };
        await _ogCache.ReplaceOneAsync(existing => existing.UrlHash == urlHash, entry, upsert);
    }
    catch (Exception ex)
    {
        // A cache write failure must not fail the extraction that produced the data.
        _logger.LogError(ex, "Erro ao salvar cache para URL: {Url}", url);
    }
}
}