using BCards.Web.Models;
using BCards.Web.Utils;
using Microsoft.Extensions.Caching.Memory;
using MongoDB.Driver;
using HtmlAgilityPack;
using System.Text.RegularExpressions;
using System.Security.Cryptography;
using System.Text;

namespace BCards.Web.Services;

/// <summary>
/// Extracts Open Graph style product metadata (title, description, image, price, currency)
/// from a remote page. Requests are gated by a domain allow-list and a per-user rate limit
/// held in <see cref="IMemoryCache"/>; results are cached in the "openGraphCache" MongoDB collection.
/// </summary>
public class OpenGraphService : IOpenGraphService
{
    /// <summary>Window during which a user cannot trigger another extraction.</summary>
    private static readonly TimeSpan RateLimitWindow = TimeSpan.FromMinutes(1);

    /// <summary>Upper bound for a single regex scan over downloaded (untrusted) HTML.</summary>
    private static readonly TimeSpan RegexTimeout = TimeSpan.FromSeconds(2);

    private static readonly Regex WhitespaceRegex = new(@"\s+", RegexOptions.Compiled);
    private static readonly Regex NonPriceCharsRegex = new(@"[^\d\.,R\$]", RegexOptions.Compiled);
    private static readonly Regex DigitRegex = new(@"\d", RegexOptions.Compiled);

    // Tried in order; the first match wins. Every pattern requires at least one digit so that
    // stray punctuation (for example "$." in inline scripts) is not reported as a price.
    private static readonly Regex[] PricePatterns =
    {
        CreatePriceRegex(@"R\$\s*\d[\d\.,]*"),
        CreatePriceRegex(@"BRL\s*\d[\d\.,]*"),
        CreatePriceRegex(@"\$\s*\d[\d\.,]*"),
        CreatePriceRegex(@"price[^>]*>([^<]*\d[^<]*)<"),
        CreatePriceRegex(@"valor[^>]*>([^<]*\d[^<]*)<"),
        CreatePriceRegex(@"preço[^>]*>([^<]*\d[^<]*)<"),
    };

    // XPath fallbacks used when no regex pattern matched, tried in order.
    private static readonly string[] PriceNodeXPaths =
    {
        "//*[contains(@class, 'price')]",
        "//*[contains(@class, 'valor')]",
        "//*[contains(@class, 'preco')]",
        "//*[@data-price]",
        "//*[contains(@class, 'price-current')]",
        "//*[contains(@class, 'price-value')]",
        "//*[contains(@class, 'product-price')]",
        "//*[contains(@class, 'sale-price')]",
    };

    private readonly IMemoryCache _cache;
    private readonly ILogger<OpenGraphService> _logger;
    private readonly HttpClient _httpClient;
    private readonly IMongoCollection<OpenGraphCache> _ogCache;

    /// <summary>
    /// Creates the service and configures the supplied <see cref="HttpClient"/>
    /// (default headers are replaced and the timeout is set to 10 seconds).
    /// </summary>
    /// <param name="cache">In-memory cache that stores the per-user rate-limit markers.</param>
    /// <param name="logger">Logger for diagnostics.</param>
    /// <param name="httpClient">Client used to download pages; its default headers and timeout are overwritten here.</param>
    /// <param name="database">Database that holds the "openGraphCache" collection.</param>
    public OpenGraphService(
        IMemoryCache cache,
        ILogger<OpenGraphService> logger,
        HttpClient httpClient,
        IMongoDatabase database)
    {
        _cache = cache;
        _logger = logger;
        _httpClient = httpClient;
        _ogCache = database.GetCollection<OpenGraphCache>("openGraphCache");

        // NOTE(review): this mutates the injected client, so it presumably relies on the client
        // being dedicated to this service (typed client) - confirm against the DI registration.
        _httpClient.DefaultRequestHeaders.Clear();

        // Identify as the Facebook crawler user agent.
        // NOTE(review): presumably chosen because sites serve Open Graph tags to it - confirm.
        _httpClient.DefaultRequestHeaders.Add(
            "User-Agent",
            "facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)");
        _httpClient.Timeout = TimeSpan.FromSeconds(10);
    }

    /// <summary>
    /// Returns product metadata for <paramref name="url"/>, from the MongoDB cache when a
    /// non-expired entry exists, otherwise by downloading and parsing the page.
    /// </summary>
    /// <param name="url">Page to extract from; must pass <c>AllowedDomains.IsAllowed</c>.</param>
    /// <param name="userId">User on whose behalf the extraction runs; used as the rate-limit key.</param>
    /// <returns>
    /// The extracted or cached data. Download/parse failures are reported through
    /// <c>IsValid = false</c> and <c>ErrorMessage</c> rather than an exception.
    /// </returns>
    /// <exception cref="InvalidOperationException">
    /// The domain is not allowed, or the user already has an active rate-limit marker.
    /// </exception>
    public async Task<OpenGraphData> ExtractDataAsync(string url, string userId)
    {
        // 1. Validate the domain.
        if (!AllowedDomains.IsAllowed(url))
        {
            _logger.LogWarning("Tentativa de extração de domínio não permitido: {Url} pelo usuário {UserId}", url, userId);
            throw new InvalidOperationException("Domínio não permitido. Use apenas e-commerces conhecidos e seguros.");
        }

        // 2. Check the rate limit (one extraction per minute per user).
        var rateLimitKey = BuildRateLimitKey(userId);
        if (_cache.TryGetValue(rateLimitKey, out _))
        {
            _logger.LogWarning("Rate limit excedido para usuário {UserId}", userId);
            throw new InvalidOperationException("Aguarde 1 minuto antes de extrair dados de outro produto.");
        }

        // 3. Check the MongoDB cache. The query already filters out expired entries.
        var urlHash = GenerateUrlHash(url);
        var cachedData = await FindCachedByHashAsync(urlHash);
        if (cachedData is not null)
        {
            _logger.LogInformation("Retornando dados do cache MongoDB para URL: {Url}", url);
            return ToOpenGraphData(cachedData);
        }

        // 4. Extract from the live page.
        var extractedData = await ExtractFromUrlAsync(url);

        // 5. Persist to the MongoDB cache (best effort; failures are logged inside).
        await SaveToCacheAsync(url, urlHash, extractedData);

        // 6. Start the rate-limit window. Cache hits above do not start a window.
        _cache.Set(rateLimitKey, true, RateLimitWindow);

        _logger.LogInformation("Dados extraídos com sucesso para URL: {Url}", url);
        return extractedData;
    }

    /// <summary>Reports whether <paramref name="userId"/> currently has an active rate-limit marker.</summary>
    /// <param name="userId">User to check.</param>
    /// <returns><c>true</c> when a marker is present in the memory cache.</returns>
    public Task<bool> IsRateLimitedAsync(string userId)
    {
        return Task.FromResult(_cache.TryGetValue(BuildRateLimitKey(userId), out _));
    }

    /// <summary>Looks up the non-expired cache entry for <paramref name="url"/>, if any.</summary>
    /// <param name="url">URL whose hash is used as the cache key.</param>
    /// <returns>The cached entry, or <c>null</c> when none exists or it has expired.</returns>
    public async Task<OpenGraphCache?> GetCachedDataAsync(string url)
    {
        return await FindCachedByHashAsync(GenerateUrlHash(url));
    }

    /// <summary>Builds the memory-cache key that holds a user's rate-limit marker.</summary>
    private static string BuildRateLimitKey(string userId) => $"og_rate_{userId}";

    /// <summary>Copies the payload fields of a cache entry into a fresh result object.</summary>
    private static OpenGraphData ToOpenGraphData(OpenGraphCache cached)
    {
        return new OpenGraphData
        {
            Title = cached.Title,
            Description = cached.Description,
            Image = cached.Image,
            Price = cached.Price,
            Currency = cached.Currency,
            IsValid = cached.IsValid,
            ErrorMessage = cached.ErrorMessage
        };
    }

    /// <summary>Queries the cache collection for a non-expired entry with the given hash.</summary>
    private async Task<OpenGraphCache?> FindCachedByHashAsync(string urlHash)
    {
        var now = DateTime.UtcNow;
        return await _ogCache
            .Find(x => x.UrlHash == urlHash && x.ExpiresAt > now)
            .FirstOrDefaultAsync();
    }

    /// <summary>
    /// Downloads <paramref name="url"/> and parses its metadata. Any exception is logged and
    /// converted into a result with <c>IsValid = false</c>.
    /// </summary>
    private async Task<OpenGraphData> ExtractFromUrlAsync(string url)
    {
        try
        {
            _logger.LogInformation("Iniciando extração de dados para URL: {Url}", url);

            // Dispose the response so the underlying connection/content is released promptly.
            using var response = await _httpClient.GetAsync(url);
            response.EnsureSuccessStatusCode();

            var html = await response.Content.ReadAsStringAsync();
            var doc = new HtmlDocument();
            doc.LoadHtml(html);

            var title = GetMetaContent(doc, "og:title", "title") ?? GetTitleFromHTML(doc);
            var description = GetMetaContent(doc, "og:description", "description");
            var image = GetMetaContent(doc, "og:image");
            var price = GetMetaContent(doc, "og:price:amount") ?? ExtractPriceFromHTML(html, doc);
            var currency = GetMetaContent(doc, "og:price:currency") ?? "BRL";

            // Clean and validate the extracted values.
            title = CleanText(title);
            description = CleanText(description);
            price = CleanPrice(price);
            image = ValidateImageUrl(image, url);

            // A page is considered usable as soon as it yields a title.
            var isValid = !string.IsNullOrEmpty(title);

            return new OpenGraphData
            {
                Title = title,
                Description = description,
                Image = image,
                Price = price,
                Currency = currency,
                IsValid = isValid
            };
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "Falha ao extrair dados de {Url}", url);
            return new OpenGraphData
            {
                IsValid = false,
                ErrorMessage = $"Erro ao processar a página: {ex.Message}"
            };
        }
    }

    /// <summary>
    /// Returns the <c>content</c> of the first <c>meta</c> element whose <c>property</c>,
    /// <c>name</c> or <c>itemprop</c> equals one of <paramref name="properties"/>, tried in order.
    /// </summary>
    /// <returns>The first non-blank content value, or <c>null</c> when nothing matched.</returns>
    private string? GetMetaContent(HtmlDocument doc, params string[] properties)
    {
        foreach (var property in properties)
        {
            var meta = doc.DocumentNode
                .SelectSingleNode($"//meta[@property='{property}' or @name='{property}' or @itemprop='{property}']");

            var content = meta?.GetAttributeValue("content", null);
            if (!string.IsNullOrWhiteSpace(content))
            {
                return content;
            }
        }

        return null;
    }

    /// <summary>Returns the trimmed text of the first <c>title</c> element, or <c>null</c> when absent.</summary>
    private string? GetTitleFromHTML(HtmlDocument doc)
    {
        var titleNode = doc.DocumentNode.SelectSingleNode("//title");
        return titleNode?.InnerText?.Trim();
    }

    /// <summary>
    /// Heuristically finds a price in the page: first by scanning the raw HTML with
    /// <see cref="PricePatterns"/>, then by probing the nodes in <see cref="PriceNodeXPaths"/>.
    /// </summary>
    /// <param name="html">Raw page markup.</param>
    /// <param name="doc">Parsed form of the same markup.</param>
    /// <returns>The raw price text (not yet normalised), or <c>null</c> when nothing was found.</returns>
    private string? ExtractPriceFromHTML(string html, HtmlDocument doc)
    {
        foreach (var pattern in PricePatterns)
        {
            Match match;
            try
            {
                match = pattern.Match(html);
            }
            catch (RegexMatchTimeoutException ex)
            {
                // A pathological page must not stall the request; treat it as "no match".
                _logger.LogWarning(ex, "Tempo limite excedido ao procurar preço com o padrão {Pattern}", pattern.ToString());
                continue;
            }

            if (!match.Success)
            {
                continue;
            }

            // Patterns with a capture group isolate the element text; returning the whole
            // match would drag tag/attribute fragments into the price.
            var captured = match.Groups[1];
            return (captured.Success ? captured.Value : match.Value).Trim();
        }

        foreach (var xpath in PriceNodeXPaths)
        {
            var priceNode = doc.DocumentNode.SelectSingleNode(xpath);
            if (priceNode is null)
            {
                continue;
            }

            // Prefer an explicit data-price attribute value over the rendered text.
            var attributeValue = priceNode.Attributes["data-price"]?.Value?.Trim();
            if (!string.IsNullOrEmpty(attributeValue) && DigitRegex.IsMatch(attributeValue))
            {
                return attributeValue;
            }

            var priceText = priceNode.InnerText?.Trim();
            if (!string.IsNullOrEmpty(priceText) && DigitRegex.IsMatch(priceText))
            {
                return priceText;
            }
        }

        return null;
    }

    /// <summary>Builds a case-insensitive, compiled, time-limited regex for price scanning.</summary>
    private static Regex CreatePriceRegex(string pattern)
    {
        return new Regex(pattern, RegexOptions.IgnoreCase | RegexOptions.Compiled, RegexTimeout);
    }

    /// <summary>Trims <paramref name="text"/> and collapses whitespace runs to a single space; blank input yields an empty string.</summary>
    private string CleanText(string? text)
    {
        if (string.IsNullOrWhiteSpace(text))
        {
            return string.Empty;
        }

        return WhitespaceRegex.Replace(text.Trim(), " ");
    }

    /// <summary>
    /// Keeps only digits, '.', ',', 'R' and '$' from <paramref name="price"/> (everything else
    /// becomes a space) and collapses the resulting whitespace; blank input yields an empty string.
    /// </summary>
    private string CleanPrice(string? price)
    {
        if (string.IsNullOrWhiteSpace(price))
        {
            return string.Empty;
        }

        var cleanPrice = NonPriceCharsRegex.Replace(price, " ").Trim();
        return WhitespaceRegex.Replace(cleanPrice, " ");
    }

    /// <summary>
    /// Normalises an image reference into an absolute http/https URL.
    /// </summary>
    /// <param name="imageUrl">Absolute, protocol-relative, root-relative or path-relative image reference.</param>
    /// <param name="baseUrl">URL of the page the reference came from; used to resolve relative references.</param>
    /// <returns>The absolute URL, or an empty string when it is blank, unresolvable, or not http/https.</returns>
    private string ValidateImageUrl(string? imageUrl, string baseUrl)
    {
        if (string.IsNullOrWhiteSpace(imageUrl))
        {
            return string.Empty;
        }

        var candidate = imageUrl.Trim();

        // Already absolute. The scheme check also rejects rooted paths that some platforms
        // parse as absolute file URIs, letting them fall through to relative resolution.
        if (Uri.TryCreate(candidate, UriKind.Absolute, out var absolute) && IsHttpOrHttps(absolute))
        {
            return absolute.ToString();
        }

        // Resolve against the page URL; this handles "//host/x", "/x" and "x" and keeps the port.
        if (Uri.TryCreate(baseUrl, UriKind.Absolute, out var baseUri)
            && Uri.TryCreate(baseUri, candidate, out var resolved)
            && IsHttpOrHttps(resolved))
        {
            return resolved.ToString();
        }

        _logger.LogWarning("Erro ao validar URL da imagem: {ImageUrl}", imageUrl);
        return string.Empty;
    }

    /// <summary>Returns <c>true</c> when <paramref name="uri"/> uses the http or https scheme.</summary>
    private static bool IsHttpOrHttps(Uri uri)
    {
        return uri.Scheme == Uri.UriSchemeHttp || uri.Scheme == Uri.UriSchemeHttps;
    }

    /// <summary>
    /// Computes the cache key for a URL: Base64 of the SHA-256 of the lower-cased URL (UTF-8).
    /// The whole URL is lower-cased, so URLs differing only by case share one cache entry.
    /// </summary>
    private static string GenerateUrlHash(string url)
    {
        var hashBytes = SHA256.HashData(Encoding.UTF8.GetBytes(url.ToLowerInvariant()));
        return Convert.ToBase64String(hashBytes);
    }

    /// <summary>
    /// Upserts the extraction result into the cache collection, keyed by <paramref name="urlHash"/>.
    /// Valid results expire after 24 hours, invalid ones after 1 hour. Failures are logged and swallowed
    /// so that a cache problem never fails the extraction itself.
    /// </summary>
    private async Task SaveToCacheAsync(string url, string urlHash, OpenGraphData data)
    {
        try
        {
            // Single timestamp so CachedAt and ExpiresAt are exactly 1h/24h apart.
            var now = DateTime.UtcNow;
            var cacheItem = new OpenGraphCache
            {
                Url = url,
                UrlHash = urlHash,
                Title = data.Title,
                Description = data.Description,
                Image = data.Image,
                Price = data.Price,
                Currency = data.Currency,
                IsValid = data.IsValid,
                ErrorMessage = data.ErrorMessage,
                CachedAt = now,
                ExpiresAt = data.IsValid ? now.AddHours(24) : now.AddHours(1)
            };

            await _ogCache.ReplaceOneAsync(
                x => x.UrlHash == urlHash,
                cacheItem,
                new ReplaceOptions { IsUpsert = true });
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Erro ao salvar cache para URL: {Url}", url);
        }
    }
}