299 lines
10 KiB
C#
299 lines
10 KiB
C#
using BCards.Web.Models;
|
|
using BCards.Web.Utils;
|
|
using Microsoft.Extensions.Caching.Memory;
|
|
using MongoDB.Driver;
|
|
using HtmlAgilityPack;
|
|
using System.Text.RegularExpressions;
|
|
using System.Security.Cryptography;
|
|
using System.Text;
|
|
|
|
namespace BCards.Web.Services;
|
|
|
|
public class OpenGraphService : IOpenGraphService
|
|
{
|
|
/// <summary>In-memory cache; this class stores the per-user rate-limit markers in it.</summary>
private readonly IMemoryCache _cache;

/// <summary>Logger for extraction, cache and rate-limit events.</summary>
private readonly ILogger<OpenGraphService> _logger;

/// <summary>HTTP client used to download the pages to inspect.</summary>
private readonly HttpClient _httpClient;

/// <summary>MongoDB collection ("openGraphCache") that persists extraction results.</summary>
private readonly IMongoCollection<OpenGraphCache> _ogCache;

/// <summary>
/// Stores the injected dependencies, resolves the cache collection and
/// configures the supplied <see cref="HttpClient"/> (headers and timeout).
/// </summary>
/// <param name="cache">In-memory cache used for rate limiting.</param>
/// <param name="logger">Logger for this service.</param>
/// <param name="httpClient">Client whose default headers and timeout are overwritten here.</param>
/// <param name="database">Database that hosts the "openGraphCache" collection.</param>
public OpenGraphService(
    IMemoryCache cache,
    ILogger<OpenGraphService> logger,
    HttpClient httpClient,
    IMongoDatabase database)
{
    _cache = cache;
    _logger = logger;
    _httpClient = httpClient;
    _ogCache = database.GetCollection<OpenGraphCache>("openGraphCache");

    // Start from a clean header set and identify as Facebook's link-preview
    // crawler rather than a desktop browser.
    // NOTE(review): this mutates the injected client; confirm it is not shared
    // with other consumers (changing Timeout on a client that has already
    // sent a request throws).
    var defaultHeaders = _httpClient.DefaultRequestHeaders;
    defaultHeaders.Clear();
    defaultHeaders.Add(
        "User-Agent",
        "facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)");

    _httpClient.Timeout = TimeSpan.FromSeconds(10);
}
|
|
|
|
/// <summary>
/// Returns Open Graph data for <paramref name="url"/>, serving it from the
/// MongoDB cache when an unexpired entry exists and otherwise downloading and
/// parsing the page.
/// </summary>
/// <param name="url">Page to inspect; must be accepted by <c>AllowedDomains.IsAllowed</c>.</param>
/// <param name="userId">Identifier used to build the per-user rate-limit key.</param>
/// <returns>
/// The cached or freshly extracted data. A failed extraction is returned as a
/// result with <c>IsValid == false</c>, not as an exception.
/// </returns>
/// <exception cref="InvalidOperationException">
/// The domain is not allowed, or the user already has an active rate-limit marker.
/// </exception>
public async Task<OpenGraphData> ExtractDataAsync(string url, string userId)
{
    // 1. Validate the domain.
    if (!AllowedDomains.IsAllowed(url))
    {
        _logger.LogWarning("Tentativa de extração de domínio não permitido: {Url} pelo usuário {UserId}", url, userId);
        throw new InvalidOperationException("Domínio não permitido. Use apenas e-commerces conhecidos e seguros.");
    }

    // 2. Enforce the rate limit (one extraction per minute per user).
    var rateLimitKey = $"og_rate_{userId}";
    if (_cache.TryGetValue(rateLimitKey, out _))
    {
        _logger.LogWarning("Rate limit excedido para usuário {UserId}", userId);
        throw new InvalidOperationException("Aguarde 1 minuto antes de extrair dados de outro produto.");
    }

    // 3. Serve from the MongoDB cache when possible. Cache hits do not start
    //    a rate-limit window.
    var cachedData = await GetCachedDataAsync(url);
    if (cachedData is not null && cachedData.ExpiresAt > DateTime.UtcNow)
    {
        _logger.LogInformation("Retornando dados do cache MongoDB para URL: {Url}", url);
        return new OpenGraphData
        {
            Title = cachedData.Title,
            Description = cachedData.Description,
            Image = cachedData.Image,
            Price = cachedData.Price,
            Currency = cachedData.Currency,
            IsValid = cachedData.IsValid,
            ErrorMessage = cachedData.ErrorMessage
        };
    }

    // 4. Open the rate-limit window BEFORE the slow network fetch. Writing the
    //    marker only after the download left a gap of up to the HTTP timeout
    //    during which concurrent calls by the same user all passed step 2.
    //    The check-then-set is still not atomic, but the gap is now negligible.
    _cache.Set(rateLimitKey, true, TimeSpan.FromMinutes(1));

    // 5. Download and parse the page (never throws; failures come back as
    //    an invalid result).
    var extractedData = await ExtractFromUrlAsync(url);

    // 6. Persist the result, valid or not, in the MongoDB cache.
    await SaveToCacheAsync(url, GenerateUrlHash(url), extractedData);

    // Only report success when the extraction actually produced valid data.
    if (extractedData.IsValid)
    {
        _logger.LogInformation("Dados extraídos com sucesso para URL: {Url}", url);
    }

    return extractedData;
}
|
|
|
|
/// <summary>
/// Reports whether a rate-limit marker currently exists in the in-memory
/// cache for the given user.
/// </summary>
/// <param name="userId">Identifier used to build the "og_rate_" cache key.</param>
/// <returns>A completed task holding <c>true</c> when the marker is present.</returns>
public Task<bool> IsRateLimitedAsync(string userId)
{
    var markerExists = _cache.TryGetValue($"og_rate_{userId}", out _);
    return Task.FromResult(markerExists);
}
|
|
|
|
/// <summary>
/// Looks up the unexpired cache entry stored for <paramref name="url"/>.
/// </summary>
/// <param name="url">URL whose hash is used as the lookup key.</param>
/// <returns>The first matching entry, or <c>null</c> when none exists or it has expired.</returns>
public async Task<OpenGraphCache?> GetCachedDataAsync(string url)
{
    var lookupHash = GenerateUrlHash(url);

    // Expired entries are filtered out by the query itself.
    var liveEntries = _ogCache.Find(
        entry => entry.UrlHash == lookupHash && entry.ExpiresAt > DateTime.UtcNow);

    return await liveEntries.FirstOrDefaultAsync();
}
|
|
|
|
/// <summary>
/// Downloads <paramref name="url"/> and extracts title, description, image,
/// price and currency from its meta tags, with HTML fallbacks for title and price.
/// </summary>
/// <param name="url">Absolute URL of the page to download.</param>
/// <returns>
/// The extracted data; <c>IsValid</c> is true only when a title was found.
/// Any exception (network failure, non-success status, parsing error) is
/// logged and converted into an invalid result carrying an error message,
/// so this method does not throw.
/// </returns>
private async Task<OpenGraphData> ExtractFromUrlAsync(string url)
{
    try
    {
        _logger.LogInformation("Iniciando extração de dados para URL: {Url}", url);

        // Dispose the response so the connection and content stream are
        // released deterministically instead of waiting for the GC.
        using var response = await _httpClient.GetAsync(url);
        response.EnsureSuccessStatusCode();

        var html = await response.Content.ReadAsStringAsync();
        var doc = new HtmlDocument();
        doc.LoadHtml(html);

        var title = GetMetaContent(doc, "og:title", "title") ?? GetTitleFromHTML(doc);
        var description = GetMetaContent(doc, "og:description", "description");
        var image = GetMetaContent(doc, "og:image");
        var price = GetMetaContent(doc, "og:price:amount") ?? ExtractPriceFromHTML(html, doc);
        // Currency falls back to BRL when the page does not declare one.
        var currency = GetMetaContent(doc, "og:price:currency") ?? "BRL";

        // Clean up and validate the raw values.
        title = CleanText(title);
        description = CleanText(description);
        price = CleanPrice(price);
        image = ValidateImageUrl(image, url);

        // A page without any title is treated as a failed extraction.
        var isValid = !string.IsNullOrEmpty(title);

        return new OpenGraphData
        {
            Title = title,
            Description = description,
            Image = image,
            Price = price,
            Currency = currency,
            IsValid = isValid
        };
    }
    catch (Exception ex)
    {
        _logger.LogWarning(ex, "Falha ao extrair dados de {Url}", url);
        return new OpenGraphData
        {
            IsValid = false,
            ErrorMessage = $"Erro ao processar a página: {ex.Message}"
        };
    }
}
|
|
|
|
/// <summary>
/// Returns the <c>content</c> attribute of the first <c>&lt;meta&gt;</c> tag
/// whose <c>property</c>, <c>name</c> or <c>itemprop</c> equals one of the
/// given keys, trying the keys in the order supplied.
/// </summary>
/// <param name="doc">Parsed HTML document to search.</param>
/// <param name="properties">Candidate meta keys, highest priority first.</param>
/// <returns>The first non-blank content value, or <c>null</c> when none is found.</returns>
private string? GetMetaContent(HtmlDocument doc, params string[] properties)
{
    var root = doc.DocumentNode;

    for (var index = 0; index < properties.Length; index++)
    {
        var key = properties[index];
        var xpath = $"//meta[@property='{key}' or @name='{key}' or @itemprop='{key}']";

        var metaNode = root.SelectSingleNode(xpath);
        if (metaNode == null)
        {
            continue;
        }

        var value = metaNode.GetAttributeValue("content", null);
        if (string.IsNullOrWhiteSpace(value))
        {
            // Tag exists but carries no usable content; try the next key.
            continue;
        }

        return value;
    }

    return null;
}
|
|
|
|
/// <summary>
/// Reads the trimmed inner text of the document's first <c>&lt;title&gt;</c> element.
/// </summary>
/// <param name="doc">Parsed HTML document to search.</param>
/// <returns>The trimmed title text, or <c>null</c> when there is no title element.</returns>
private string? GetTitleFromHTML(HtmlDocument doc)
    => doc.DocumentNode.SelectSingleNode("//title")?.InnerText?.Trim();
|
|
|
|
/// <summary>
/// Best-effort price lookup for pages without an <c>og:price:amount</c> tag.
/// First scans the raw HTML with a list of regex patterns, then falls back to
/// elements selected by common price class names or a <c>data-price</c> attribute.
/// </summary>
/// <param name="html">Raw page markup, used by the regex pass.</param>
/// <param name="doc">The same page parsed, used by the selector pass.</param>
/// <returns>
/// The raw (not yet cleaned) price text of the first hit, or <c>null</c>
/// when nothing that contains a digit is found.
/// </returns>
private string? ExtractPriceFromHTML(string html, HtmlDocument doc)
{
    // Every pattern requires at least one digit. Accepting punctuation alone
    // matched things like jQuery's "$." or a sentence-ending "." as a price.
    var pricePatterns = new[]
    {
        @"R\$\s*\d[\d\.,]*",
        @"BRL\s*\d[\d\.,]*",
        @"[\$]\s*\d[\d\.,]*",
        @"price[^>]*>([^<]*\d[^<]*)<",
        @"valor[^>]*>([^<]*\d[^<]*)<",
        @"preço[^>]*>([^<]*\d[^<]*)<"
    };

    foreach (var pattern in pricePatterns)
    {
        var match = Regex.Match(html, pattern, RegexOptions.IgnoreCase);
        if (!match.Success)
        {
            continue;
        }

        // The tag-based patterns capture the element text in group 1; return
        // that rather than the whole match, which also contains attribute
        // text and the surrounding angle brackets.
        var captured = match.Groups[1];
        return (captured.Success ? captured.Value : match.Value).Trim();
    }

    // Fall back to well-known selectors, tried in this order.
    var priceSelectors = new[]
    {
        ".price", ".valor", ".preco", "[data-price]", ".price-current",
        ".price-value", ".product-price", ".sale-price"
    };

    foreach (var selector in priceSelectors)
    {
        string? candidate;

        if (selector.StartsWith('[') && selector.EndsWith(']'))
        {
            // Attribute selector: look for the attribute itself. Treating it
            // as a class-name substring could never match anything.
            var attributeName = selector.Substring(1, selector.Length - 2);
            var attributeNode = doc.DocumentNode.SelectSingleNode($"//*[@{attributeName}]");

            candidate = attributeNode?.GetAttributeValue(attributeName, null);
            if (string.IsNullOrWhiteSpace(candidate))
            {
                candidate = attributeNode?.InnerText;
            }
        }
        else
        {
            // Class selector: substring match on the class attribute.
            var className = selector.TrimStart('.');
            var classNode = doc.DocumentNode.SelectSingleNode($"//*[contains(@class, '{className}')]");
            candidate = classNode?.InnerText;
        }

        candidate = candidate?.Trim();
        if (!string.IsNullOrEmpty(candidate) && Regex.IsMatch(candidate, @"\d"))
        {
            return candidate;
        }
    }

    return null;
}
|
|
|
|
/// <summary>
/// Normalizes free text: trims it and collapses every run of whitespace
/// into a single space.
/// </summary>
/// <param name="text">Raw text; may be null.</param>
/// <returns>The normalized text, or an empty string for null/blank input.</returns>
private string CleanText(string? text)
{
    var hasContent = !string.IsNullOrWhiteSpace(text);
    if (hasContent)
    {
        var trimmed = text!.Trim();
        return Regex.Replace(trimmed, @"\s+", " ");
    }

    return string.Empty;
}
|
|
|
|
/// <summary>
/// Strips a raw price string down to digits, '.', ',', 'R' and '$'.
/// Every other character is turned into a space, then the result is trimmed
/// and runs of whitespace are collapsed to one space.
/// </summary>
/// <param name="price">Raw price text; may be null.</param>
/// <returns>The cleaned price, or an empty string for null/blank input.</returns>
private string CleanPrice(string? price)
{
    if (string.IsNullOrWhiteSpace(price))
    {
        return string.Empty;
    }

    // Blank out anything that is not part of a price, then normalize spacing.
    var onlyPriceChars = Regex.Replace(price, @"[^\d\.,R\$]", " ");
    var trimmed = onlyPriceChars.Trim();
    var normalized = Regex.Replace(trimmed, @"\s+", " ");
    return normalized;
}
|
|
|
|
/// <summary>
/// Turns the image reference found on a page into an absolute http(s) URL.
/// </summary>
/// <remarks>
/// The reference is resolved against <paramref name="baseUrl"/> with standard
/// URI resolution, so root-relative ("/img/a.jpg"), path-relative
/// ("img/a.jpg") and protocol-relative ("//cdn.example.com/a.jpg") forms all
/// work and the base URL's port is preserved. Results whose scheme is not
/// http or https (for example <c>javascript:</c>, <c>data:</c> or
/// <c>file:</c>) are rejected.
/// </remarks>
/// <param name="imageUrl">Image reference as found in the page; may be null.</param>
/// <param name="baseUrl">URL of the page the reference came from.</param>
/// <returns>The absolute image URL, or an empty string when it is missing or invalid.</returns>
private string ValidateImageUrl(string? imageUrl, string baseUrl)
{
    if (string.IsNullOrWhiteSpace(imageUrl))
    {
        return string.Empty;
    }

    Uri? resolved;
    if (Uri.TryCreate(baseUrl, UriKind.Absolute, out var baseUri))
    {
        // Handles absolute references too: an absolute imageUrl simply
        // replaces the base.
        if (!Uri.TryCreate(baseUri, imageUrl, out resolved))
        {
            resolved = null;
        }
    }
    else if (!Uri.TryCreate(imageUrl, UriKind.Absolute, out resolved))
    {
        // Without a usable base only an already-absolute reference can be accepted.
        resolved = null;
    }

    var isWebUrl = resolved is not null
        && (resolved.Scheme == Uri.UriSchemeHttp || resolved.Scheme == Uri.UriSchemeHttps);

    return isWebUrl ? resolved!.ToString() : string.Empty;
}
|
|
|
|
/// <summary>
/// Builds the cache key for a URL: the Base64-encoded SHA-256 digest of the
/// UTF-8 bytes of the lower-cased URL.
/// </summary>
/// <remarks>
/// Lower-casing means URLs that differ only by letter case share one key.
/// </remarks>
/// <param name="url">URL to hash.</param>
/// <returns>Base64 text of the 32-byte digest.</returns>
private string GenerateUrlHash(string url)
{
    var normalizedBytes = Encoding.UTF8.GetBytes(url.ToLowerInvariant());

    using (var hasher = SHA256.Create())
    {
        var digest = hasher.ComputeHash(normalizedBytes);
        return Convert.ToBase64String(digest);
    }
}
|
|
|
|
/// <summary>
/// Upserts an extraction result into the MongoDB cache, keyed by URL hash.
/// Valid results are kept for 24 hours, invalid ones for 1 hour.
/// </summary>
/// <remarks>
/// Best effort: any failure is logged and swallowed so a cache problem never
/// fails the extraction itself.
/// </remarks>
/// <param name="url">Original URL, stored alongside the data.</param>
/// <param name="urlHash">Hash used as the lookup key for the upsert.</param>
/// <param name="data">Extraction result to persist.</param>
private async Task SaveToCacheAsync(string url, string urlHash, OpenGraphData data)
{
    try
    {
        // Failed extractions expire sooner so they are retried earlier.
        var lifetime = data.IsValid ? TimeSpan.FromHours(24) : TimeSpan.FromHours(1);

        var entry = new OpenGraphCache
        {
            Url = url,
            UrlHash = urlHash,
            Title = data.Title,
            Description = data.Description,
            Image = data.Image,
            Price = data.Price,
            Currency = data.Currency,
            IsValid = data.IsValid,
            ErrorMessage = data.ErrorMessage,
            CachedAt = DateTime.UtcNow,
            ExpiresAt = DateTime.UtcNow.Add(lifetime)
        };

        // Replace the existing document for this hash, or insert a new one.
        // NOTE(review): if OpenGraphCache maps an _id, confirm that replacing
        // an existing document with a freshly built instance is accepted.
        var upsert = new ReplaceOptions { IsUpsert = true };
        await _ogCache.ReplaceOneAsync(existing => existing.UrlHash == urlHash, entry, upsert);
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "Erro ao salvar cache para URL: {Url}", url);
    }
}
|
|
} |