#pragma warning disable SKEXP0001
using ChatRAG.Contracts.VectorSearch;
using ChatRAG.Data;
using ChatRAG.Models;
using ChatRAG.Services.Contracts;
using Microsoft.SemanticKernel.Embeddings;
using System.Text;
using System.Collections.Concurrent;

namespace ChatRAG.Services.TextServices
{
    /// <summary>
    /// <see cref="ITextDataService"/> implementation that stores documents through an
    /// <see cref="IVectorSearchService"/> and generates embeddings with an
    /// <see cref="ITextEmbeddingGenerationService"/>.
    /// </summary>
    /// <remarks>
    /// NOTE(review): the generic type arguments in this file (e.g. <c>Task&lt;string&gt;</c>,
    /// <c>ILogger&lt;QdrantTextDataService&gt;</c>) were restored from how the values are used;
    /// confirm them against <see cref="ITextDataService"/> and <see cref="IVectorSearchService"/>.
    /// </remarks>
    public class QdrantTextDataService : ITextDataService
    {
        // Concurrency / batching limits.
        private const int MaxConcurrentSectionSaves = 5;
        private const int MaxConcurrentProjectFetches = 3;
        private const int SaveBatchSize = 5;    // kept small to avoid timeouts
        private const int DeleteBatchSize = 10; // kept small for stability

        // Parameters of the "discover all project IDs" search.
        // NOTE(review): 384 presumably matches the collection's vector dimension - confirm.
        private const int ProjectScanVectorSize = 384;
        private const int ProjectScanLimit = 1000;

        private readonly IVectorSearchService _vectorSearchService;
        private readonly ITextEmbeddingGenerationService _embeddingService;
        private readonly ILogger<QdrantTextDataService> _logger;

        // Cache of known project IDs (value = last time the project was seen, UTC),
        // used to avoid the expensive discovery search.
        private readonly ConcurrentDictionary<string, DateTime> _projectIdCache = new();
        private readonly TimeSpan _cacheTimeout = TimeSpan.FromMinutes(5);

        // UTC ticks of the last successful full project scan; 0 means "never scanned".
        // The cache is only a complete list of projects while a scan is still fresh;
        // entries added by saves alone describe just the projects this instance wrote to.
        private long _lastFullProjectScanTicks;

        /// <summary>Creates the service with its vector store, embedding generator and logger.</summary>
        public QdrantTextDataService(
            IVectorSearchService vectorSearchService,
            ITextEmbeddingGenerationService embeddingService,
            ILogger<QdrantTextDataService> logger)
        {
            _vectorSearchService = vectorSearchService;
            _embeddingService = embeddingService;
            _logger = logger;
        }

        /// <summary>Name of the backing provider.</summary>
        public string ProviderName => "Qdrant";

        // ========================================
        // ORIGINAL METHODS (MongoDB-compatible API)
        // ========================================

        /// <summary>Creates a new document (no ID supplied).</summary>
        public async Task SalvarNoMongoDB(string titulo, string texto, string projectId)
        {
            await SalvarNoMongoDB(null, titulo, texto, projectId);
        }

        /// <summary>
        /// Creates a document when <paramref name="id"/> is null or empty, otherwise updates
        /// the document with that ID. Failures are logged and rethrown.
        /// </summary>
        public async Task SalvarNoMongoDB(string? id, string titulo, string texto, string projectId)
        {
            try
            {
                // The embedding is generated once and reused for either branch.
                var embedding = await GenerateEmbeddingOptimized(BuildEmbeddingText(titulo, texto));

                if (string.IsNullOrEmpty(id))
                {
                    var newId = await _vectorSearchService.AddDocumentAsync(titulo, texto, projectId, embedding);
                    _logger.LogDebug("Documento '{Title}' criado no Qdrant com ID {Id}", titulo, newId);
                }
                else
                {
                    await _vectorSearchService.UpdateDocumentAsync(id, titulo, texto, projectId, embedding);
                    _logger.LogDebug("Documento '{Id}' atualizado no Qdrant", id);
                }

                TouchProjectCache(projectId);
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Erro ao salvar documento '{Title}' no Qdrant", titulo);
                throw;
            }
        }

        /// <summary>
        /// Splits <paramref name="textoCompleto"/> into "**title**" sections and saves each
        /// section as a new document, at most <see cref="MaxConcurrentSectionSaves"/> at a time.
        /// </summary>
        public async Task SalvarTextoComEmbeddingNoMongoDB(string textoCompleto, string projectId)
        {
            try
            {
                var sections = ParseTextIntoSections(textoCompleto);

                // "using" guarantees disposal even when one of the saves throws.
                using var throttle = new SemaphoreSlim(MaxConcurrentSectionSaves, MaxConcurrentSectionSaves);

                var saveTasks = sections.Select(async section =>
                {
                    await throttle.WaitAsync();
                    try
                    {
                        // First line of a section is its title, the remainder is the content.
                        var parts = section.Split('\n', 2);
                        var title = parts[0].Replace("**", "").Replace("\r", "").Trim();
                        var content = parts.Length > 1 ? parts[1] : "";

                        await SalvarNoMongoDB(title, content, projectId);
                    }
                    finally
                    {
                        throttle.Release();
                    }
                });

                await Task.WhenAll(saveTasks);

                _logger.LogInformation("Texto completo processado: {SectionCount} seções salvas no Qdrant", sections.Count);
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Erro ao processar texto completo no Qdrant");
                throw;
            }
        }

        /// <summary>
        /// Returns the documents of every known project. A project whose fetch fails is
        /// logged and contributes no documents.
        /// </summary>
        public async Task<IEnumerable<TextoComEmbedding>> GetAll()
        {
            try
            {
                var projectIds = await GetAllProjectIdsOptimized();
                if (projectIds.Count == 0)
                {
                    return Enumerable.Empty<TextoComEmbedding>();
                }

                // "using" guarantees disposal even if a fetch task faults unexpectedly.
                using var throttle = new SemaphoreSlim(MaxConcurrentProjectFetches, MaxConcurrentProjectFetches);

                var fetchTasks = projectIds.Select(async projectId =>
                {
                    await throttle.WaitAsync();
                    try
                    {
                        return await _vectorSearchService.GetDocumentsByProjectAsync(projectId);
                    }
                    catch (Exception ex)
                    {
                        _logger.LogWarning(ex, "Erro ao buscar documentos do projeto {ProjectId}", projectId);
                        return new List<VectorSearchResult>();
                    }
                    finally
                    {
                        throttle.Release();
                    }
                });

                var perProject = await Task.WhenAll(fetchTasks);

                return perProject
                    .SelectMany(docs => docs)
                    .Select(ConvertToTextoComEmbedding)
                    .ToList();
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Erro ao recuperar todos os documentos do Qdrant");
                throw;
            }
        }

        /// <summary>Returns the documents of one project. Failures are logged and rethrown.</summary>
        public async Task<IEnumerable<TextoComEmbedding>> GetByPorjectId(string projectId)
        {
            try
            {
                var documents = await _vectorSearchService.GetDocumentsByProjectAsync(projectId);
                return documents.Select(ConvertToTextoComEmbedding);
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Erro ao recuperar documentos do projeto {ProjectId} no Qdrant", projectId);
                throw;
            }
        }

        /// <summary>Returns the document with the given ID.</summary>
        /// <exception cref="ArgumentException">No document with <paramref name="id"/> exists.</exception>
        public async Task<TextoComEmbedding> GetById(string id)
        {
            try
            {
                var document = await _vectorSearchService.GetDocumentAsync(id);
                if (document == null)
                {
                    throw new ArgumentException($"Documento {id} não encontrado no Qdrant");
                }

                return ConvertToTextoComEmbedding(document);
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Erro ao recuperar documento {Id} do Qdrant", id);
                throw;
            }
        }

        // ========================================
        // NEWER INTERFACE METHODS
        // ========================================

        /// <summary>
        /// Updates the document when <c>document.Id</c> is set, otherwise creates it.
        /// </summary>
        /// <returns>The ID of the saved document.</returns>
        public async Task<string> SaveDocumentAsync(DocumentInput document)
        {
            try
            {
                var embedding = await GenerateEmbeddingOptimized(BuildEmbeddingText(document.Title, document.Content));
                var id = await SaveOrUpdateAsync(document, embedding);

                TouchProjectCache(document.ProjectId);

                _logger.LogDebug("Documento {Id} salvo no Qdrant via SaveDocumentAsync", id);
                return id;
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Erro ao salvar documento no Qdrant");
                throw;
            }
        }

        /// <summary>Re-embeds and updates the document stored under <paramref name="id"/>.</summary>
        public async Task UpdateDocumentAsync(string id, DocumentInput document)
        {
            try
            {
                var embedding = await GenerateEmbeddingOptimized(BuildEmbeddingText(document.Title, document.Content));

                await _vectorSearchService.UpdateDocumentAsync(
                    id,
                    document.Title,
                    document.Content,
                    document.ProjectId,
                    embedding,
                    document.Metadata);

                _logger.LogDebug("Documento {Id} atualizado no Qdrant", id);
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Erro ao atualizar documento {Id} no Qdrant", id);
                throw;
            }
        }

        /// <summary>Deletes the document with the given ID. Failures are logged and rethrown.</summary>
        public async Task DeleteDocumentAsync(string id)
        {
            try
            {
                await _vectorSearchService.DeleteDocumentAsync(id);
                _logger.LogDebug("Documento {Id} deletado do Qdrant", id);
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Erro ao deletar documento {Id} do Qdrant", id);
                throw;
            }
        }

        /// <summary>
        /// Checks whether a document exists. Best effort: a failure is logged and reported as
        /// <c>false</c>.
        /// </summary>
        public async Task<bool> DocumentExistsAsync(string id)
        {
            try
            {
                return await _vectorSearchService.DocumentExistsAsync(id);
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Erro ao verificar existência do documento {Id} no Qdrant", id);
                return false;
            }
        }

        /// <summary>
        /// Returns the document with the given ID, or <c>null</c> when it does not exist or the
        /// lookup fails (the failure is logged).
        /// </summary>
        public async Task<DocumentOutput?> GetDocumentAsync(string id)
        {
            try
            {
                var result = await _vectorSearchService.GetDocumentAsync(id);
                return result == null ? null : ToDocumentOutput(result);
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Erro ao recuperar documento {Id} do Qdrant", id);
                return null;
            }
        }

        /// <summary>Returns all documents of a project. Failures are logged and rethrown.</summary>
        public async Task<List<DocumentOutput>> GetDocumentsByProjectAsync(string projectId)
        {
            try
            {
                var results = await _vectorSearchService.GetDocumentsByProjectAsync(projectId);
                return results.Select(ToDocumentOutput).ToList();
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Erro ao recuperar documentos do projeto {ProjectId} do Qdrant", projectId);
                throw;
            }
        }

        /// <summary>
        /// Counts documents, optionally restricted to one project. Best effort: a failure is
        /// logged and reported as <c>0</c>.
        /// </summary>
        public async Task<int> GetDocumentCountAsync(string? projectId = null)
        {
            try
            {
                return await _vectorSearchService.GetDocumentCountAsync(projectId);
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Erro ao contar documentos no Qdrant");
                return 0;
            }
        }

        // ========================================
        // BATCH OPERATIONS
        // ========================================

        /// <summary>
        /// Saves many documents, grouped by project and processed in batches of
        /// <see cref="SaveBatchSize"/>. Individual failures are collected and logged; they do
        /// not abort the remaining documents.
        /// </summary>
        /// <returns>The IDs of the documents that were saved successfully.</returns>
        public async Task<List<string>> SaveDocumentsBatchAsync(List<DocumentInput> documents)
        {
            ArgumentNullException.ThrowIfNull(documents);

            var ids = new List<string>();
            // Written from concurrently running tasks, so it must be a thread-safe collection.
            var errors = new ConcurrentBag<Exception>();

            foreach (var projectGroup in documents.GroupBy(d => d.ProjectId))
            {
                var projectDocs = projectGroup.ToList();

                for (var offset = 0; offset < projectDocs.Count; offset += SaveBatchSize)
                {
                    var batch = projectDocs.Skip(offset).Take(SaveBatchSize).ToList();

                    // Phase 1: generate the embeddings of the batch in parallel.
                    var embeddingTasks = batch.Select(async doc =>
                    {
                        try
                        {
                            var embedding = await GenerateEmbeddingOptimized(BuildEmbeddingText(doc.Title, doc.Content));
                            return (Document: doc, Embedding: (double[]?)embedding, Error: (Exception?)null);
                        }
                        catch (Exception ex)
                        {
                            return (Document: doc, Embedding: (double[]?)null, Error: (Exception?)ex);
                        }
                    });

                    var embedded = await Task.WhenAll(embeddingTasks);

                    // Phase 2: persist every document whose embedding succeeded.
                    var saveTasks = embedded.Select(async item =>
                    {
                        if (item.Error is not null || item.Embedding is null)
                        {
                            if (item.Error is not null)
                            {
                                errors.Add(item.Error);
                            }

                            return (string?)null;
                        }

                        try
                        {
                            return (string?)await SaveOrUpdateAsync(item.Document, item.Embedding);
                        }
                        catch (Exception ex)
                        {
                            errors.Add(ex);
                            _logger.LogError(ex, "Erro ao salvar documento '{Title}' em lote", item.Document.Title);
                            return (string?)null;
                        }
                    });

                    var savedIds = await Task.WhenAll(saveTasks);
                    ids.AddRange(savedIds.OfType<string>()); // OfType drops the nulls of failed items
                }

                TouchProjectCache(projectGroup.Key);
            }

            if (!errors.IsEmpty)
            {
                _logger.LogWarning("Batch save completado com {ErrorCount} erros de {TotalCount} documentos",
                    errors.Count, documents.Count);
            }

            _logger.LogInformation("Batch save: {SuccessCount}/{TotalCount} documentos salvos no Qdrant",
                ids.Count, documents.Count);

            return ids;
        }

        /// <summary>
        /// Deletes many documents in batches of <see cref="DeleteBatchSize"/>. Individual
        /// failures are collected and logged; they do not abort the remaining deletions.
        /// </summary>
        public async Task DeleteDocumentsBatchAsync(List<string> ids)
        {
            ArgumentNullException.ThrowIfNull(ids);

            // Written from concurrently running tasks, so it must be a thread-safe collection.
            var errors = new ConcurrentBag<Exception>();

            for (var offset = 0; offset < ids.Count; offset += DeleteBatchSize)
            {
                var deleteTasks = ids
                    .Skip(offset)
                    .Take(DeleteBatchSize)
                    .Select(async id =>
                    {
                        try
                        {
                            await _vectorSearchService.DeleteDocumentAsync(id);
                        }
                        catch (Exception ex)
                        {
                            errors.Add(ex);
                            _logger.LogError(ex, "Erro ao deletar documento {Id} em lote", id);
                        }
                    });

                await Task.WhenAll(deleteTasks);
            }

            if (!errors.IsEmpty)
            {
                _logger.LogWarning("Batch delete completado com {ErrorCount} erros de {TotalCount} documentos",
                    errors.Count, ids.Count);
            }
            else
            {
                _logger.LogInformation("Batch delete: {TotalCount} documentos removidos do Qdrant", ids.Count);
            }
        }

        // ========================================
        // PROVIDER STATISTICS
        // ========================================

        /// <summary>
        /// Returns the vector store statistics extended with text-service level counters.
        /// On failure the error is logged and a dictionary with <c>health = "error"</c> is
        /// returned instead of throwing.
        /// </summary>
        public async Task<Dictionary<string, object>> GetProviderStatsAsync()
        {
            try
            {
                var baseStats = await _vectorSearchService.GetStatsAsync();
                var totalDocs = await GetDocumentCountAsync();
                var projectIds = await GetAllProjectIdsOptimized();

                // GetDocumentCountAsync never throws (it logs and returns 0), so the
                // per-project lookups need no extra error handling here.
                var countTasks = projectIds.Select(async projectId =>
                    (ProjectId: projectId, Count: await GetDocumentCountAsync(projectId)));
                var counts = await Task.WhenAll(countTasks);

                var projectStats = new Dictionary<string, int>();
                foreach (var (projectId, count) in counts)
                {
                    projectStats[projectId] = count;
                }

                return new Dictionary<string, object>(baseStats)
                {
                    ["text_service_provider"] = "Qdrant",
                    ["total_documents_via_text_service"] = totalDocs,
                    ["projects_count"] = projectIds.Count,
                    ["documents_by_project"] = projectStats,
                    ["supports_batch_operations"] = true,
                    ["supports_metadata"] = true,
                    ["embedding_auto_generation"] = true,
                    ["cache_enabled"] = true,
                    ["cached_project_ids"] = _projectIdCache.Count
                };
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Erro ao obter estatísticas do provider Qdrant");

                return new Dictionary<string, object>
                {
                    ["provider"] = "Qdrant",
                    ["text_service_provider"] = "Qdrant",
                    ["health"] = "error",
                    ["error"] = ex.Message,
                    ["last_check"] = DateTime.UtcNow
                };
            }
        }

        // ========================================
        // PRIVATE HELPERS
        // ========================================

        /// <summary>Builds the text that is embedded for a document (title in bold, then content).</summary>
        private static string BuildEmbeddingText(string title, string content) =>
            $"**{title}** \n\n {content}";

        /// <summary>Generates the embedding of <paramref name="content"/> and widens it to doubles.</summary>
        private async Task<double[]> GenerateEmbeddingOptimized(string content)
        {
            var embedding = await _embeddingService.GenerateEmbeddingAsync(content);
            return Array.ConvertAll(embedding.ToArray(), static value => (double)value);
        }

        /// <summary>
        /// Updates the document when it carries an ID, otherwise adds it.
        /// </summary>
        /// <returns>The ID of the stored document.</returns>
        private async Task<string> SaveOrUpdateAsync(DocumentInput document, double[] embedding)
        {
            if (!string.IsNullOrEmpty(document.Id))
            {
                await _vectorSearchService.UpdateDocumentAsync(
                    document.Id,
                    document.Title,
                    document.Content,
                    document.ProjectId,
                    embedding,
                    document.Metadata);

                return document.Id;
            }

            return await _vectorSearchService.AddDocumentAsync(
                document.Title,
                document.Content,
                document.ProjectId,
                embedding,
                document.Metadata);
        }

        /// <summary>
        /// Records that <paramref name="projectId"/> was just seen. Uses an indexer write so an
        /// existing entry gets a fresh timestamp; null/empty IDs are ignored because
        /// <see cref="ConcurrentDictionary{TKey,TValue}"/> rejects null keys.
        /// </summary>
        private void TouchProjectCache(string? projectId)
        {
            if (!string.IsNullOrEmpty(projectId))
            {
                _projectIdCache[projectId] = DateTime.UtcNow;
            }
        }

        /// <summary>
        /// Splits a text into sections. A line starting with "**" opens a new section and is
        /// its title; the following lines are its body. Each returned entry has the form
        /// "title: " + newline + body. Sections without body lines are not emitted.
        /// </summary>
        private static List<string> ParseTextIntoSections(string textoCompleto)
        {
            var sections = new List<string>();
            if (string.IsNullOrWhiteSpace(textoCompleto))
            {
                return sections; // nothing to parse; avoids emitting an empty, untitled section
            }

            var lines = textoCompleto.Split('\n');
            var title = lines[0];
            var body = new StringBuilder();

            void FlushSection()
            {
                if (body.Length == 0)
                {
                    return;
                }

                sections.Add(title.Replace("**", "").Replace("\r", "") + ": " + Environment.NewLine + body);
                body.Clear();
            }

            foreach (var line in lines)
            {
                // Ordinal comparison: culture-sensitive matching of control characters such
                // as '\r' is not reliable.
                var isHeader = line.StartsWith("**", StringComparison.Ordinal)
                    || line.StartsWith("\r**", StringComparison.Ordinal);

                if (isHeader)
                {
                    FlushSection();
                    // Always adopt the new header, even when the previous section had no body;
                    // otherwise the following body would be filed under a stale title.
                    title = line;
                }
                else
                {
                    body.AppendLine(line);
                }
            }

            FlushSection();
            return sections;
        }

        /// <summary>
        /// Returns the known project IDs. The cache is used only while a full scan of the
        /// vector store is still fresh; otherwise the store is scanned again. If the scan
        /// fails, the error is logged and whatever is cached (possibly nothing) is returned.
        /// </summary>
        private async Task<List<string>> GetAllProjectIdsOptimized()
        {
            var now = DateTime.UtcNow;

            // Evict expired entries. The value-checked TryRemove leaves an entry alone if it
            // was refreshed between the snapshot and the removal.
            foreach (var entry in _projectIdCache.Where(kvp => now - kvp.Value > _cacheTimeout).ToList())
            {
                _projectIdCache.TryRemove(entry);
            }

            // Entries added by saves alone are not a complete project list, so the cache is
            // only trusted while the last full scan is within the timeout.
            var lastScanTicks = Interlocked.Read(ref _lastFullProjectScanTicks);
            var scanIsFresh = lastScanTicks != 0 && now.Ticks - lastScanTicks <= _cacheTimeout.Ticks;
            if (scanIsFresh && !_projectIdCache.IsEmpty)
            {
                return _projectIdCache.Keys.ToList();
            }

            try
            {
                // Expensive: discovers projects by running an unfiltered search.
                // NOTE(review): at most ProjectScanLimit results are inspected, so projects
                // beyond that window can be missed - confirm this is acceptable.
                var allResults = await _vectorSearchService.SearchSimilarAsync(
                    new double[ProjectScanVectorSize],
                    projectId: null,
                    threshold: 0.0,
                    limit: ProjectScanLimit);

                var scannedIds = allResults
                    .Select(r => r.ProjectId)
                    .Where(pid => !string.IsNullOrEmpty(pid))
                    .Distinct();

                foreach (var projectId in scannedIds)
                {
                    _projectIdCache[projectId] = now;
                }

                Interlocked.Exchange(ref _lastFullProjectScanTicks, now.Ticks);
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Erro ao recuperar IDs de projetos do Qdrant");
            }

            // Union of the scan result and the projects recently written by this instance.
            return _projectIdCache.Keys.ToList();
        }

        /// <summary>Maps a vector store result to the interface-level document shape.</summary>
        private static DocumentOutput ToDocumentOutput(VectorSearchResult result) => new()
        {
            Id = result.Id,
            Title = result.Title,
            Content = result.Content,
            ProjectId = result.ProjectId,
            Embedding = result.Embedding,
            CreatedAt = result.CreatedAt,
            UpdatedAt = result.UpdatedAt,
            Metadata = result.Metadata
        };

        /// <summary>
        /// Maps a vector store result to the legacy <see cref="TextoComEmbedding"/> model.
        /// Fields that live in the metadata fall back to empty values when absent.
        /// </summary>
        private static TextoComEmbedding ConvertToTextoComEmbedding(VectorSearchResult result)
        {
            string MetadataText(string key) =>
                result.Metadata?.GetValueOrDefault(key)?.ToString() ?? "";

            return new TextoComEmbedding
            {
                Id = result.Id,
                Titulo = result.Title,
                Conteudo = result.Content,
                ProjetoId = result.ProjectId,
                Embedding = result.Embedding,

                // Fields that may not exist in Qdrant.
                ProjetoNome = MetadataText("project_name"),
                TipoDocumento = MetadataText("document_type"),
                Categoria = MetadataText("category"),
                Tags = result.Metadata?.GetValueOrDefault("tags") as string[] ?? Array.Empty<string>()
            };
        }
    }
}
#pragma warning restore SKEXP0001