674 lines
25 KiB
C#
674 lines
25 KiB
C#
#pragma warning disable SKEXP0001
|
|
|
|
using ChatRAG.Contracts.VectorSearch;
|
|
using ChatRAG.Data;
|
|
using ChatRAG.Models;
|
|
using ChatRAG.Services.Contracts;
|
|
using Microsoft.SemanticKernel.Embeddings;
|
|
using System.Text;
|
|
using System.Collections.Concurrent;
|
|
|
|
namespace ChatRAG.Services.TextServices
|
|
{
|
|
public class QdrantTextDataService : ITextDataService
|
|
{
|
|
private readonly IVectorSearchService _vectorSearchService;
|
|
private readonly ITextEmbeddingGenerationService _embeddingService;
|
|
private readonly ILogger<QdrantTextDataService> _logger;
|
|
|
|
// Cache para project IDs para evitar buscas custosas
|
|
private readonly ConcurrentDictionary<string, DateTime> _projectIdCache = new();
|
|
private readonly TimeSpan _cacheTimeout = TimeSpan.FromMinutes(5);
|
|
|
|
/// <summary>
/// Creates the service from its three injected collaborators.
/// </summary>
/// <param name="vectorSearchService">Vector store client that document operations are delegated to.</param>
/// <param name="embeddingService">Generator used to embed document text before it is stored.</param>
/// <param name="logger">Logger used for diagnostics and failures.</param>
public QdrantTextDataService(
    IVectorSearchService vectorSearchService,
    ITextEmbeddingGenerationService embeddingService,
    ILogger<QdrantTextDataService> logger)
{
    // Single tuple assignment; each field receives the parameter in the same position.
    (_vectorSearchService, _embeddingService, _logger) =
        (vectorSearchService, embeddingService, logger);
}
|
|
|
|
/// <summary>Name of the storage provider behind this service; always "Qdrant".</summary>
public string ProviderName
{
    get { return "Qdrant"; }
}
|
|
|
|
// ========================================
|
|
// MÉTODOS ORIGINAIS (compatibilidade com MongoDB)
|
|
// ========================================
|
|
|
|
/// <summary>
/// Stores a new document by forwarding to the four-argument overload with a null id.
/// </summary>
/// <param name="titulo">Document title.</param>
/// <param name="texto">Document body.</param>
/// <param name="projectId">Project the document belongs to.</param>
public async Task SalvarNoMongoDB(string titulo, string texto, string projectId)
    => await SalvarNoMongoDB(null, titulo, texto, projectId);
|
|
|
|
/// <summary>
/// Creates a document when <paramref name="id"/> is null or empty, otherwise updates
/// the document with that id. The embedding is generated from the title and body.
/// </summary>
/// <param name="id">Id of the document to update, or null/empty to create a new one.</param>
/// <param name="titulo">Document title.</param>
/// <param name="texto">Document body.</param>
/// <param name="projectId">Project the document belongs to.</param>
/// <remarks>Any failure is logged and rethrown.</remarks>
public async Task SalvarNoMongoDB(string? id, string titulo, string texto, string projectId)
{
    try
    {
        // The embedding is computed once and used by whichever branch runs below.
        var vector = await GenerateEmbeddingOptimized($"**{titulo}** \n\n {texto}");

        if (!string.IsNullOrEmpty(id))
        {
            // Existing document: overwrite it in place.
            await _vectorSearchService.UpdateDocumentAsync(id, titulo, texto, projectId, vector);
            _logger.LogDebug("Documento '{Id}' atualizado no Qdrant", id);
            return;
        }

        // New document: the store returns the generated id.
        var createdId = await _vectorSearchService.AddDocumentAsync(titulo, texto, projectId, vector);

        // Record the project in the project-id cache.
        _projectIdCache.TryAdd(projectId, DateTime.UtcNow);

        _logger.LogDebug("Documento '{Title}' criado no Qdrant com ID {Id}", titulo, createdId);
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "Erro ao salvar documento '{Title}' no Qdrant", titulo);
        throw;
    }
}
|
|
|
|
/// <summary>
/// Splits <paramref name="textoCompleto"/> into sections with <c>ParseTextIntoSections</c>
/// and saves each section as a new document, running at most five saves at a time.
/// </summary>
/// <param name="textoCompleto">Full text whose sections are introduced by lines starting with "**".</param>
/// <param name="projectId">Project every section is stored under.</param>
/// <remarks>Any failure is logged and rethrown.</remarks>
public async Task SalvarTextoComEmbeddingNoMongoDB(string textoCompleto, string projectId)
{
    try
    {
        var sections = ParseTextIntoSections(textoCompleto);

        // `using` releases the semaphore even when a save fails; the previous explicit
        // Dispose() call was skipped whenever Task.WhenAll threw. Task.WhenAll completes
        // only after every task has finished, so no task can touch it after disposal.
        using var throttle = new SemaphoreSlim(5, 5);

        async Task SaveSectionAsync(string section)
        {
            await throttle.WaitAsync();
            try
            {
                // The first line of a section is its title, the remainder is the body.
                var parts = section.Split('\n', 2);
                var title = parts[0].Replace("**", "").Replace("\r", "").Trim();
                var content = parts.Length > 1 ? parts[1] : "";

                await SalvarNoMongoDB(title, content, projectId);
            }
            finally
            {
                throttle.Release();
            }
        }

        await Task.WhenAll(sections.Select(section => SaveSectionAsync(section)));

        _logger.LogInformation("Texto completo processado: {SectionCount} seções salvas no Qdrant", sections.Count);
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "Erro ao processar texto completo no Qdrant");
        throw;
    }
}
|
|
|
|
/// <summary>
/// Loads the documents of every project id returned by <c>GetAllProjectIdsOptimized</c>
/// and maps them to <c>TextoComEmbedding</c>.
/// </summary>
/// <returns>
/// The mapped documents, or an empty sequence when no project id is known. A project whose
/// fetch fails is logged as a warning and contributes no documents.
/// </returns>
/// <remarks>Failures outside the per-project fetch are logged and rethrown.</remarks>
public async Task<IEnumerable<TextoComEmbedding>> GetAll()
{
    try
    {
        var knownProjects = await GetAllProjectIdsOptimized();
        if (knownProjects.Count == 0)
        {
            return Enumerable.Empty<TextoComEmbedding>();
        }

        // At most three project fetches run at the same time.
        var gate = new SemaphoreSlim(3, 3);
        var fetches = knownProjects.Select(async projectId =>
        {
            await gate.WaitAsync();
            try
            {
                return await _vectorSearchService.GetDocumentsByProjectAsync(projectId);
            }
            catch (Exception ex)
            {
                // One failing project must not abort the whole listing.
                _logger.LogWarning(ex, "Erro ao buscar documentos do projeto {ProjectId}", projectId);
                return new List<VectorSearchResult>();
            }
            finally
            {
                gate.Release();
            }
        });

        var perProject = await Task.WhenAll(fetches);
        gate.Dispose();

        var merged = new List<VectorSearchResult>();
        foreach (var docs in perProject)
        {
            merged.AddRange(docs);
        }

        return from doc in merged select ConvertToTextoComEmbedding(doc);
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "Erro ao recuperar todos os documentos do Qdrant");
        throw;
    }
}
|
|
|
|
/// <summary>
/// Returns the documents of one project, mapped to <c>TextoComEmbedding</c>.
/// </summary>
/// <param name="projectId">Project whose documents are requested.</param>
/// <remarks>Any failure is logged and rethrown.</remarks>
public async Task<IEnumerable<TextoComEmbedding>> GetByPorjectId(string projectId)
{
    try
    {
        var found = await _vectorSearchService.GetDocumentsByProjectAsync(projectId);
        return from doc in found select ConvertToTextoComEmbedding(doc);
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "Erro ao recuperar documentos do projeto {ProjectId} no Qdrant", projectId);
        throw;
    }
}
|
|
|
|
/// <summary>
/// Returns the document with the given id, mapped to <c>TextoComEmbedding</c>.
/// </summary>
/// <param name="id">Id of the document to load.</param>
/// <exception cref="ArgumentException">The store returned no document for <paramref name="id"/>.</exception>
/// <remarks>Every failure, including the not-found case, is logged and rethrown.</remarks>
public async Task<TextoComEmbedding> GetById(string id)
{
    try
    {
        var found = await _vectorSearchService.GetDocumentAsync(id);
        if (found == null)
        {
            throw new ArgumentException($"Documento {id} não encontrado no Qdrant");
        }

        var mapped = ConvertToTextoComEmbedding(found);
        return mapped;
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "Erro ao recuperar documento {Id} do Qdrant", id);
        throw;
    }
}
|
|
|
|
// ========================================
|
|
// MÉTODOS NOVOS DA INTERFACE
|
|
// ========================================
|
|
|
|
/// <summary>
/// Saves a document: updates it when <c>document.Id</c> is set, otherwise creates it.
/// The embedding is generated from the title and content.
/// </summary>
/// <param name="document">Document to save.</param>
/// <returns>The id of the saved document (the given id on update, the store-generated id on create).</returns>
/// <remarks>Any failure is logged and rethrown.</remarks>
public async Task<string> SaveDocumentAsync(DocumentInput document)
{
    try
    {
        var text = $"**{document.Title}** \n\n {document.Content}";
        var vector = await GenerateEmbeddingOptimized(text);

        string savedId;
        if (string.IsNullOrEmpty(document.Id))
        {
            // No id supplied: create a new document.
            savedId = await _vectorSearchService.AddDocumentAsync(
                document.Title,
                document.Content,
                document.ProjectId,
                vector,
                document.Metadata);
        }
        else
        {
            // Id supplied: overwrite the existing document.
            await _vectorSearchService.UpdateDocumentAsync(
                document.Id,
                document.Title,
                document.Content,
                document.ProjectId,
                vector,
                document.Metadata);
            savedId = document.Id;
        }

        // Record the project in the project-id cache.
        _projectIdCache.TryAdd(document.ProjectId, DateTime.UtcNow);

        _logger.LogDebug("Documento {Id} salvo no Qdrant via SaveDocumentAsync", savedId);
        return savedId;
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "Erro ao salvar documento no Qdrant");
        throw;
    }
}
|
|
|
|
/// <summary>
/// Regenerates the embedding for <paramref name="document"/> and overwrites the stored
/// document identified by <paramref name="id"/>.
/// </summary>
/// <param name="id">Id of the document to update.</param>
/// <param name="document">New title, content, project and metadata.</param>
/// <remarks>Any failure is logged and rethrown.</remarks>
public async Task UpdateDocumentAsync(string id, DocumentInput document)
{
    try
    {
        var text = $"**{document.Title}** \n\n {document.Content}";
        var vector = await GenerateEmbeddingOptimized(text);

        await _vectorSearchService.UpdateDocumentAsync(
            id, document.Title, document.Content, document.ProjectId, vector, document.Metadata);

        _logger.LogDebug("Documento {Id} atualizado no Qdrant", id);
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "Erro ao atualizar documento {Id} no Qdrant", id);
        throw;
    }
}
|
|
|
|
/// <summary>
/// Deletes the document with the given id from the vector store.
/// </summary>
/// <param name="id">Id of the document to delete.</param>
/// <remarks>Any failure is logged and rethrown.</remarks>
public async Task DeleteDocumentAsync(string id)
{
    try
    {
        var deletion = _vectorSearchService.DeleteDocumentAsync(id);
        await deletion;

        _logger.LogDebug("Documento {Id} deletado do Qdrant", id);
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "Erro ao deletar documento {Id} do Qdrant", id);
        throw;
    }
}
|
|
|
|
/// <summary>
/// Asks the vector store whether a document with the given id exists.
/// </summary>
/// <param name="id">Id to look up.</param>
/// <returns>The store's answer; <c>false</c> when the lookup itself fails (the error is logged, not rethrown).</returns>
public async Task<bool> DocumentExistsAsync(string id)
{
    bool exists;
    try
    {
        exists = await _vectorSearchService.DocumentExistsAsync(id);
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "Erro ao verificar existência do documento {Id} no Qdrant", id);
        exists = false;
    }

    return exists;
}
|
|
|
|
/// <summary>
/// Loads one document and copies it into a <c>DocumentOutput</c>.
/// </summary>
/// <param name="id">Id of the document to load.</param>
/// <returns>
/// The document, or <c>null</c> when the store returns none or the lookup fails
/// (the error is logged, not rethrown).
/// </returns>
public async Task<DocumentOutput?> GetDocumentAsync(string id)
{
    try
    {
        var found = await _vectorSearchService.GetDocumentAsync(id);
        if (found == null)
        {
            return null;
        }

        var output = new DocumentOutput
        {
            Id = found.Id,
            Title = found.Title,
            Content = found.Content,
            ProjectId = found.ProjectId,
            Embedding = found.Embedding,
            CreatedAt = found.CreatedAt,
            UpdatedAt = found.UpdatedAt,
            Metadata = found.Metadata
        };
        return output;
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "Erro ao recuperar documento {Id} do Qdrant", id);
        return null;
    }
}
|
|
|
|
/// <summary>
/// Loads every document of a project and copies each into a <c>DocumentOutput</c>.
/// </summary>
/// <param name="projectId">Project whose documents are requested.</param>
/// <returns>One <c>DocumentOutput</c> per stored document, in the order the store returned them.</returns>
/// <remarks>Any failure is logged and rethrown.</remarks>
public async Task<List<DocumentOutput>> GetDocumentsByProjectAsync(string projectId)
{
    try
    {
        var stored = await _vectorSearchService.GetDocumentsByProjectAsync(projectId);

        var outputs = new List<DocumentOutput>();
        foreach (var item in stored)
        {
            outputs.Add(new DocumentOutput
            {
                Id = item.Id,
                Title = item.Title,
                Content = item.Content,
                ProjectId = item.ProjectId,
                Embedding = item.Embedding,
                CreatedAt = item.CreatedAt,
                UpdatedAt = item.UpdatedAt,
                Metadata = item.Metadata
            });
        }

        return outputs;
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "Erro ao recuperar documentos do projeto {ProjectId} do Qdrant", projectId);
        throw;
    }
}
|
|
|
|
/// <summary>
/// Returns the document count reported by the vector store.
/// </summary>
/// <param name="projectId">Passed through to the store; <c>null</c> by default.</param>
/// <returns>The reported count; <c>0</c> when the call fails (the error is logged, not rethrown).</returns>
public async Task<int> GetDocumentCountAsync(string? projectId = null)
{
    int count;
    try
    {
        count = await _vectorSearchService.GetDocumentCountAsync(projectId);
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "Erro ao contar documentos no Qdrant");
        count = 0;
    }

    return count;
}
|
|
|
|
// ========================================
|
|
// OPERAÇÕES EM LOTE OTIMIZADAS
|
|
// ========================================
|
|
|
|
/// <summary>
/// Saves many documents. Documents are grouped by project and processed in batches of
/// five: embeddings for a batch are generated concurrently, then the batch is written
/// concurrently. A document with an id is updated, one without is created.
/// </summary>
/// <param name="documents">Documents to save.</param>
/// <returns>Ids of the documents that were saved; failed documents are omitted.</returns>
/// <exception cref="ArgumentNullException"><paramref name="documents"/> is null.</exception>
/// <remarks>
/// Per-document failures are logged and counted but do not abort the batch or throw.
/// </remarks>
public async Task<List<string>> SaveDocumentsBatchAsync(List<DocumentInput> documents)
{
    if (documents is null)
    {
        throw new ArgumentNullException(nameof(documents));
    }

    // Kept small to avoid timeouts.
    const int batchSize = 5;

    var ids = new List<string>();

    // The lambdas below run concurrently, so failures are collected in a thread-safe bag;
    // List<T>.Add is not safe to call from several threads at once.
    var errors = new ConcurrentBag<Exception>();

    foreach (var projectGroup in documents.GroupBy(d => d.ProjectId).ToList())
    {
        var projectDocs = projectGroup.ToList();

        for (int i = 0; i < projectDocs.Count; i += batchSize)
        {
            var batch = projectDocs.Skip(i).Take(batchSize);

            // Phase 1: generate the embeddings of the batch concurrently. A failure is
            // carried in the result instead of thrown so the other documents continue.
            var embeddingTasks = batch.Select(async doc =>
            {
                try
                {
                    var embedding = await GenerateEmbeddingOptimized($"**{doc.Title}** \n\n {doc.Content}");
                    return new { Document = doc, Embedding = embedding, Error = (Exception?)null };
                }
                catch (Exception ex)
                {
                    return new { Document = doc, Embedding = (double[]?)null, Error = ex };
                }
            });

            var embeddingResults = await Task.WhenAll(embeddingTasks);

            // Phase 2: write every document whose embedding succeeded.
            var saveTasks = embeddingResults.Select(async result =>
            {
                if (result.Error != null)
                {
                    errors.Add(result.Error);
                    _logger.LogError(result.Error, "Erro ao gerar embedding do documento '{Title}' em lote", result.Document.Title);
                    return null;
                }

                try
                {
                    string id;
                    if (!string.IsNullOrEmpty(result.Document.Id))
                    {
                        await _vectorSearchService.UpdateDocumentAsync(
                            result.Document.Id,
                            result.Document.Title,
                            result.Document.Content,
                            result.Document.ProjectId,
                            result.Embedding!,
                            result.Document.Metadata);
                        id = result.Document.Id;
                    }
                    else
                    {
                        id = await _vectorSearchService.AddDocumentAsync(
                            result.Document.Title,
                            result.Document.Content,
                            result.Document.ProjectId,
                            result.Embedding!,
                            result.Document.Metadata);
                    }

                    return id;
                }
                catch (Exception ex)
                {
                    errors.Add(ex);
                    _logger.LogError(ex, "Erro ao salvar documento '{Title}' em lote", result.Document.Title);
                    return null;
                }
            });

            var batchResults = await Task.WhenAll(saveTasks);

            // `ids` is only touched here, after the batch has completed, so a plain list is fine.
            ids.AddRange(batchResults.Where(id => id != null)!);
        }

        // Record the project in the project-id cache.
        _projectIdCache.TryAdd(projectGroup.Key, DateTime.UtcNow);
    }

    if (!errors.IsEmpty)
    {
        _logger.LogWarning("Batch save completado com {ErrorCount} erros de {TotalCount} documentos",
            errors.Count, documents.Count);
    }

    _logger.LogInformation("Batch save: {SuccessCount}/{TotalCount} documentos salvos no Qdrant",
        ids.Count, documents.Count);

    return ids;
}
|
|
|
|
/// <summary>
/// Deletes many documents, ten at a time, with the deletions inside a batch running concurrently.
/// </summary>
/// <param name="ids">Ids of the documents to delete.</param>
/// <exception cref="ArgumentNullException"><paramref name="ids"/> is null.</exception>
/// <remarks>
/// Per-document failures are logged and counted but do not abort the batch or throw.
/// </remarks>
public async Task DeleteDocumentsBatchAsync(List<string> ids)
{
    if (ids is null)
    {
        throw new ArgumentNullException(nameof(ids));
    }

    // Kept small so the store is not overloaded.
    const int batchSize = 10;

    // Deletions in a batch run concurrently, so failures are collected in a thread-safe
    // bag; List<T>.Add is not safe to call from several threads at once.
    var errors = new ConcurrentBag<Exception>();

    for (int i = 0; i < ids.Count; i += batchSize)
    {
        var deletions = ids.Skip(i).Take(batchSize).Select(async id =>
        {
            try
            {
                await _vectorSearchService.DeleteDocumentAsync(id);
            }
            catch (Exception ex)
            {
                errors.Add(ex);
                _logger.LogError(ex, "Erro ao deletar documento {Id} em lote", id);
            }
        });

        await Task.WhenAll(deletions);
    }

    if (!errors.IsEmpty)
    {
        _logger.LogWarning("Batch delete completado com {ErrorCount} erros de {TotalCount} documentos",
            errors.Count, ids.Count);
    }
    else
    {
        _logger.LogInformation("Batch delete: {TotalCount} documentos removidos do Qdrant", ids.Count);
    }
}
|
|
|
|
// ========================================
|
|
// ESTATÍSTICAS DO PROVIDER
|
|
// ========================================
|
|
|
|
/// <summary>
/// Builds a statistics dictionary: the vector store's own stats plus the total document
/// count, the number of known projects, a per-project document count and cache information.
/// </summary>
/// <returns>
/// The combined statistics. When gathering them fails, the error is logged and a small
/// dictionary with <c>"health" = "error"</c> and the exception message is returned instead.
/// </returns>
public async Task<Dictionary<string, object>> GetProviderStatsAsync()
{
    try
    {
        var baseStats = await _vectorSearchService.GetStatsAsync();
        var totalDocs = await GetDocumentCountAsync();

        var projectIds = await GetAllProjectIdsOptimized();

        // Per-project counts are requested concurrently. A project whose count cannot be
        // obtained is reported as 0 rather than failing the whole call.
        var countTasks = projectIds.Select(async projectId =>
        {
            try
            {
                var count = await GetDocumentCountAsync(projectId);
                return new { ProjectId = projectId, Count = count };
            }
            catch
            {
                return new { ProjectId = projectId, Count = 0 };
            }
        });

        var countResults = await Task.WhenAll(countTasks);

        var projectStats = new Dictionary<string, int>();
        foreach (var result in countResults)
        {
            projectStats[result.ProjectId] = result.Count;
        }

        return new Dictionary<string, object>(baseStats)
        {
            ["text_service_provider"] = "Qdrant",
            ["total_documents_via_text_service"] = totalDocs,
            ["projects_count"] = projectIds.Count,
            ["documents_by_project"] = projectStats,
            ["supports_batch_operations"] = true,
            ["supports_metadata"] = true,
            ["embedding_auto_generation"] = true,
            ["cache_enabled"] = true,
            ["cached_project_ids"] = _projectIdCache.Count
        };
    }
    catch (Exception ex)
    {
        // Previously the exception was swallowed silently; log it like the rest of the
        // class does so the failure is visible outside the returned payload.
        _logger.LogError(ex, "Erro ao obter estatísticas do provider Qdrant");

        return new Dictionary<string, object>
        {
            ["provider"] = "Qdrant",
            ["text_service_provider"] = "Qdrant",
            ["health"] = "error",
            ["error"] = ex.Message,
            ["last_check"] = DateTime.UtcNow
        };
    }
}
|
|
|
|
// ========================================
|
|
// MÉTODOS AUXILIARES PRIVADOS OTIMIZADOS
|
|
// ========================================
|
|
|
|
/// <summary>
/// Generates the embedding of <paramref name="content"/> with the injected embedding
/// service and converts each component to <c>double</c>.
/// </summary>
/// <param name="content">Text to embed.</param>
/// <returns>The embedding as a new <c>double</c> array.</returns>
private async Task<double[]> GenerateEmbeddingOptimized(string content)
{
    var generated = await _embeddingService.GenerateEmbeddingAsync(content);
    var components = generated.ToArray();
    return Array.ConvertAll(components, component => (double)component);
}
|
|
|
|
/// <summary>
/// Splits a text into sections. A line starting with "**" (optionally preceded by a
/// carriage return) is a heading that opens a new section; every other line is appended
/// to the body of the current section.
/// </summary>
/// <param name="textoCompleto">Text to split; lines are separated by '\n'.</param>
/// <returns>
/// One string per section that has body text, formatted as the heading without "**" and
/// '\r', followed by ": ", a newline and the body. Sections without body text are dropped.
/// An empty list is returned for null or empty input.
/// </returns>
/// <remarks>
/// Until the first heading is seen the first line of the text is used as the title; if
/// that line is not itself a heading it is also part of the first section's body.
/// </remarks>
private static List<string> ParseTextIntoSections(string textoCompleto)
{
    var sections = new List<string>();

    // Nothing to split; also avoids emitting a bogus ": " section for an empty string.
    if (string.IsNullOrEmpty(textoCompleto))
    {
        return sections;
    }

    var lines = textoCompleto.Split('\n');
    var title = lines[0];
    var body = new StringBuilder();

    // Emits the pending section, if it has any body text, and resets the buffer.
    void FlushSection()
    {
        if (body.Length == 0)
        {
            return;
        }

        sections.Add(title.Replace("**", "").Replace("\r", "") + ": " + Environment.NewLine + body.ToString());
        body.Clear();
    }

    foreach (var line in lines)
    {
        // Ordinal comparison: this is a marker check, not linguistic text matching.
        var isHeading = line.StartsWith("**", StringComparison.Ordinal)
            || line.StartsWith("\r**", StringComparison.Ordinal);

        if (isHeading)
        {
            FlushSection();

            // Always adopt the new heading. Previously it was adopted only when body text
            // was pending, so text after two adjacent headings was filed under the first.
            title = line;
        }
        else
        {
            body.AppendLine(line);
        }
    }

    // Emit the last section, if any.
    FlushSection();

    return sections;
}
|
|
|
|
// UTC ticks of the last successful full project listing; 0 means no listing has been done yet.
private long _projectIdsListedAtTicks;

/// <summary>
/// Returns the ids of all known projects. A full listing is fetched from the vector store
/// at most once per cache timeout; between listings the cached ids are returned.
/// </summary>
/// <returns>
/// The known project ids, or an empty list when the listing fails (the error is logged,
/// not rethrown).
/// </returns>
/// <remarks>
/// The save methods add single project ids to the cache. Previously any cached entry
/// short-circuited the listing, so after one save only that project was returned. The
/// cache is now trusted only while a full listing is still fresh.
/// </remarks>
private async Task<List<string>> GetAllProjectIdsOptimized()
{
    var now = DateTime.UtcNow;

    var listedAtTicks = Interlocked.Read(ref _projectIdsListedAtTicks);
    var listingIsFresh = listedAtTicks != 0
        && now - new DateTime(listedAtTicks, DateTimeKind.Utc) <= _cacheTimeout;

    if (listingIsFresh)
    {
        // Full listing plus any project saved since then.
        return _projectIdCache.Keys.ToList();
    }

    try
    {
        // Expensive: runs a similarity search with a zero vector and no project filter,
        // then collects the distinct project ids of the hits.
        // NOTE(review): the vector length (384) and the limit (1000) are hard-coded;
        // confirm they match the collection's dimension and expected size.
        var allResults = await _vectorSearchService.SearchSimilarAsync(
            new double[384],
            projectId: null,
            threshold: 0.0,
            limit: 1000);

        var projectIds = allResults
            .Select(r => r.ProjectId)
            .Where(pid => !string.IsNullOrEmpty(pid))
            .Distinct()
            .ToList();

        // Drop entries older than the timeout, then refresh everything the listing found.
        foreach (var entry in _projectIdCache)
        {
            if (now - entry.Value > _cacheTimeout)
            {
                _projectIdCache.TryRemove(entry.Key, out _);
            }
        }

        foreach (var projectId in projectIds)
        {
            _projectIdCache[projectId] = now;
        }

        Interlocked.Exchange(ref _projectIdsListedAtTicks, now.Ticks);

        // Includes recently saved projects the limited search may not have returned.
        return _projectIdCache.Keys.ToList();
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "Erro ao recuperar IDs de projetos do Qdrant");
        return new List<string>();
    }
}
|
|
|
|
/// <summary>
/// Maps a vector-store result to the legacy <c>TextoComEmbedding</c> shape.
/// </summary>
/// <param name="result">Stored document to convert.</param>
/// <returns>
/// The converted document. Project name, document type and category are read from the
/// metadata keys "project_name", "document_type" and "category" and default to an empty
/// string; tags come from the "tags" key only when that value is a <c>string[]</c>,
/// otherwise an empty array is used.
/// </returns>
private static TextoComEmbedding ConvertToTextoComEmbedding(VectorSearchResult result)
{
    var metadata = result.Metadata;

    // Optional fields: absent metadata or an absent key yields the default value.
    var projectName = metadata?.GetValueOrDefault("project_name")?.ToString() ?? "";
    var documentType = metadata?.GetValueOrDefault("document_type")?.ToString() ?? "";
    var category = metadata?.GetValueOrDefault("category")?.ToString() ?? "";
    var tags = metadata?.GetValueOrDefault("tags") as string[] ?? Array.Empty<string>();

    var converted = new TextoComEmbedding
    {
        Id = result.Id,
        Titulo = result.Title,
        Conteudo = result.Content,
        ProjetoId = result.ProjectId,
        Embedding = result.Embedding,
        ProjetoNome = projectName,
        TipoDocumento = documentType,
        Categoria = category,
        Tags = tags
    };

    return converted;
}
|
|
}
|
|
}
|
|
|
|
#pragma warning restore SKEXP0001 |