ChatRAG/Services/SearchVectors/MongoVectorSearchService.cs
2025-06-15 23:03:45 -03:00

229 lines
7.6 KiB
C#

using ChatRAG.Contracts.VectorSearch;
using ChatRAG.Data;
using ChatRAG.Models;
using Microsoft.Extensions.VectorData;
using Microsoft.SemanticKernel.Embeddings;
#pragma warning disable SKEXP0001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed.
namespace ChatRAG.Services.SearchVectors
{
/// <summary>
/// <see cref="IVectorSearchService"/> implementation that reads documents through
/// <see cref="TextDataRepository"/> and ranks them in process by cosine similarity.
/// </summary>
/// <remarks>
/// Every search and count loads the matching documents from the repository and works on
/// them in memory. Results are labelled with the provider name "MongoDB".
/// </remarks>
public class MongoVectorSearchService : IVectorSearchService
{
    /// <summary>Provider label stamped on every result and on the stats dictionary.</summary>
    private const string ProviderName = "MongoDB";

    /// <summary>Number of hits the dynamic search tries to reach before it stops relaxing the threshold.</summary>
    private const int DesiredDynamicResults = 3;

    /// <summary>Factor applied to the caller's threshold for the first relaxation step.</summary>
    private const double DynamicRelaxationFactor = 0.7;

    /// <summary>Last-resort threshold used by the dynamic search.</summary>
    private const double DynamicFloorThreshold = 0.2;

    private readonly TextDataRepository _textDataRepository;

    // Injected and stored, but not used by any member of this class.
    private readonly ITextEmbeddingGenerationService _embeddingService;

    /// <summary>Creates the service over the given repository and embedding generator.</summary>
    /// <param name="textDataRepository">Repository used for every read and write.</param>
    /// <param name="embeddingService">Embedding generator; stored but currently unused here.</param>
    public MongoVectorSearchService(
        TextDataRepository textDataRepository,
        ITextEmbeddingGenerationService embeddingService)
    {
        _textDataRepository = textDataRepository;
        _embeddingService = embeddingService;
    }

    /// <summary>
    /// Returns the documents most similar to <paramref name="queryEmbedding"/>, best first.
    /// </summary>
    /// <param name="queryEmbedding">Embedding of the query.</param>
    /// <param name="projectId">Project to search in; null or empty searches every document.</param>
    /// <param name="threshold">Minimum cosine similarity (inclusive) a document needs to be returned.</param>
    /// <param name="limit">Maximum number of results.</param>
    /// <param name="filters">Accepted for interface compatibility; ignored by this implementation.</param>
    /// <returns>At most <paramref name="limit"/> results ordered by descending score.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="queryEmbedding"/> is null.</exception>
    /// <exception cref="InvalidOperationException">
    /// The repository failed; the original error is available as the inner exception.
    /// </exception>
    public async Task<List<VectorSearchResult>> SearchSimilarAsync(
        double[] queryEmbedding,
        string? projectId = null,
        double threshold = 0.3,
        int limit = 5,
        Dictionary<string, object>? filters = null)
    {
        var scored = await ScoreDocumentsAsync(queryEmbedding, projectId);
        return SelectTop(scored, threshold, limit);
    }

    /// <summary>
    /// Similarity search that progressively lowers the threshold until it finds enough hits.
    /// </summary>
    /// <remarks>
    /// Starts at <paramref name="minThreshold"/>, then tries 70% of it, then 0.2. A step is
    /// skipped when it would not lower the current threshold, so relaxing never discards hits
    /// that were already found. "Enough" is three results, or <paramref name="limit"/> when
    /// the limit is smaller than three. Documents are loaded and scored once for all steps.
    /// </remarks>
    /// <param name="queryEmbedding">Embedding of the query.</param>
    /// <param name="projectId">Project to search in; null or empty searches every document.</param>
    /// <param name="minThreshold">Threshold used for the first attempt.</param>
    /// <param name="limit">Maximum number of results.</param>
    /// <returns>At most <paramref name="limit"/> results ordered by descending score.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="queryEmbedding"/> is null.</exception>
    /// <exception cref="InvalidOperationException">
    /// The repository failed; the original error is available as the inner exception.
    /// </exception>
    public async Task<List<VectorSearchResult>> SearchSimilarDynamicAsync(
        double[] queryEmbedding,
        string projectId,
        double minThreshold = 0.5,
        int limit = 5)
    {
        var scored = await ScoreDocumentsAsync(queryEmbedding, projectId);

        // With a limit below three the result can never hold three items, so aim for the limit.
        int wanted = Math.Min(DesiredDynamicResults, limit);

        double currentThreshold = minThreshold;
        var results = SelectTop(scored, currentThreshold, limit);

        double[] relaxedThresholds = { minThreshold * DynamicRelaxationFactor, DynamicFloorThreshold };
        foreach (double candidate in relaxedThresholds)
        {
            if (results.Count >= wanted)
            {
                break;
            }

            // Never tighten the filter: a higher threshold could only return fewer hits.
            if (candidate >= currentThreshold)
            {
                continue;
            }

            currentThreshold = candidate;
            results = SelectTop(scored, currentThreshold, limit);
        }

        return results;
    }

    /// <summary>Stores a new document under a freshly generated GUID string id.</summary>
    /// <param name="title">Document title.</param>
    /// <param name="content">Document text.</param>
    /// <param name="projectId">Owning project.</param>
    /// <param name="embedding">Embedding of the document.</param>
    /// <param name="metadata">Accepted for interface compatibility; ignored by this implementation.</param>
    /// <returns>The id assigned to the new document.</returns>
    public async Task<string> AddDocumentAsync(
        string title,
        string content,
        string projectId,
        double[] embedding,
        Dictionary<string, object>? metadata = null)
    {
        var document = new TextoComEmbedding
        {
            Id = Guid.NewGuid().ToString(),
            Titulo = title,
            Conteudo = content,
            ProjetoId = projectId,
            Embedding = embedding
        };

        await _textDataRepository.CreateAsync(document);
        return document.Id;
    }

    /// <summary>Replaces the stored document identified by <paramref name="id"/>.</summary>
    /// <param name="id">Id of the document to replace.</param>
    /// <param name="title">New title.</param>
    /// <param name="content">New text.</param>
    /// <param name="projectId">Owning project.</param>
    /// <param name="embedding">New embedding.</param>
    /// <param name="metadata">Accepted for interface compatibility; ignored by this implementation.</param>
    public async Task UpdateDocumentAsync(
        string id,
        string title,
        string content,
        string projectId,
        double[] embedding,
        Dictionary<string, object>? metadata = null)
    {
        var document = new TextoComEmbedding
        {
            Id = id,
            Titulo = title,
            Conteudo = content,
            ProjetoId = projectId,
            Embedding = embedding
        };

        await _textDataRepository.UpdateAsync(id, document);
    }

    /// <summary>Removes the document identified by <paramref name="id"/>.</summary>
    public async Task DeleteDocumentAsync(string id)
    {
        await _textDataRepository.RemoveAsync(id);
    }

    /// <summary>Tells whether the repository returns a document for <paramref name="id"/>.</summary>
    public async Task<bool> DocumentExistsAsync(string id)
    {
        var document = await _textDataRepository.GetAsync(id);
        return document != null;
    }

    /// <summary>Fetches a single document by id.</summary>
    /// <returns>The document with a score of 1.0, or null when the repository returns none.</returns>
    public async Task<VectorSearchResult?> GetDocumentAsync(string id)
    {
        var document = await _textDataRepository.GetAsync(id);
        if (document == null)
        {
            return null;
        }

        return ToResult(document, 1.0);
    }

    /// <summary>Lists every document of a project, each with a score of 1.0.</summary>
    public async Task<List<VectorSearchResult>> GetDocumentsByProjectAsync(string projectId)
    {
        var documents = await _textDataRepository.GetByProjectIdAsync(projectId);
        return documents.Select(document => ToResult(document, 1.0)).ToList();
    }

    /// <summary>Counts documents, optionally restricted to one project.</summary>
    /// <param name="projectId">Project to count; null or empty counts every document.</param>
    /// <remarks>The count is taken from the loaded list, so the documents themselves are fetched.</remarks>
    public async Task<int> GetDocumentCountAsync(string? projectId = null)
    {
        var documents = await FetchDocumentsAsync(projectId);
        return documents.Count;
    }

    /// <summary>Reports whether the repository can currently be read.</summary>
    /// <returns>True when a document count succeeds; false when it throws.</returns>
    public async Task<bool> IsHealthyAsync()
    {
        try
        {
            _ = await GetDocumentCountAsync();
            return true;
        }
        catch
        {
            // Deliberate catch-all: a health probe reports failure instead of propagating it.
            return false;
        }
    }

    /// <summary>Builds a small statistics snapshot for this provider.</summary>
    /// <returns>
    /// Dictionary with the keys "provider", "total_documents", "health" and "last_check".
    /// </returns>
    /// <remarks>Throws whatever the repository throws when the count cannot be obtained.</remarks>
    public async Task<Dictionary<string, object>> GetStatsAsync()
    {
        int totalDocuments = await GetDocumentCountAsync();

        // A successful count is exactly the probe IsHealthyAsync performs, so reaching this
        // line already proves health; reusing it avoids loading the collection a second time.
        return new Dictionary<string, object>
        {
            ["provider"] = ProviderName,
            ["total_documents"] = totalDocuments,
            ["health"] = true,
            ["last_check"] = DateTime.UtcNow
        };
    }

    /// <summary>Loads every document, or only those of <paramref name="projectId"/> when one is given.</summary>
    private async Task<List<TextoComEmbedding>> FetchDocumentsAsync(string? projectId)
    {
        return string.IsNullOrEmpty(projectId)
            ? await _textDataRepository.GetAsync()
            : await _textDataRepository.GetByProjectIdAsync(projectId);
    }

    /// <summary>
    /// Loads the candidate documents and scores each one against the query. Documents whose
    /// similarity is undefined (missing embedding, different dimension, zero-length vector)
    /// are left out instead of aborting the whole search.
    /// </summary>
    private async Task<List<VectorSearchResult>> ScoreDocumentsAsync(double[] queryEmbedding, string? projectId)
    {
        if (queryEmbedding is null)
        {
            throw new ArgumentNullException(nameof(queryEmbedding));
        }

        List<TextoComEmbedding> documents;
        try
        {
            documents = await FetchDocumentsAsync(projectId);
        }
        catch (Exception ex)
        {
            // Keep the original exception as InnerException so its type and stack trace survive.
            throw new InvalidOperationException($"Erro ao buscar documentos: {ex.Message}", ex);
        }

        var scored = new List<VectorSearchResult>(documents.Count);
        foreach (var document in documents)
        {
            if (TryComputeCosineSimilarity(queryEmbedding, document.Embedding, out double score))
            {
                scored.Add(ToResult(document, score));
            }
        }

        return scored;
    }

    /// <summary>Keeps results scoring at least <paramref name="threshold"/>, best first, capped at <paramref name="limit"/>.</summary>
    private static List<VectorSearchResult> SelectTop(
        IEnumerable<VectorSearchResult> scored,
        double threshold,
        int limit)
    {
        return scored
            .Where(result => result.Score >= threshold)
            .OrderByDescending(result => result.Score)
            .Take(limit)
            .ToList();
    }

    /// <summary>Maps a stored document to the result shape exposed by the interface.</summary>
    /// <remarks>
    /// NOTE(review): CreatedAt and UpdatedAt are set to the current UTC time at mapping time;
    /// they are not read from the stored document.
    /// </remarks>
    private static VectorSearchResult ToResult(TextoComEmbedding document, double score)
    {
        return new VectorSearchResult
        {
            Id = document.Id,
            Title = document.Titulo,
            Content = document.Conteudo,
            ProjectId = document.ProjetoId,
            Score = score,
            Embedding = document.Embedding,
            Provider = ProviderName,
            CreatedAt = DateTime.UtcNow,
            UpdatedAt = DateTime.UtcNow
        };
    }

    /// <summary>
    /// Computes the cosine similarity of two vectors when it is defined.
    /// </summary>
    /// <param name="query">Query vector; must not be null.</param>
    /// <param name="candidate">Stored vector; may be null.</param>
    /// <param name="similarity">Cosine similarity on success; 0 otherwise.</param>
    /// <returns>
    /// False when <paramref name="candidate"/> is null, the lengths differ, either vector has
    /// zero magnitude, or the result is not a number; true otherwise.
    /// </returns>
    private static bool TryComputeCosineSimilarity(double[] query, double[]? candidate, out double similarity)
    {
        similarity = 0.0;

        if (candidate is null || candidate.Length != query.Length)
        {
            return false;
        }

        double dotProduct = 0.0;
        double querySquares = 0.0;
        double candidateSquares = 0.0;
        for (int i = 0; i < query.Length; i++)
        {
            double q = query[i];
            double c = candidate[i];
            dotProduct += q * c;
            querySquares += q * q;
            candidateSquares += c * c;
        }

        double denominator = Math.Sqrt(querySquares) * Math.Sqrt(candidateSquares);

        // Written as a negated comparison so that a NaN denominator is rejected as well as zero.
        if (!(denominator > 0.0))
        {
            return false;
        }

        double value = dotProduct / denominator;
        if (double.IsNaN(value))
        {
            return false;
        }

        similarity = value;
        return true;
    }
}
}
#pragma warning restore SKEXP0001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed.