221 lines
7.4 KiB
C#
221 lines
7.4 KiB
C#
using ChatRAG.Contracts.VectorSearch;
|
|
using ChatRAG.Data;
|
|
using ChatRAG.Models;
|
|
using Microsoft.Extensions.VectorData;
|
|
using Microsoft.SemanticKernel.Embeddings;
|
|
|
|
#pragma warning disable SKEXP0001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed.
|
|
namespace ChatRAG.Services.SearchVectors
|
|
{
|
|
public class MongoVectorSearchService : IVectorSearchService
|
|
{
|
|
private readonly TextDataRepository _textDataRepository;
|
|
private readonly ITextEmbeddingGenerationService _embeddingService;
|
|
|
|
/// <summary>
/// Creates the service from its two collaborators.
/// </summary>
/// <param name="textDataRepository">Repository used by this class to load, create, update and remove documents.</param>
/// <param name="embeddingService">Text embedding generation service; it is stored here but not used by the members of this class.</param>
public MongoVectorSearchService(
    TextDataRepository textDataRepository,
    ITextEmbeddingGenerationService embeddingService)
{
    // Both dependencies are captured as given; no validation happens here.
    (_textDataRepository, _embeddingService) = (textDataRepository, embeddingService);
}
|
|
|
|
// NOTE: the rest of the implementation is unchanged from the previous version
// (copied over from the earlier code).
|
|
|
|
/// <summary>
/// Scores stored documents against <paramref name="queryEmbedding"/> with cosine
/// similarity and returns the best matches, highest score first.
/// </summary>
/// <param name="queryEmbedding">Embedding vector of the query.</param>
/// <param name="projectId">When null or empty, all documents are loaded; otherwise only the documents of that project.</param>
/// <param name="threshold">Minimum score (inclusive) a document must reach to be returned.</param>
/// <param name="limit">Maximum number of results to return.</param>
/// <param name="filters">Not used by this implementation.</param>
/// <returns>At most <paramref name="limit"/> results, ordered by descending score.</returns>
public async Task<List<VectorSearchResult>> SearchSimilarAsync(
    double[] queryEmbedding,
    string? projectId = null,
    double threshold = 0.3,
    int limit = 5,
    Dictionary<string, object>? filters = null)
{
    // Similarity is computed in-process over every document the repository returns.
    var candidates = !string.IsNullOrEmpty(projectId)
        ? await _textDataRepository.GetByProjectIdAsync(projectId)
        : await _textDataRepository.GetAsync();

    var ranked =
        from candidate in candidates
        let hit = new VectorSearchResult
        {
            Id = candidate.Id,
            Title = candidate.Titulo,
            Content = candidate.Conteudo,
            ProjectId = candidate.ProjetoId,
            Score = CalcularSimilaridadeCoseno(queryEmbedding, candidate.Embedding),
            Embedding = candidate.Embedding,
            Provider = "MongoDB",
            // Timestamps are taken at mapping time, not read from the stored document.
            CreatedAt = DateTime.UtcNow,
            UpdatedAt = DateTime.UtcNow
        }
        where hit.Score >= threshold
        orderby hit.Score descending
        select hit;

    return ranked.Take(limit).ToList();
}
|
|
|
|
/// <summary>
/// Searches with <paramref name="minThreshold"/> and, while too few results come
/// back, retries with progressively lower thresholds (first 70% of the initial
/// value, then a fixed floor of 0.2).
/// </summary>
/// <param name="queryEmbedding">Embedding vector of the query.</param>
/// <param name="projectId">Project whose documents are searched.</param>
/// <param name="minThreshold">Threshold used for the first attempt.</param>
/// <param name="limit">Maximum number of results to return.</param>
/// <returns>The results of the last attempt that was executed, at most <paramref name="limit"/> items.</returns>
public async Task<List<VectorSearchResult>> SearchSimilarDynamicAsync(
    double[] queryEmbedding,
    string projectId,
    double minThreshold = 0.5,
    int limit = 5)
{
    const int desiredResultCount = 3;
    const double relaxationFactor = 0.7;
    const double floorThreshold = 0.2;

    // A search never returns more than `limit` items, so requiring more than that
    // would force every fallback to run without being able to change the outcome.
    var enough = Math.Min(desiredResultCount, limit);

    var activeThreshold = minThreshold;
    var results = await SearchSimilarAsync(queryEmbedding, projectId, activeThreshold, limit);

    foreach (var fallback in new[] { minThreshold * relaxationFactor, floorThreshold })
    {
        if (results.Count >= enough)
        {
            break;
        }

        // Only ever relax the threshold. A fallback that is not lower than the one
        // already tried (e.g. the 0.2 floor when minThreshold is 0.2) could only
        // shrink the result set, so it is skipped.
        if (fallback >= activeThreshold)
        {
            continue;
        }

        activeThreshold = fallback;
        results = await SearchSimilarAsync(queryEmbedding, projectId, activeThreshold, limit);
    }

    return results;
}
|
|
|
|
/// <summary>
/// Builds a new <c>TextoComEmbedding</c> with a freshly generated GUID as its id
/// and hands it to the repository's <c>CreateAsync</c>.
/// </summary>
/// <param name="title">Document title.</param>
/// <param name="content">Document text.</param>
/// <param name="projectId">Owning project id.</param>
/// <param name="embedding">Embedding vector stored with the document.</param>
/// <param name="metadata">Not used by this implementation.</param>
/// <returns>The id of the created document.</returns>
public async Task<string> AddDocumentAsync(
    string title,
    string content,
    string projectId,
    double[] embedding,
    Dictionary<string, object>? metadata = null)
{
    var generatedId = Guid.NewGuid().ToString();

    TextoComEmbedding entity = new()
    {
        Id = generatedId,
        Titulo = title,
        Conteudo = content,
        ProjetoId = projectId,
        Embedding = embedding
    };

    await _textDataRepository.CreateAsync(entity);

    // Read the id back from the entity, as it is the object the repository received.
    return entity.Id;
}
|
|
|
|
/// <summary>
/// Builds a <c>TextoComEmbedding</c> from the arguments and passes it, together
/// with <paramref name="id"/>, to the repository's <c>UpdateAsync</c>.
/// </summary>
/// <param name="id">Id of the document to update.</param>
/// <param name="title">New title.</param>
/// <param name="content">New text.</param>
/// <param name="projectId">Owning project id.</param>
/// <param name="embedding">New embedding vector.</param>
/// <param name="metadata">Not used by this implementation.</param>
public async Task UpdateDocumentAsync(
    string id,
    string title,
    string content,
    string projectId,
    double[] embedding,
    Dictionary<string, object>? metadata = null)
{
    TextoComEmbedding replacement = new()
    {
        Id = id,
        Titulo = title,
        Conteudo = content,
        ProjetoId = projectId,
        Embedding = embedding
    };

    await _textDataRepository.UpdateAsync(id, replacement);
}
|
|
|
|
/// <summary>
/// Asks the repository to remove the document with the given id.
/// </summary>
/// <param name="id">Id of the document to remove.</param>
public async Task DeleteDocumentAsync(string id) =>
    await _textDataRepository.RemoveAsync(id);
|
|
|
|
/// <summary>
/// Checks whether the repository returns a document for <paramref name="id"/>.
/// </summary>
/// <param name="id">Id of the document to look up.</param>
/// <returns><c>true</c> when the lookup yields a non-null document; otherwise <c>false</c>.</returns>
public async Task<bool> DocumentExistsAsync(string id)
{
    return await _textDataRepository.GetAsync(id) is not null;
}
|
|
|
|
/// <summary>
/// Loads a single document by id and maps it to a <c>VectorSearchResult</c>.
/// </summary>
/// <param name="id">Id of the document to load.</param>
/// <returns>
/// The mapped document with a fixed score of 1.0, or <c>null</c> when the
/// repository returns no document for <paramref name="id"/>.
/// </returns>
public async Task<VectorSearchResult?> GetDocumentAsync(string id)
{
    var stored = await _textDataRepository.GetAsync(id);

    return stored is null
        ? null
        : new VectorSearchResult
        {
            Id = stored.Id,
            Title = stored.Titulo,
            Content = stored.Conteudo,
            ProjectId = stored.ProjetoId,
            Score = 1.0,
            Embedding = stored.Embedding,
            Provider = "MongoDB",
            // Timestamps are taken at mapping time, not read from the stored document.
            CreatedAt = DateTime.UtcNow,
            UpdatedAt = DateTime.UtcNow
        };
}
|
|
|
|
/// <summary>
/// Loads every document of a project and maps each one to a <c>VectorSearchResult</c>.
/// </summary>
/// <param name="projectId">Project whose documents are returned.</param>
/// <returns>One result per stored document, each with a fixed score of 1.0.</returns>
public async Task<List<VectorSearchResult>> GetDocumentsByProjectAsync(string projectId)
{
    var stored = await _textDataRepository.GetByProjectIdAsync(projectId);

    var mapped =
        from item in stored
        select new VectorSearchResult
        {
            Id = item.Id,
            Title = item.Titulo,
            Content = item.Conteudo,
            ProjectId = item.ProjetoId,
            Score = 1.0,
            Embedding = item.Embedding,
            Provider = "MongoDB",
            // Timestamps are taken at mapping time, not read from the stored document.
            CreatedAt = DateTime.UtcNow,
            UpdatedAt = DateTime.UtcNow
        };

    return mapped.ToList();
}
|
|
|
|
/// <summary>
/// Counts stored documents by loading them from the repository and reading the
/// size of the returned collection.
/// </summary>
/// <param name="projectId">When null or empty, all documents are counted; otherwise only those of that project.</param>
/// <returns>The number of documents returned by the repository.</returns>
public async Task<int> GetDocumentCountAsync(string? projectId = null)
{
    if (!string.IsNullOrEmpty(projectId))
    {
        return (await _textDataRepository.GetByProjectIdAsync(projectId)).Count;
    }

    return (await _textDataRepository.GetAsync()).Count;
}
|
|
|
|
/// <summary>
/// Probes the store by running a document count.
/// </summary>
/// <returns><c>true</c> when the count completes; <c>false</c> when it throws any exception.</returns>
public async Task<bool> IsHealthyAsync()
{
    try
    {
        // The count value is irrelevant; only whether the call completes matters.
        _ = await GetDocumentCountAsync();
    }
    catch
    {
        return false;
    }

    return true;
}
|
|
|
|
/// <summary>
/// Builds a small statistics dictionary for this provider.
/// </summary>
/// <remarks>
/// The document count is attempted exactly once and the <c>health</c> entry is
/// derived from whether that attempt succeeded. Previously the count ran before
/// the health probe, so a failing store made this method throw instead of
/// reporting <c>health = false</c>, and a healthy store was queried twice.
/// </remarks>
/// <returns>
/// A dictionary with the keys <c>provider</c>, <c>total_documents</c> (0 when the
/// count failed), <c>health</c> and <c>last_check</c> (current UTC time).
/// </returns>
public async Task<Dictionary<string, object>> GetStatsAsync()
{
    var totalDocs = 0;
    var healthy = true;

    try
    {
        totalDocs = await GetDocumentCountAsync();
    }
    catch
    {
        // Same catch-all policy as IsHealthyAsync: any failure means "unhealthy".
        healthy = false;
    }

    return new Dictionary<string, object>
    {
        ["provider"] = "MongoDB",
        ["total_documents"] = totalDocs,
        ["health"] = healthy,
        ["last_check"] = DateTime.UtcNow
    };
}
|
|
|
|
/// <summary>
/// Computes the cosine similarity between two embedding vectors.
/// </summary>
/// <param name="embedding1">First vector.</param>
/// <param name="embedding2">Second vector.</param>
/// <returns>
/// The cosine of the angle between the vectors, or 0.0 when the vectors cannot be
/// compared: either one is null, their lengths differ, or either has zero
/// magnitude (which includes empty vectors).
/// </returns>
private static double CalcularSimilaridadeCoseno(double[] embedding1, double[] embedding2)
{
    // Vectors of different dimensions are not comparable; treating them as
    // "no similarity" keeps one malformed document from failing a whole search.
    if (embedding1 is null || embedding2 is null || embedding1.Length != embedding2.Length)
    {
        return 0.0;
    }

    double dotProduct = 0.0;
    double squaredNormA = 0.0;
    double squaredNormB = 0.0;

    for (int i = 0; i < embedding1.Length; i++)
    {
        double a = embedding1[i];
        double b = embedding2[i];

        dotProduct += a * b;
        squaredNormA += a * a;
        squaredNormB += b * b;
    }

    double denominator = Math.Sqrt(squaredNormA) * Math.Sqrt(squaredNormB);

    // A zero (or NaN) denominator would otherwise produce NaN or infinity.
    return denominator > 0.0 ? dotProduct / denominator : 0.0;
}
|
|
}
|
|
}
|
|
#pragma warning restore SKEXP0001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed.
|