ChatRAG/Services/SearchVectors/QdrantVectorSearchService.cs
2025-06-21 14:20:07 -03:00

557 lines
19 KiB
C#

using ChatRAG.Contracts.VectorSearch;
using ChatRAG.Settings.ChatRAG.Configuration;
using Microsoft.Extensions.Options;
using Qdrant.Client.Grpc;
using ChatRAG.Models;
using ChatRAG.Services.Contracts;
using Qdrant.Client;
using static Qdrant.Client.Grpc.Conditions;
using System.Drawing;
using System.Collections.Concurrent;
#pragma warning disable SKEXP0001
namespace ChatRAG.Services.SearchVectors
{
public class QdrantVectorSearchService : IVectorSearchService
{
// Client used for every Qdrant call made by this service.
private readonly QdrantClient _client;
// Qdrant section of the vector-database settings (host, port, collection name, vector and HNSW options).
private readonly QdrantSettings _settings;
private readonly ILogger<QdrantVectorSearchService> _logger;
// Set by EnsureCollectionExistsAsync once the collection has been checked/created;
// volatile because it is also read before the semaphore is acquired.
private volatile bool _collectionInitialized = false;
// Allows one caller at a time into the collection check/creation in EnsureCollectionExistsAsync.
private readonly SemaphoreSlim _initializationSemaphore = new(1, 1);
// Per-collection-name cache of existence results, written by EnsureCollectionExistsAsync.
private readonly ConcurrentDictionary<string, bool> _collectionCache = new();
/// <summary>
/// Creates the service from the Qdrant section of the vector-database options
/// and constructs the Qdrant client for the configured host and port.
/// </summary>
/// <param name="settings">Options wrapper; only <c>Value.Qdrant</c> is read.</param>
/// <param name="logger">Logger used by this service.</param>
public QdrantVectorSearchService(
    IOptions<VectorDatabaseSettings> settings,
    ILogger<QdrantVectorSearchService> logger)
{
    var qdrantSettings = settings.Value.Qdrant;

    _settings = qdrantSettings;
    _logger = logger;
    _client = new QdrantClient(
        qdrantSettings.Host,
        qdrantSettings.Port,
        https: qdrantSettings.UseTls);

    _logger.LogInformation(
        "QdrantVectorSearchService inicializado para {Host}:{Port}",
        qdrantSettings.Host,
        qdrantSettings.Port);
}
/// <summary>
/// Makes sure the configured collection exists, creating it on first use.
/// </summary>
/// <remarks>
/// The fast path is a read of <c>_collectionInitialized</c>. The slow path runs under
/// <c>_initializationSemaphore</c> and re-checks the flag after acquiring it, so the
/// existence check and the creation are not run by two callers at once.
/// An unrecognised distance name falls back to cosine.
/// </remarks>
private async Task EnsureCollectionExistsAsync()
{
    if (_collectionInitialized)
    {
        return;
    }

    await _initializationSemaphore.WaitAsync();
    try
    {
        if (_collectionInitialized)
        {
            return;
        }

        var collectionName = _settings.CollectionName;

        // Check the cache first.
        if (_collectionCache.TryGetValue(collectionName, out bool exists) && exists)
        {
            _collectionInitialized = true;
            return;
        }

        var collectionExists = await _client.CollectionExistsAsync(collectionName);
        _collectionCache[collectionName] = collectionExists;

        if (!collectionExists)
        {
            _logger.LogInformation("Criando collection {CollectionName}...", collectionName);

            var vectorsConfig = new VectorParams
            {
                Size = (ulong)_settings.VectorSize,
                // Invariant lower-casing: culture-sensitive casing (e.g. Turkish dotless i)
                // would turn "EUCLID" into a string that matches none of the arms.
                Distance = _settings.Distance.ToLowerInvariant() switch
                {
                    "cosine" => Distance.Cosine,
                    "euclid" => Distance.Euclid,
                    "dot" => Distance.Dot,
                    "manhattan" => Distance.Manhattan,
                    _ => Distance.Cosine
                }
            };

            // HNSW parameters are only sent when HnswM is configured with a positive value.
            if (_settings.HnswM > 0)
            {
                vectorsConfig.HnswConfig = new HnswConfigDiff
                {
                    M = (ulong)_settings.HnswM,
                    EfConstruct = (ulong)_settings.HnswEfConstruct,
                    OnDisk = _settings.OnDisk
                };
            }

            await _client.CreateCollectionAsync(
                collectionName: collectionName,
                vectorsConfig: vectorsConfig
            );

            // Indexer assignment rather than TryAdd: the key already holds 'false'
            // from the existence check above, so TryAdd would leave it unchanged.
            _collectionCache[collectionName] = true;
            _logger.LogInformation("✅ Collection {CollectionName} criada", collectionName);
        }

        _collectionInitialized = true;
    }
    finally
    {
        _initializationSemaphore.Release();
    }
}
/// <summary>
/// Runs a similarity search against the configured collection.
/// </summary>
/// <param name="queryEmbedding">Query vector; each component is narrowed to <c>float</c> before the call.</param>
/// <param name="projectId">When non-empty, restricts hits to points whose <c>project_id</c> payload field matches.</param>
/// <param name="threshold">Passed to the client as the score threshold.</param>
/// <param name="limit">Passed to the client as the maximum number of hits.</param>
/// <param name="filters">
/// Extra conditions; each entry becomes a keyword match on the payload field named by the key,
/// compared against the value's <c>ToString()</c>.
/// NOTE(review): CreatePayload stores caller metadata under "meta_"-prefixed keys, so metadata
/// filters presumably need the prefixed key — confirm with callers.
/// </param>
/// <returns>The hits converted to <see cref="VectorSearchResult"/>.</returns>
/// <remarks>Failures are logged and rethrown.</remarks>
public async Task<List<VectorSearchResult>> SearchSimilarAsync(
    double[] queryEmbedding,
    string? projectId = null,
    double threshold = 0.3,
    int limit = 5,
    Dictionary<string, object>? filters = null)
{
    await EnsureCollectionExistsAsync();

    try
    {
        var queryVector = queryEmbedding.Select(component => (float)component).ToArray();

        // Collect every condition first; a Filter is only built when at least one exists.
        var conditions = new List<Condition>();

        if (!string.IsNullOrEmpty(projectId))
        {
            conditions.Add(MatchKeyword("project_id", projectId));
        }

        if (filters is not null)
        {
            foreach (var entry in filters)
            {
                conditions.Add(MatchKeyword(entry.Key, entry.Value.ToString()!));
            }
        }

        Filter? searchFilter = null;
        if (conditions.Count > 0)
        {
            searchFilter = new Filter();
            searchFilter.Must.AddRange(conditions);
        }

        var hits = await _client.SearchAsync(
            collectionName: _settings.CollectionName,
            vector: queryVector,
            filter: searchFilter,
            limit: (ulong)limit,
            scoreThreshold: (float)threshold,
            payloadSelector: true,
            vectorsSelector: false // Optimization: stored vectors are not needed in the response
        );

        var converted = new List<VectorSearchResult>();
        foreach (var hit in hits)
        {
            converted.Add(ConvertToVectorSearchResult(hit));
        }

        return converted;
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "Erro na busca vetorial Qdrant");
        throw;
    }
}
/// <summary>
/// Similarity search that relaxes the score threshold when too few hits come back.
/// </summary>
/// <param name="queryEmbedding">Query vector.</param>
/// <param name="projectId">Project whose documents are searched.</param>
/// <param name="minThreshold">Threshold used for the first attempt.</param>
/// <param name="limit">Maximum number of results returned.</param>
/// <returns>At most <paramref name="limit"/> results from the last attempt that ran.</returns>
/// <remarks>
/// Attempts, in order: <paramref name="minThreshold"/>, then 70% of it, then the 0.2 floor.
/// A later attempt only runs when the previous one returned too few hits and its
/// threshold is lower than the one already tried.
/// </remarks>
public async Task<List<VectorSearchResult>> SearchSimilarDynamicAsync(
    double[] queryEmbedding,
    string projectId,
    double minThreshold = 0.5,
    int limit = 5)
{
    const int DesiredResults = 3;
    const double RelaxFactor = 0.7;
    const double FloorThreshold = 0.2;

    // A caller asking for fewer than DesiredResults hits can never get more than `limit`,
    // so a full page already counts as enough and no relaxed retry is needed.
    var enough = Math.Min(DesiredResults, limit);
    var currentThreshold = minThreshold;

    var results = await SearchSimilarAsync(queryEmbedding, projectId, currentThreshold, limit);

    if (results.Count < enough && currentThreshold > FloorThreshold)
    {
        currentThreshold *= RelaxFactor;
        results = await SearchSimilarAsync(queryEmbedding, projectId, currentThreshold, limit);
    }

    // Fall back to the floor only when it is more permissive than what was already tried;
    // retrying with a stricter threshold could only return the same hits or fewer.
    if (results.Count < enough && currentThreshold > FloorThreshold)
    {
        results = await SearchSimilarAsync(queryEmbedding, projectId, FloorThreshold, limit);
    }

    return results.Take(limit).ToList();
}
/// <summary>
/// Stores a new document as a point in the configured collection.
/// </summary>
/// <param name="title">Stored in the <c>title</c> payload field.</param>
/// <param name="content">Stored in the <c>content</c> payload field.</param>
/// <param name="projectId">Stored in the <c>project_id</c> payload field.</param>
/// <param name="embedding">Document vector; each component is narrowed to <c>float</c>.</param>
/// <param name="metadata">Optional extra values handed to CreatePayload.</param>
/// <returns>The newly generated GUID string used as the point id.</returns>
/// <remarks>Failures are logged and rethrown.</remarks>
public async Task<string> AddDocumentAsync(
    string title,
    string content,
    string projectId,
    double[] embedding,
    Dictionary<string, object>? metadata = null)
{
    await EnsureCollectionExistsAsync();

    try
    {
        var documentId = Guid.NewGuid().ToString();
        var floatVector = embedding.Select(component => (float)component).ToArray();
        var fields = CreatePayload(title, content, projectId, metadata, isUpdate: false);

        var newPoint = new PointStruct
        {
            Id = new PointId { Uuid = documentId },
            Vectors = floatVector
        };
        newPoint.Payload.Add(fields);

        await _client.UpsertAsync(
            collectionName: _settings.CollectionName,
            points: new[] { newPoint }
        );

        _logger.LogDebug("Documento {Id} adicionado ao Qdrant", documentId);
        return documentId;
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "Erro ao adicionar documento no Qdrant");
        throw;
    }
}
/// <summary>
/// Replaces the point stored under <paramref name="id"/> with new content, vector and metadata.
/// </summary>
/// <param name="id">UUID string of the point to overwrite.</param>
/// <param name="title">Stored in the <c>title</c> payload field.</param>
/// <param name="content">Stored in the <c>content</c> payload field.</param>
/// <param name="projectId">Stored in the <c>project_id</c> payload field.</param>
/// <param name="embedding">Document vector; each component is narrowed to <c>float</c>.</param>
/// <param name="metadata">Optional extra values handed to CreatePayload.</param>
/// <remarks>
/// The write is an upsert, which replaces the whole point including its payload. The stored
/// <c>created_at</c> is therefore read back first and carried over, at the cost of one extra
/// round-trip. Failures are logged and rethrown.
/// </remarks>
public async Task UpdateDocumentAsync(
    string id,
    string title,
    string content,
    string projectId,
    double[] embedding,
    Dictionary<string, object>? metadata = null)
{
    await EnsureCollectionExistsAsync();
    try
    {
        var pointId = new PointId { Uuid = id };
        var vector = embedding.Select(x => (float)x).ToArray();
        var payload = CreatePayload(title, content, projectId, metadata, isUpdate: true);

        // The update payload carries no created_at, so without this step the upsert
        // below would erase the original creation timestamp.
        var existing = await _client.RetrieveAsync(
            collectionName: _settings.CollectionName,
            ids: new PointId[] { pointId },
            withPayload: true,
            withVectors: false
        );
        var previous = existing.FirstOrDefault();
        if (previous is not null && previous.Payload.TryGetValue("created_at", out var createdAt))
        {
            payload["created_at"] = createdAt;
        }
        else
        {
            // Nothing to preserve (new point, or one stored without the field):
            // this write is the earliest moment that can be recorded.
            payload["created_at"] = payload["updated_at"];
        }

        var point = new PointStruct
        {
            Id = pointId,
            Vectors = vector,
            Payload = { payload }
        };

        await _client.UpsertAsync(
            collectionName: _settings.CollectionName,
            points: new[] { point }
        );

        _logger.LogDebug("Documento {Id} atualizado no Qdrant", id);
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "Erro ao atualizar documento {Id} no Qdrant", id);
        throw;
    }
}
/// <summary>
/// Deletes the point identified by <paramref name="id"/> from the configured collection.
/// </summary>
/// <param name="id">
/// Point id as a string: a UUID (the form produced by AddDocumentAsync) or an unsigned integer.
/// </param>
/// <exception cref="ArgumentException">
/// <paramref name="id"/> is neither a UUID nor an unsigned integer.
/// </exception>
/// <remarks>Failures, including an invalid id, are logged and rethrown.</remarks>
public async Task DeleteDocumentAsync(string id)
{
    await EnsureCollectionExistsAsync();
    try
    {
        // The id must be sent in its own kind. Reading Num from a UUID-kind PointId yields 0,
        // which would target point 0 instead of the requested document.
        if (Guid.TryParse(id, out var uuid))
        {
            await _client.DeleteAsync(
                collectionName: _settings.CollectionName,
                ids: new[] { uuid }
            );
        }
        else if (ulong.TryParse(
            id,
            System.Globalization.NumberStyles.None,
            System.Globalization.CultureInfo.InvariantCulture,
            out var numericId))
        {
            await _client.DeleteAsync(
                collectionName: _settings.CollectionName,
                ids: new[] { numericId }
            );
        }
        else
        {
            throw new ArgumentException(
                $"Id de documento inválido: '{id}'. Esperado um UUID ou um inteiro sem sinal.",
                nameof(id));
        }

        _logger.LogDebug("Documento {Id} removido do Qdrant", id);
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "Erro ao remover documento {Id} do Qdrant", id);
        throw;
    }
}
/// <summary>
/// Reports whether a point with the given UUID is stored in the configured collection.
/// </summary>
/// <param name="id">UUID string of the point to look up.</param>
/// <returns>
/// <c>true</c> when the lookup returns at least one point; <c>false</c> when it returns none
/// or when the lookup fails.
/// </returns>
/// <remarks>
/// Best effort: errors never propagate. They are logged so that a failed lookup can be
/// told apart from a document that is really missing.
/// </remarks>
public async Task<bool> DocumentExistsAsync(string id)
{
    try
    {
        await EnsureCollectionExistsAsync();

        var pointId = new PointId { Uuid = id };
        var results = await _client.RetrieveAsync(
            collectionName: _settings.CollectionName,
            ids: new PointId[] { pointId },
            withPayload: false, // Optimization: only existence matters here
            withVectors: false
        );

        return results.Any();
    }
    catch (Exception ex)
    {
        _logger.LogWarning(ex, "Erro ao verificar existência do documento {Id} no Qdrant", id);
        return false;
    }
}
/// <summary>
/// Loads a single document by its UUID.
/// </summary>
/// <param name="id">UUID string of the point to load.</param>
/// <returns>
/// The converted document, or <c>null</c> when no point is returned or the lookup fails
/// (the failure is logged, not rethrown).
/// </returns>
public async Task<VectorSearchResult?> GetDocumentAsync(string id)
{
    await EnsureCollectionExistsAsync();

    try
    {
        var matches = await _client.RetrieveAsync(
            collectionName: _settings.CollectionName,
            ids: new PointId[] { new PointId { Uuid = id } },
            withPayload: true,
            withVectors: false
        );

        var first = matches.FirstOrDefault();
        if (first == null)
        {
            return null;
        }

        return ConvertToVectorSearchResult(first);
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "Erro ao recuperar documento {Id} do Qdrant", id);
        return null;
    }
}
/// <summary>
/// Returns every document whose <c>project_id</c> payload field matches <paramref name="projectId"/>.
/// </summary>
/// <param name="projectId">Project whose documents are listed.</param>
/// <returns>All matching documents, without their vectors.</returns>
/// <remarks>
/// The collection is scrolled page by page until the server reports no further page, so
/// projects larger than one page are returned in full. Failures are logged and rethrown.
/// </remarks>
public async Task<List<VectorSearchResult>> GetDocumentsByProjectAsync(string projectId)
{
    // Number of points requested per scroll call.
    const uint PageSize = 10000;

    await EnsureCollectionExistsAsync();
    try
    {
        var filter = new Filter();
        filter.Must.Add(MatchKeyword("project_id", projectId));

        var documents = new List<VectorSearchResult>();
        PointId? offset = null;

        do
        {
            // Payload is requested, vectors are not (they are not needed for the listing).
            var page = await _client.ScrollAsync(_settings.CollectionName, filter, PageSize, offset, true, false);

            foreach (var point in page.Result)
            {
                documents.Add(ConvertToVectorSearchResult(point));
            }

            // Unset once the last page has been delivered.
            offset = page.NextPageOffset;
        }
        while (offset is not null);

        return documents;
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "Erro ao buscar documentos do projeto {ProjectId} no Qdrant", projectId);
        throw;
    }
}
/// <summary>
/// Counts the points in the configured collection, optionally restricted to one project.
/// </summary>
/// <param name="projectId">When non-empty, only points with a matching <c>project_id</c> are counted.</param>
/// <returns>The count narrowed to <c>int</c>, or 0 when the call fails (the failure is logged).</returns>
public async Task<int> GetDocumentCountAsync(string? projectId = null)
{
    await EnsureCollectionExistsAsync();

    try
    {
        Filter? projectFilter = null;
        var restrictToProject = !string.IsNullOrEmpty(projectId);
        if (restrictToProject)
        {
            projectFilter = new Filter
            {
                Must = { MatchKeyword("project_id", projectId) }
            };
        }

        var total = await _client.CountAsync(_settings.CollectionName, projectFilter);
        return (int)total;
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "Erro ao contar documentos no Qdrant");
        return 0;
    }
}
/// <summary>
/// Probes the Qdrant server by listing its collections.
/// </summary>
/// <returns>
/// <c>true</c> when the listing call returns a non-null result; <c>false</c> when it throws.
/// </returns>
/// <remarks>Never throws; the failure is logged so that an unhealthy result can be diagnosed.</remarks>
public async Task<bool> IsHealthyAsync()
{
    try
    {
        var collections = await _client.ListCollectionsAsync();
        return collections != null;
    }
    catch (Exception ex)
    {
        _logger.LogWarning(ex, "Falha na verificação de saúde do Qdrant");
        return false;
    }
}
/// <summary>
/// Collects provider statistics for the configured collection.
/// </summary>
/// <returns>
/// On success: provider name, document count, collection name, configured vector size and
/// distance, point and segment counts, health flag and check time.
/// On failure: provider name, <c>health = false</c>, the exception message and check time.
/// </returns>
/// <remarks>Never throws; any exception is reported through the <c>error</c> entry.</remarks>
public async Task<Dictionary<string, object>> GetStatsAsync()
{
    try
    {
        await EnsureCollectionExistsAsync();

        var info = await _client.GetCollectionInfoAsync(_settings.CollectionName);
        var documentCount = await GetDocumentCountAsync();

        var stats = new Dictionary<string, object>();
        stats["provider"] = "Qdrant";
        stats["total_documents"] = documentCount;
        stats["collection_name"] = _settings.CollectionName;
        stats["vector_size"] = _settings.VectorSize;
        stats["distance_metric"] = _settings.Distance;
        stats["points_count"] = info.PointsCount;
        stats["segments_count"] = info.SegmentsCount;
        stats["health"] = await IsHealthyAsync();
        stats["last_check"] = DateTime.UtcNow;
        return stats;
    }
    catch (Exception ex)
    {
        var failure = new Dictionary<string, object>();
        failure["provider"] = "Qdrant";
        failure["health"] = false;
        failure["error"] = ex.Message;
        failure["last_check"] = DateTime.UtcNow;
        return failure;
    }
}
// Optimized helper methods
/// <summary>
/// Builds the payload stored alongside a document vector.
/// </summary>
/// <param name="title">Written to <c>title</c>.</param>
/// <param name="content">Written to <c>content</c>.</param>
/// <param name="projectId">Written to <c>project_id</c>.</param>
/// <param name="metadata">Optional extra values; each key is stored with a <c>meta_</c> prefix.</param>
/// <param name="isUpdate">
/// When <c>true</c> only <c>updated_at</c> is written; otherwise <c>created_at</c> is written too.
/// </param>
/// <returns>The payload dictionary; timestamps are UTC in round-trip ("O") format.</returns>
private static Dictionary<string, Value> CreatePayload(
    string title,
    string content,
    string projectId,
    Dictionary<string, object>? metadata,
    bool isUpdate)
{
    // A single clock read, so created_at and updated_at are identical on a new document.
    var timestamp = DateTime.UtcNow.ToString("O");

    var payload = new Dictionary<string, Value>
    {
        ["title"] = title,
        ["content"] = content,
        ["project_id"] = projectId
    };

    if (!isUpdate)
    {
        payload["created_at"] = timestamp;
    }

    payload["updated_at"] = timestamp;

    if (metadata is not null)
    {
        foreach (var kvp in metadata)
        {
            // The prefix keeps caller-supplied keys from colliding with the fixed fields above.
            payload[$"meta_{kvp.Key}"] = ConvertToValue(kvp.Value);
        }
    }

    return payload;
}
/// <summary>
/// Maps a scored search hit to a <see cref="VectorSearchResult"/>.
/// </summary>
/// <param name="point">Hit returned by a similarity search, with payload.</param>
/// <returns>The result carrying the hit's id, payload fields and score.</returns>
private static VectorSearchResult ConvertToVectorSearchResult(ScoredPoint point)
{
    // The id is a protobuf oneof (number or UUID). The generated Uuid getter returns ""
    // rather than null when the id is numeric, so '??' can never fall back to Num;
    // the kind has to be read from the oneof case.
    var id = point.Id.PointIdOptionsCase == PointId.PointIdOptionsOneofCase.Num
        ? point.Id.Num.ToString()
        : point.Id.Uuid;

    return new VectorSearchResult
    {
        Id = id,
        Title = GetStringFromPayload(point.Payload, "title"),
        Content = GetStringFromPayload(point.Payload, "content"),
        ProjectId = GetStringFromPayload(point.Payload, "project_id"),
        Score = point.Score,
        Provider = "Qdrant",
        CreatedAt = GetDateTimeFromPayload(point.Payload, "created_at"),
        UpdatedAt = GetDateTimeFromPayload(point.Payload, "updated_at"),
        Metadata = ConvertPayloadToMetadata(point.Payload)
    };
}
/// <summary>
/// Maps a point fetched by id or scroll to a <see cref="VectorSearchResult"/>.
/// </summary>
/// <param name="point">Retrieved point, with payload.</param>
/// <returns>The result with a fixed score of 1.0, since no similarity was computed.</returns>
private static VectorSearchResult ConvertToVectorSearchResult(RetrievedPoint point)
{
    // The id is a protobuf oneof (number or UUID). The generated Uuid getter returns ""
    // rather than null when the id is numeric, so '??' can never fall back to Num;
    // the kind has to be read from the oneof case.
    var id = point.Id.PointIdOptionsCase == PointId.PointIdOptionsOneofCase.Num
        ? point.Id.Num.ToString()
        : point.Id.Uuid;

    return new VectorSearchResult
    {
        Id = id,
        Title = GetStringFromPayload(point.Payload, "title"),
        Content = GetStringFromPayload(point.Payload, "content"),
        ProjectId = GetStringFromPayload(point.Payload, "project_id"),
        Score = 1.0,
        Provider = "Qdrant",
        CreatedAt = GetDateTimeFromPayload(point.Payload, "created_at"),
        UpdatedAt = GetDateTimeFromPayload(point.Payload, "updated_at"),
        Metadata = ConvertPayloadToMetadata(point.Payload)
    };
}
/// <summary>
/// Converts a metadata value to a Qdrant payload <see cref="Value"/>.
/// </summary>
/// <param name="value">The value to convert.</param>
/// <returns>
/// Strings, integers, floating-point numbers and booleans are converted directly;
/// <see cref="DateTime"/> is written in round-trip ("O") format; anything else is stored as
/// its <c>ToString()</c> text, or an empty string when <paramref name="value"/> is null.
/// </returns>
private static Value ConvertToValue(object value)
{
    switch (value)
    {
        case string text:
            return text;
        case int int32:
            return int32;
        case long int64:
            return int64;
        case double float64:
            return float64;
        case float float32:
            return float32;
        case bool flag:
            return flag;
        case DateTime moment:
            return moment.ToString("O");
        default:
            return value?.ToString() ?? "";
    }
}
/// <summary>
/// Reads a payload entry as a string.
/// </summary>
/// <param name="payload">Payload to read from.</param>
/// <param name="key">Name of the payload field.</param>
/// <param name="defaultValue">Returned when <paramref name="key"/> is not present.</param>
/// <returns>
/// The entry's <c>StringValue</c> when the key exists, otherwise <paramref name="defaultValue"/>.
/// </returns>
private static string GetStringFromPayload(
    IDictionary<string, Value> payload,
    string key,
    string defaultValue = "")
{
    if (payload.TryGetValue(key, out var stored))
    {
        return stored.StringValue;
    }

    return defaultValue;
}
/// <summary>
/// Reads a payload entry as a timestamp.
/// </summary>
/// <param name="payload">Payload to read from.</param>
/// <param name="key">Name of the payload field.</param>
/// <returns>
/// The parsed timestamp, or the current UTC time when the key is missing or its text cannot
/// be parsed.
/// </returns>
/// <remarks>
/// Parsing uses the invariant culture and keeps the kind written in the text, so the UTC
/// round-trip strings produced by CreatePayload come back as UTC instead of being shifted
/// to the machine's local time.
/// </remarks>
private static DateTime GetDateTimeFromPayload(
    IDictionary<string, Value> payload,
    string key)
{
    if (payload.TryGetValue(key, out var value) &&
        DateTime.TryParse(
            value.StringValue,
            System.Globalization.CultureInfo.InvariantCulture,
            System.Globalization.DateTimeStyles.RoundtripKind,
            out var date))
    {
        return date;
    }

    return DateTime.UtcNow;
}
/// <summary>
/// Extracts caller metadata (entries whose key starts with <c>meta_</c>) from a payload.
/// </summary>
/// <param name="payload">Payload to read from.</param>
/// <returns>
/// The metadata keyed without the prefix, or <c>null</c> when the payload has no such entries.
/// String, integer (as <c>long</c>), double and bool values keep their type; any other kind
/// is returned as the entry's <c>StringValue</c>.
/// </returns>
private static Dictionary<string, object>? ConvertPayloadToMetadata(
    IDictionary<string, Value> payload)
{
    const string MetaPrefix = "meta_";

    var metadata = new Dictionary<string, object>();

    foreach (var kvp in payload)
    {
        // Ordinal match: a culture-sensitive StartsWith can ignore certain characters,
        // which would make the fixed-length prefix strip below cut the key in the wrong place.
        if (!kvp.Key.StartsWith(MetaPrefix, StringComparison.Ordinal))
        {
            continue;
        }

        var key = kvp.Key[MetaPrefix.Length..];
        var value = kvp.Value;

        metadata[key] = value.KindCase switch
        {
            Value.KindOneofCase.StringValue => value.StringValue,
            Value.KindOneofCase.IntegerValue => value.IntegerValue,
            Value.KindOneofCase.DoubleValue => value.DoubleValue,
            Value.KindOneofCase.BoolValue => value.BoolValue,
            _ => value.StringValue
        };
    }

    return metadata.Count > 0 ? metadata : null;
}
/// <summary>
/// Disposes the initialization semaphore and then the Qdrant client.
/// </summary>
public void Dispose()
{
    if (_initializationSemaphore is not null)
    {
        _initializationSemaphore.Dispose();
    }

    if (_client is not null)
    {
        _client.Dispose();
    }
}
}
}
#pragma warning restore SKEXP0001