// C# source file (527 lines, 18 KiB)
using ChatRAG.Contracts.VectorSearch;
|
|
using ChatRAG.Settings.ChatRAG.Configuration;
|
|
using Microsoft.Extensions.Options;
|
|
using Qdrant.Client.Grpc;
|
|
using ChatRAG.Models;
|
|
using ChatRAG.Services.Contracts;
|
|
using Qdrant.Client;
|
|
using static Qdrant.Client.Grpc.Conditions;
|
|
using System.Drawing;
|
|
|
|
#pragma warning disable SKEXP0001
|
|
|
|
|
|
namespace ChatRAG.Services.SearchVectors
|
|
{
|
|
public class QdrantVectorSearchService : IVectorSearchService
|
|
{
|
|
/// <summary>Client through which every Qdrant call in this class is made.</summary>
private readonly QdrantClient _client;

/// <summary>Qdrant section of the vector-database settings (host, port, collection name, index options).</summary>
private readonly QdrantSettings _settings;

/// <summary>Logger used for the diagnostics emitted by this service.</summary>
private readonly ILogger<QdrantVectorSearchService> _logger;

/// <summary>Becomes true once the collection has been found or created, so later calls skip the check.</summary>
private bool _collectionInitialized;
|
|
|
|
/// <summary>
/// Builds the service from the Qdrant section of the supplied settings and
/// constructs the client for the configured host, port and TLS flag.
/// </summary>
/// <param name="settings">Vector-database options; only the <c>Qdrant</c> section is read here.</param>
/// <param name="logger">Logger used by this service.</param>
public QdrantVectorSearchService(
    IOptions<VectorDatabaseSettings> settings,
    ILogger<QdrantVectorSearchService> logger)
{
    var qdrant = settings.Value.Qdrant;

    _settings = qdrant;
    _logger = logger;
    _client = new QdrantClient(qdrant.Host, qdrant.Port, https: qdrant.UseTls);

    _logger.LogInformation(
        "QdrantVectorSearchService inicializado para {Host}:{Port}",
        qdrant.Host,
        qdrant.Port);
}
|
|
|
|
/// <summary>
/// Makes sure the configured collection exists, creating it from the settings
/// (vector size, distance metric and optional HNSW parameters) when it does not.
/// The check runs against the server only until it has succeeded once.
/// </summary>
/// <remarks>
/// NOTE(review): the initialized flag is not synchronized, so two concurrent
/// first calls could both attempt to create the collection — confirm whether
/// the service is used concurrently before start-up completes.
/// </remarks>
/// <exception cref="Exception">Any failure is logged and rethrown unchanged.</exception>
private async Task EnsureCollectionExistsAsync()
{
    if (_collectionInitialized)
    {
        return;
    }

    try
    {
        var collectionExists = await _client.CollectionExistsAsync(_settings.CollectionName);

        if (!collectionExists)
        {
            _logger.LogInformation("Criando collection {CollectionName}...", _settings.CollectionName);

            var vectorsConfig = new VectorParams
            {
                Size = (ulong)_settings.VectorSize,

                // Invariant lower-casing: a culture-sensitive ToLower() maps 'I' to a
                // dotless 'ı' under Turkish cultures, which made "EUCLID" miss its arm
                // and silently fall back to Cosine. The null-conditional keeps a missing
                // setting on the same Cosine default instead of throwing.
                Distance = _settings.Distance?.ToLowerInvariant() switch
                {
                    "cosine" => Distance.Cosine,
                    "euclid" => Distance.Euclid,
                    "dot" => Distance.Dot,
                    "manhattan" => Distance.Manhattan,
                    _ => Distance.Cosine
                }
            };

            // Optional HNSW settings: only sent when M is configured with a positive value.
            if (_settings.HnswM > 0)
            {
                vectorsConfig.HnswConfig = new HnswConfigDiff
                {
                    M = (ulong)_settings.HnswM,
                    EfConstruct = (ulong)_settings.HnswEfConstruct,
                    OnDisk = _settings.OnDisk
                };
            }

            await _client.CreateCollectionAsync(
                collectionName: _settings.CollectionName,
                vectorsConfig: vectorsConfig);

            _logger.LogInformation("✅ Collection {CollectionName} criada", _settings.CollectionName);
        }

        _collectionInitialized = true;
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "Erro ao inicializar collection {CollectionName}", _settings.CollectionName);
        throw;
    }
}
|
|
|
|
/// <summary>
/// Runs a similarity search for <paramref name="queryEmbedding"/> and maps the
/// scored points to <see cref="VectorSearchResult"/> instances.
/// </summary>
/// <param name="queryEmbedding">Query vector; each component is narrowed to <c>float</c> before the call.</param>
/// <param name="projectId">When non-empty, restricts results to points whose <c>project_id</c> payload matches.</param>
/// <param name="threshold">Score threshold passed to the search.</param>
/// <param name="limit">Maximum number of points requested.</param>
/// <param name="filters">Extra payload conditions; each value is matched as a keyword using its string form.</param>
/// <returns>The mapped results in the order returned by the client.</returns>
/// <exception cref="ArgumentException">A filter entry has a null value (logged, then rethrown).</exception>
/// <exception cref="Exception">Any other failure is logged and rethrown unchanged.</exception>
public async Task<List<VectorSearchResult>> SearchSimilarAsync(
    double[] queryEmbedding,
    string? projectId = null,
    double threshold = 0.3,
    int limit = 5,
    Dictionary<string, object>? filters = null)
{
    await EnsureCollectionExistsAsync();

    try
    {
        var vector = queryEmbedding.Select(x => (float)x).ToArray();

        var mustConditions = new List<Condition>();

        if (!string.IsNullOrEmpty(projectId))
        {
            mustConditions.Add(MatchKeyword("project_id", projectId));
        }

        if (filters is not null)
        {
            foreach (var kvp in filters)
            {
                // A null value used to surface as a NullReferenceException; fail with a
                // descriptive error instead of silently dropping or widening the filter.
                var keyword = kvp.Value?.ToString()
                    ?? throw new ArgumentException(
                        $"Filter '{kvp.Key}' has a null value.", nameof(filters));

                mustConditions.Add(MatchKeyword(kvp.Key, keyword));
            }
        }

        // The filter stays null (no filtering) when no condition was collected.
        Filter? filter = null;
        if (mustConditions.Count > 0)
        {
            filter = new Filter();
            filter.Must.AddRange(mustConditions);
        }

        var searchResult = await _client.SearchAsync(
            collectionName: _settings.CollectionName,
            vector: vector,
            filter: filter,
            limit: (ulong)limit,
            scoreThreshold: (float)threshold,
            payloadSelector: true,
            // The mapping below never reads the stored vectors, so do not transfer them.
            vectorsSelector: false);

        return searchResult.Select(point => new VectorSearchResult
        {
            // The generated Uuid getter returns "" (never null) for numeric IDs, so
            // "Uuid ?? Num" could not fall through; branch on the oneof case instead.
            Id = point.Id.PointIdOptionsCase == PointId.PointIdOptionsOneofCase.Num
                ? point.Id.Num.ToString()
                : point.Id.Uuid,
            Title = GetStringFromPayload(point.Payload, "title"),
            Content = GetStringFromPayload(point.Payload, "content"),
            ProjectId = GetStringFromPayload(point.Payload, "project_id"),
            Score = point.Score,
            Provider = "Qdrant",
            CreatedAt = GetDateTimeFromPayload(point.Payload, "created_at"),
            UpdatedAt = GetDateTimeFromPayload(point.Payload, "updated_at"),
            Metadata = ConvertPayloadToMetadata(point.Payload)
        }).ToList();
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "Erro na busca vetorial Qdrant");
        throw;
    }
}
|
|
|
|
/// <summary>
/// Searches with <paramref name="minThreshold"/> and, while too few results come
/// back, retries with progressively lower thresholds: first 70% of the initial
/// value, then a fixed 0.2.
/// </summary>
/// <param name="queryEmbedding">Query vector.</param>
/// <param name="projectId">Project whose documents are searched.</param>
/// <param name="minThreshold">Threshold used for the first attempt.</param>
/// <param name="limit">Maximum number of results returned.</param>
/// <returns>At most <paramref name="limit"/> results from the last attempt that ran.</returns>
public async Task<List<VectorSearchResult>> SearchSimilarDynamicAsync(
    double[] queryEmbedding,
    string projectId,
    double minThreshold = 0.5,
    int limit = 5)
{
    const int DesiredResults = 3;
    const double RelaxFactor = 0.7;
    const double FloorThreshold = 0.2;

    // A caller asking for fewer than three results is satisfied once the limit is
    // reached; a lower threshold would return the same top-scoring points anyway.
    var enough = Math.Min(DesiredResults, limit);

    var currentThreshold = minThreshold;
    var results = await SearchSimilarAsync(queryEmbedding, projectId, currentThreshold, limit);

    if (results.Count < enough && currentThreshold > FloorThreshold)
    {
        currentThreshold *= RelaxFactor;
        results = await SearchSimilarAsync(queryEmbedding, projectId, currentThreshold, limit);
    }

    // Only fall back to the floor when it actually relaxes the search. Previously
    // this step also ran when the current threshold was already at or below 0.2,
    // which raised the threshold and could replace the results with fewer hits.
    if (results.Count < enough && currentThreshold > FloorThreshold)
    {
        results = await SearchSimilarAsync(queryEmbedding, projectId, FloorThreshold, limit);
    }

    return results.Take(limit).ToList();
}
|
|
|
|
/// <summary>
/// Stores a new document as a point with a freshly generated GUID identifier.
/// </summary>
/// <param name="title">Stored under the <c>title</c> payload key.</param>
/// <param name="content">Stored under the <c>content</c> payload key.</param>
/// <param name="projectId">Stored under the <c>project_id</c> payload key.</param>
/// <param name="embedding">Document vector; each component is narrowed to <c>float</c>.</param>
/// <param name="metadata">Optional extra values, stored under keys prefixed with <c>meta_</c>.</param>
/// <returns>The identifier of the new point.</returns>
/// <exception cref="Exception">Any failure is logged and rethrown unchanged.</exception>
public async Task<string> AddDocumentAsync(
    string title,
    string content,
    string projectId,
    double[] embedding,
    Dictionary<string, object>? metadata = null)
{
    await EnsureCollectionExistsAsync();

    try
    {
        var id = Guid.NewGuid().ToString();
        var vector = embedding.Select(x => (float)x).ToArray();

        // Read the clock once so a new document has identical created/updated stamps
        // (two separate UtcNow reads differed by a few ticks).
        var timestamp = DateTime.UtcNow.ToString("O");

        var payload = new Dictionary<string, Value>
        {
            ["title"] = title,
            ["content"] = content,
            ["project_id"] = projectId,
            ["created_at"] = timestamp,
            ["updated_at"] = timestamp
        };

        if (metadata is not null)
        {
            foreach (var kvp in metadata)
            {
                payload[$"meta_{kvp.Key}"] = ConvertToValue(kvp.Value);
            }
        }

        var point = new PointStruct
        {
            Id = new PointId { Uuid = id },
            Vectors = vector,
            Payload = { payload }
        };

        await _client.UpsertAsync(
            collectionName: _settings.CollectionName,
            points: new[] { point });

        _logger.LogDebug("Documento {Id} adicionado ao Qdrant", id);
        return id;
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "Erro ao adicionar documento no Qdrant");
        throw;
    }
}
|
|
|
|
/// <summary>
/// Replaces the vector and payload of the point identified by <paramref name="id"/>,
/// keeping its original <c>created_at</c> value.
/// </summary>
/// <param name="id">UUID of the point to update.</param>
/// <param name="title">New <c>title</c> payload value.</param>
/// <param name="content">New <c>content</c> payload value.</param>
/// <param name="projectId">New <c>project_id</c> payload value.</param>
/// <param name="embedding">New vector; each component is narrowed to <c>float</c>.</param>
/// <param name="metadata">Optional extra values, stored under keys prefixed with <c>meta_</c>.</param>
/// <exception cref="Exception">Any failure is logged and rethrown unchanged.</exception>
public async Task UpdateDocumentAsync(
    string id,
    string title,
    string content,
    string projectId,
    double[] embedding,
    Dictionary<string, object>? metadata = null)
{
    await EnsureCollectionExistsAsync();

    try
    {
        var vector = embedding.Select(x => (float)x).ToArray();
        var now = DateTime.UtcNow.ToString("O");

        var payload = new Dictionary<string, Value>
        {
            ["title"] = title,
            ["content"] = content,
            ["project_id"] = projectId,
            ["updated_at"] = now
        };

        // An upsert overwrites the stored point, payload included, so the previous
        // payload carried no created_at after the first update. Read the current
        // point and carry the value over; use "now" when the point is new.
        var existing = await _client.RetrieveAsync(
            collectionName: _settings.CollectionName,
            ids: new PointId[] { new PointId { Uuid = id } },
            withPayload: true,
            withVectors: false);

        var previous = existing.FirstOrDefault();
        if (previous is not null && previous.Payload.TryGetValue("created_at", out var createdAt))
        {
            payload["created_at"] = createdAt.Clone();
        }
        else
        {
            payload["created_at"] = now;
        }

        if (metadata is not null)
        {
            foreach (var kvp in metadata)
            {
                payload[$"meta_{kvp.Key}"] = ConvertToValue(kvp.Value);
            }
        }

        var point = new PointStruct
        {
            Id = new PointId { Uuid = id },
            Vectors = vector,
            Payload = { payload }
        };

        await _client.UpsertAsync(
            collectionName: _settings.CollectionName,
            points: new[] { point });

        _logger.LogDebug("Documento {Id} atualizado no Qdrant", id);
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "Erro ao atualizar documento {Id} no Qdrant", id);
        throw;
    }
}
|
|
|
|
/// <summary>
/// Deletes the point identified by <paramref name="id"/>.
/// </summary>
/// <param name="id">Point identifier: a UUID string, or an unsigned integer in decimal form.</param>
/// <exception cref="ArgumentException"><paramref name="id"/> is neither a UUID nor an unsigned integer (logged, then rethrown).</exception>
/// <exception cref="Exception">Any other failure is logged and rethrown unchanged.</exception>
public async Task DeleteDocumentAsync(string id)
{
    await EnsureCollectionExistsAsync();

    try
    {
        // The previous code built a UUID PointId and then deleted by its Num member,
        // which is 0 for a UUID identifier, so the requested document was never
        // removed. Delete by the identifier's real kind.
        if (Guid.TryParse(id, out var guid))
        {
            await _client.DeleteAsync(_settings.CollectionName, guid);
        }
        else if (ulong.TryParse(
            id,
            System.Globalization.NumberStyles.None,
            System.Globalization.CultureInfo.InvariantCulture,
            out var numericId))
        {
            await _client.DeleteAsync(_settings.CollectionName, numericId);
        }
        else
        {
            throw new ArgumentException(
                $"'{id}' is not a valid point identifier (expected a UUID or an unsigned integer).",
                nameof(id));
        }

        _logger.LogDebug("Documento {Id} removido do Qdrant", id);
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "Erro ao remover documento {Id} do Qdrant", id);
        throw;
    }
}
|
|
|
|
/// <summary>
/// Reports whether a document with the given identifier can be retrieved.
/// </summary>
/// <param name="id">Identifier of the document to look up.</param>
/// <returns>
/// <c>true</c> when <see cref="GetDocumentAsync"/> returns a document; <c>false</c> when it
/// returns null or when the lookup throws.
/// </returns>
public async Task<bool> DocumentExistsAsync(string id)
{
    try
    {
        return await GetDocumentAsync(id) is not null;
    }
    catch (Exception ex)
    {
        // Still best-effort (the result stays false), but the failure is no longer
        // invisible: "missing" and "lookup failed" can be told apart in the logs.
        _logger.LogWarning(ex, "Erro ao verificar existência do documento {Id} no Qdrant", id);
        return false;
    }
}
|
|
|
|
/// <summary>
/// Retrieves a single document by its UUID, with payload and without its vector.
/// </summary>
/// <param name="id">UUID of the point to retrieve.</param>
/// <returns>
/// The mapped document with a fixed score of 1.0, or <c>null</c> when no point is
/// returned or when retrieval fails (the failure is logged, not rethrown).
/// </returns>
public async Task<VectorSearchResult?> GetDocumentAsync(string id)
{
    await EnsureCollectionExistsAsync();

    try
    {
        var results = await _client.RetrieveAsync(
            collectionName: _settings.CollectionName,
            ids: new PointId[] { new PointId { Uuid = id } },
            withPayload: true,
            withVectors: false);

        var point = results.FirstOrDefault();
        if (point is null)
        {
            return null;
        }

        return new VectorSearchResult
        {
            // The generated Uuid getter returns "" (never null) for numeric IDs, so
            // "Uuid ?? Num" could not fall through; branch on the oneof case instead.
            Id = point.Id.PointIdOptionsCase == PointId.PointIdOptionsOneofCase.Num
                ? point.Id.Num.ToString()
                : point.Id.Uuid,
            Title = GetStringFromPayload(point.Payload, "title"),
            Content = GetStringFromPayload(point.Payload, "content"),
            ProjectId = GetStringFromPayload(point.Payload, "project_id"),
            Score = 1.0,
            Provider = "Qdrant",
            CreatedAt = GetDateTimeFromPayload(point.Payload, "created_at"),
            UpdatedAt = GetDateTimeFromPayload(point.Payload, "updated_at"),
            Metadata = ConvertPayloadToMetadata(point.Payload)
        };
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "Erro ao recuperar documento {Id} do Qdrant", id);
        return null;
    }
}
|
|
|
|
/// <summary>
/// Returns every document whose <c>project_id</c> payload matches
/// <paramref name="projectId"/>, reading the collection page by page.
/// </summary>
/// <param name="projectId">Project identifier to match.</param>
/// <returns>All matching documents, each with a fixed score of 1.0.</returns>
/// <exception cref="Exception">Any failure is logged and rethrown unchanged.</exception>
public async Task<List<VectorSearchResult>> GetDocumentsByProjectAsync(string projectId)
{
    await EnsureCollectionExistsAsync();

    try
    {
        const uint PageSize = 1000;

        var filter = new Filter();
        filter.Must.Add(MatchKeyword("project_id", projectId));

        var documents = new List<VectorSearchResult>();
        PointId? offset = null;

        // A single scroll with limit 10000 silently dropped everything past the first
        // page. Follow NextPageOffset until the server reports no further page.
        do
        {
            var page = await _client.ScrollAsync(
                collectionName: _settings.CollectionName,
                filter: filter,
                limit: PageSize,
                offset: offset,
                payloadSelector: true,
                // The mapping below never reads the stored vectors, so do not transfer them.
                vectorsSelector: false);

            documents.AddRange(page.Result.Select(point => new VectorSearchResult
            {
                // The generated Uuid getter returns "" (never null) for numeric IDs, so
                // "Uuid ?? Num" could not fall through; branch on the oneof case instead.
                Id = point.Id.PointIdOptionsCase == PointId.PointIdOptionsOneofCase.Num
                    ? point.Id.Num.ToString()
                    : point.Id.Uuid,
                Title = GetStringFromPayload(point.Payload, "title"),
                Content = GetStringFromPayload(point.Payload, "content"),
                ProjectId = GetStringFromPayload(point.Payload, "project_id"),
                Score = 1.0,
                Provider = "Qdrant",
                CreatedAt = GetDateTimeFromPayload(point.Payload, "created_at"),
                UpdatedAt = GetDateTimeFromPayload(point.Payload, "updated_at"),
                Metadata = ConvertPayloadToMetadata(point.Payload)
            }));

            offset = page.NextPageOffset;
        }
        while (offset is not null);

        return documents;
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "Erro ao buscar documentos do projeto {ProjectId} no Qdrant", projectId);
        throw;
    }
}
|
|
|
|
/// <summary>
/// Counts the points in the collection, optionally restricted to one project.
/// </summary>
/// <param name="projectId">When non-empty, only points whose <c>project_id</c> payload matches are counted.</param>
/// <returns>The count cast to <c>int</c>, or 0 when counting fails (the failure is logged).</returns>
public async Task<int> GetDocumentCountAsync(string? projectId = null)
{
    await EnsureCollectionExistsAsync();

    try
    {
        // No project given means no filter: the whole collection is counted.
        Filter? scope = string.IsNullOrEmpty(projectId)
            ? null
            : new Filter { Must = { MatchKeyword("project_id", projectId) } };

        var total = await _client.CountAsync(_settings.CollectionName, scope);
        return (int)total;
    }
    catch (Exception failure)
    {
        _logger.LogError(failure, "Erro ao contar documentos no Qdrant");
        return 0;
    }
}
|
|
|
|
/// <summary>
/// Probes the server by listing its collections.
/// </summary>
/// <returns><c>true</c> when the listing call returns a non-null result; <c>false</c> when it returns null or throws.</returns>
public async Task<bool> IsHealthyAsync()
{
    try
    {
        return await _client.ListCollectionsAsync() is not null;
    }
    catch
    {
        // Any failure to reach or query the server counts as unhealthy.
        return false;
    }
}
|
|
|
|
/// <summary>
/// Collects a snapshot of provider statistics: document count, collection
/// configuration, point and segment counts, and health.
/// </summary>
/// <returns>
/// The statistics dictionary on success. On failure, a reduced dictionary with
/// <c>health</c> set to false and the exception message under <c>error</c>.
/// </returns>
public async Task<Dictionary<string, object>> GetStatsAsync()
{
    try
    {
        await EnsureCollectionExistsAsync();

        var collectionInfo = await _client.GetCollectionInfoAsync(_settings.CollectionName);
        var totalDocs = await GetDocumentCountAsync();

        return new Dictionary<string, object>
        {
            ["provider"] = "Qdrant",
            ["total_documents"] = totalDocs,
            ["collection_name"] = _settings.CollectionName,
            ["vector_size"] = _settings.VectorSize,
            ["distance_metric"] = _settings.Distance,
            ["points_count"] = collectionInfo.PointsCount,
            ["segments_count"] = collectionInfo.SegmentsCount,
            ["health"] = await IsHealthyAsync(),
            ["last_check"] = DateTime.UtcNow
        };
    }
    catch (Exception ex)
    {
        // The caller only receives the message, so log the full exception here;
        // otherwise the stack trace is lost.
        _logger.LogError(ex, "Erro ao obter estatísticas do Qdrant");

        return new Dictionary<string, object>
        {
            ["provider"] = "Qdrant",
            ["health"] = false,
            ["error"] = ex.Message,
            ["last_check"] = DateTime.UtcNow
        };
    }
}
|
|
|
|
/// <summary>
/// Converts an arbitrary metadata value to a payload <see cref="Value"/>.
/// Strings, <c>int</c>, <c>long</c>, <c>double</c>, <c>float</c> and <c>bool</c> keep their kind;
/// a <see cref="DateTime"/> is stored as its round-trip ("O") string; anything else is stored
/// as its <c>ToString()</c> text, or an empty string when null.
/// </summary>
/// <param name="value">The value to convert.</param>
/// <returns>The payload representation of <paramref name="value"/>.</returns>
private static Value ConvertToValue(object value)
{
    switch (value)
    {
        case string text:
            return text;
        case int int32:
            return int32;
        case long int64:
            return int64;
        case double float64:
            return float64;
        case float float32:
            return float32;
        case bool flag:
            return flag;
        case DateTime moment:
            return moment.ToString("O");
        default:
            return value?.ToString() ?? "";
    }
}
|
|
|
|
/// <summary>
/// Reads the string stored under <paramref name="key"/> in a point payload.
/// </summary>
/// <param name="payload">Payload to read from.</param>
/// <param name="key">Payload key to look up.</param>
/// <param name="defaultValue">Returned when the key is absent.</param>
/// <returns>The entry's <c>StringValue</c>, or <paramref name="defaultValue"/> when the key is missing.</returns>
private static string GetStringFromPayload(
    IDictionary<string, Value> payload,
    string key,
    string defaultValue = "")
{
    if (!payload.TryGetValue(key, out var entry))
    {
        return defaultValue;
    }

    return entry.StringValue;
}
|
|
|
|
/// <summary>
/// Reads a timestamp stored as a string under <paramref name="key"/> in a point payload.
/// </summary>
/// <param name="payload">Payload to read from.</param>
/// <param name="key">Payload key to look up.</param>
/// <returns>
/// The parsed timestamp, or the current UTC time when the key is missing or the
/// text cannot be parsed.
/// </returns>
private static DateTime GetDateTimeFromPayload(
    IDictionary<string, Value> payload,
    string key)
{
    // Timestamps are written with the round-trip ("O") format from UTC values.
    // Parse them culture-independently and keep the encoded kind: the default
    // TryParse overload converts a "Z"-suffixed value to local time, which then
    // disagreed with the UTC fallback below.
    if (payload.TryGetValue(key, out var value) &&
        DateTime.TryParse(
            value.StringValue,
            System.Globalization.CultureInfo.InvariantCulture,
            System.Globalization.DateTimeStyles.RoundtripKind,
            out var date))
    {
        return date;
    }

    return DateTime.UtcNow;
}
|
|
|
|
/// <summary>
/// Extracts the entries whose key starts with <c>meta_</c> from a point payload
/// and returns them with the prefix removed.
/// </summary>
/// <param name="payload">Payload to read from.</param>
/// <returns>
/// The metadata dictionary (string, integer, double and bool kinds keep their
/// value; any other kind yields the entry's <c>StringValue</c>), or <c>null</c>
/// when the payload has no metadata entries.
/// </returns>
private static Dictionary<string, object>? ConvertPayloadToMetadata(
    IDictionary<string, Value> payload)
{
    const string MetadataPrefix = "meta_";

    var metadata = new Dictionary<string, object>();

    foreach (var kvp in payload)
    {
        // Ordinal comparison: the culture-sensitive StartsWith overload can match
        // keys that do not literally begin with the five prefix characters, which
        // would make the fixed-length strip below cut the wrong text.
        if (!kvp.Key.StartsWith(MetadataPrefix, StringComparison.Ordinal))
        {
            continue;
        }

        var key = kvp.Key.Substring(MetadataPrefix.Length);
        var value = kvp.Value;

        metadata[key] = value.KindCase switch
        {
            Value.KindOneofCase.StringValue => value.StringValue,
            Value.KindOneofCase.IntegerValue => value.IntegerValue,
            Value.KindOneofCase.DoubleValue => value.DoubleValue,
            Value.KindOneofCase.BoolValue => value.BoolValue,
            _ => value.StringValue
        };
    }

    return metadata.Count > 0 ? metadata : null;
}
|
|
|
|
/// <summary>
/// Disposes the underlying Qdrant client, if one is set.
/// </summary>
public void Dispose()
{
    if (_client is not null)
    {
        _client.Dispose();
    }
}
|
|
}
|
|
}
|
|
|
|
#pragma warning restore SKEXP0001 |