using ChatRAG.Contracts.VectorSearch;
using ChatRAG.Models;
using Microsoft.Extensions.Options;
using System.Text;
using System.Text.Json;
using Microsoft.SemanticKernel.Embeddings;
using ChatRAG.Settings.ChatRAG.Configuration;

namespace ChatRAG.Services.SearchVectors
{
    /// <summary>
    /// <see cref="IVectorSearchService"/> implementation that talks to a Chroma server
    /// through its HTTP API (<c>/api/v1/...</c>).
    /// </summary>
    /// <remarks>
    /// NOTE(review): the generic type arguments in this file were lost in the pasted source and
    /// have been restored from usage. The <c>IOptions&lt;T&gt;</c> argument of the constructor
    /// (<c>VectorDatabaseSettings</c>) could not be inferred and is an assumption - confirm it
    /// against the DI registration.
    /// </remarks>
    public class ChromaVectorSearchService : IVectorSearchService
    {
        private const string CollectionsPath = "/api/v1/collections";

        private readonly HttpClient _httpClient;
        private readonly ILogger<ChromaVectorSearchService> _logger;
        private readonly ChromaSettings _settings;
        private readonly string _collectionName;

        /// <summary>
        /// Configures the HTTP client from the Chroma settings and makes sure the configured
        /// collection exists.
        /// </summary>
        /// <param name="settings">Options whose <c>Chroma</c> section holds host, port and collection name.</param>
        /// <param name="logger">Logger used for diagnostics.</param>
        /// <param name="httpClient">HTTP client; its <see cref="HttpClient.BaseAddress"/> is overwritten here.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="settings"/> is null or has no Chroma section.
        /// </exception>
        public ChromaVectorSearchService(
            IOptions<VectorDatabaseSettings> settings,
            ILogger<ChromaVectorSearchService> logger,
            HttpClient httpClient)
        {
            ArgumentNullException.ThrowIfNull(settings);

            // The one-argument ArgumentNullException constructor takes a parameter name, not a
            // message, so the message is passed through the two-argument overload.
            _settings = settings.Value.Chroma
                ?? throw new ArgumentNullException(nameof(settings), "Chroma settings not configured");
            _logger = logger;
            _httpClient = httpClient;
            _httpClient.BaseAddress = new Uri($"http://{_settings.Host}:{_settings.Port}");
            _collectionName = _settings.CollectionName;

            // NOTE(review): this blocks on async I/O inside a constructor. It is kept because
            // callers currently rely on construction failing when Chroma is unreachable; moving
            // to lazy or hosted-service initialization would change when that failure surfaces.
            InitializeAsync().GetAwaiter().GetResult();
        }

        /// <summary>
        /// Creates the configured collection when it is not in the server's collection list.
        /// </summary>
        private async Task InitializeAsync()
        {
            try
            {
                var collections = await GetCollectionsAsync();

                if (!collections.Contains(_collectionName))
                {
                    await CreateCollectionAsync();
                }
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Erro ao inicializar Chroma");
                throw;
            }
        }

        // ========================================
        // VECTOR SEARCH
        // ========================================

        /// <summary>
        /// Queries the collection for the nearest neighbours of <paramref name="queryEmbedding"/>.
        /// </summary>
        /// <param name="queryEmbedding">Embedding of the query text.</param>
        /// <param name="projectId">When set, restricts results to documents whose <c>project_id</c> matches.</param>
        /// <param name="threshold">Minimum similarity (computed as <c>1 - distance</c>) a hit must reach.</param>
        /// <param name="limit">Maximum number of neighbours requested from the server.</param>
        /// <param name="filters">Additional metadata equality filters.</param>
        /// <returns>
        /// Hits ordered by descending score. An empty list is returned when the request fails;
        /// failures are logged and never thrown.
        /// </returns>
        public async Task<List<VectorSearchResult>> SearchSimilarAsync(
            double[] queryEmbedding,
            string? projectId = null,
            double threshold = 0.3,
            int limit = 5,
            Dictionary<string, object>? filters = null)
        {
            try
            {
                var query = new
                {
                    query_embeddings = new[] { queryEmbedding },
                    n_results = limit,
                    where = BuildWhereClause(projectId, filters),
                    include = new[] { "documents", "metadatas", "distances" }
                };

                using var response = await PostJsonAsync(CollectionPath("query"), query);

                if (!response.IsSuccessStatusCode)
                {
                    var error = await response.Content.ReadAsStringAsync();
                    _logger.LogError("Erro na busca Chroma: {Error}", error);
                    return new List<VectorSearchResult>();
                }

                var result = await response.Content.ReadAsStringAsync();
                var queryResult = JsonSerializer.Deserialize<ChromaQueryResult>(result);

                return ParseQueryResults(queryResult, threshold);
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Erro ao buscar similares no Chroma");
                return new List<VectorSearchResult>();
            }
        }

        /// <summary>
        /// Searches with progressively lower similarity thresholds until <paramref name="limit"/>
        /// results are available or the lowest threshold (0.2) has been tried.
        /// </summary>
        /// <param name="queryEmbedding">Embedding of the query text.</param>
        /// <param name="projectId">Project whose documents are searched.</param>
        /// <param name="minThreshold">Threshold used for the first attempt.</param>
        /// <param name="limit">Maximum number of results returned.</param>
        /// <returns>At most <paramref name="limit"/> results.</returns>
        public async Task<List<VectorSearchResult>> SearchSimilarDynamicAsync(
            double[] queryEmbedding,
            string projectId,
            double minThreshold = 0.5,
            int limit = 5)
        {
            // Strategy 1: search with the caller's (high) threshold.
            var results = await SearchSimilarAsync(queryEmbedding, projectId, minThreshold, limit);

            if (results.Count >= limit)
            {
                return results.Take(limit).ToList();
            }

            // Strategy 2: relax the threshold and over-fetch when there were not enough hits.
            if (minThreshold > 0.35)
            {
                var mediumResults = await SearchSimilarAsync(queryEmbedding, projectId, 0.35, limit * 2);

                if (mediumResults.Count >= limit)
                {
                    return mediumResults.Take(limit).ToList();
                }

                results = mediumResults;
            }

            // Strategy 3: low threshold as a last resort.
            if (results.Count < limit && minThreshold > 0.2)
            {
                results = await SearchSimilarAsync(queryEmbedding, projectId, 0.2, limit * 3);
            }

            return results.Take(limit).ToList();
        }

        // ========================================
        // DOCUMENT CRUD
        // ========================================

        /// <summary>
        /// Adds a document under a freshly generated GUID.
        /// </summary>
        /// <param name="title">Stored in the <c>title</c> metadata key.</param>
        /// <param name="content">Document text.</param>
        /// <param name="projectId">Stored in the <c>project_id</c> metadata key.</param>
        /// <param name="embedding">Embedding of <paramref name="content"/>.</param>
        /// <param name="metadata">Extra metadata; entries override the built-in keys on collision.</param>
        /// <returns>The generated document id.</returns>
        /// <exception cref="HttpRequestException">The server answered with a non-success status.</exception>
        public async Task<string> AddDocumentAsync(
            string title,
            string content,
            string projectId,
            double[] embedding,
            Dictionary<string, object>? metadata = null)
        {
            try
            {
                var documentId = Guid.NewGuid().ToString();

                var document = new
                {
                    ids = new[] { documentId },
                    documents = new[] { content },
                    metadatas = new[] { BuildMetadata(title, projectId, "created_at", metadata) },
                    embeddings = new[] { embedding }
                };

                using var response = await PostJsonAsync(CollectionPath("add"), document);
                await ThrowIfFailedAsync(response, "Erro ao adicionar documento");

                return documentId;
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Erro ao adicionar documento no Chroma");
                throw;
            }
        }

        /// <summary>
        /// Replaces the document stored under <paramref name="id"/>.
        /// </summary>
        /// <param name="id">Id of the document to replace.</param>
        /// <param name="title">Stored in the <c>title</c> metadata key.</param>
        /// <param name="content">New document text.</param>
        /// <param name="projectId">Stored in the <c>project_id</c> metadata key.</param>
        /// <param name="embedding">Embedding of <paramref name="content"/>.</param>
        /// <param name="metadata">Extra metadata; entries override the built-in keys on collision.</param>
        /// <exception cref="HttpRequestException">The upsert was rejected by the server.</exception>
        public async Task UpdateDocumentAsync(
            string id,
            string title,
            string content,
            string projectId,
            double[] embedding,
            Dictionary<string, object>? metadata = null)
        {
            try
            {
                // The old entry is deleted first and the new one written with an upsert.
                // NOTE(review): the two calls are not atomic - if the upsert fails the document
                // is gone. Presumably the delete is there so stale metadata keys do not survive;
                // confirm before simplifying to a plain upsert.
                await DeleteDocumentAsync(id);

                var document = new
                {
                    ids = new[] { id },
                    documents = new[] { content },
                    metadatas = new[] { BuildMetadata(title, projectId, "updated_at", metadata) },
                    embeddings = new[] { embedding }
                };

                using var response = await PostJsonAsync(CollectionPath("upsert"), document);
                await ThrowIfFailedAsync(response, "Erro ao atualizar documento");
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Erro ao atualizar documento no Chroma");
                throw;
            }
        }

        /// <summary>
        /// Deletes the document stored under <paramref name="id"/>. A non-success status is only
        /// logged as a warning; transport errors are logged and rethrown.
        /// </summary>
        /// <param name="id">Id of the document to delete.</param>
        public async Task DeleteDocumentAsync(string id)
        {
            try
            {
                var deleteRequest = new { ids = new[] { id } };

                using var response = await PostJsonAsync(CollectionPath("delete"), deleteRequest);

                if (!response.IsSuccessStatusCode)
                {
                    var error = await response.Content.ReadAsStringAsync();
                    _logger.LogWarning("Erro ao deletar documento {Id}: {Error}", id, error);
                }
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Erro ao deletar documento {Id} no Chroma", id);
                throw;
            }
        }

        // ========================================
        // AUXILIARY QUERIES
        // ========================================

        /// <summary>
        /// Reports whether a document with the given id can be fetched.
        /// </summary>
        /// <param name="id">Document id to look up.</param>
        /// <returns><c>true</c> when <see cref="GetDocumentAsync"/> finds the document.</returns>
        public async Task<bool> DocumentExistsAsync(string id)
        {
            // GetDocumentAsync already logs and swallows every failure (returning null), so no
            // additional catch is needed here.
            return await GetDocumentAsync(id) != null;
        }

        /// <summary>
        /// Fetches a single document by id.
        /// </summary>
        /// <param name="id">Document id to fetch.</param>
        /// <returns>
        /// The document with a fixed score of 1.0, or <c>null</c> when it is missing or the
        /// request fails.
        /// </returns>
        public async Task<VectorSearchResult?> GetDocumentAsync(string id)
        {
            try
            {
                var query = new
                {
                    ids = new[] { id },
                    include = new[] { "documents", "metadatas" }
                };

                using var response = await PostJsonAsync(CollectionPath("get"), query);

                if (!response.IsSuccessStatusCode)
                {
                    return null;
                }

                var result = await response.Content.ReadAsStringAsync();
                var getResult = JsonSerializer.Deserialize<ChromaGetResult>(result);

                if (getResult?.ids?.Length > 0)
                {
                    return new VectorSearchResult
                    {
                        Id = getResult.ids[0],
                        // ElementAtOrDefault guards against a present-but-empty array.
                        Content = getResult.documents?.ElementAtOrDefault(0) ?? "",
                        Score = 1.0,
                        Metadata = getResult.metadatas?.ElementAtOrDefault(0)
                    };
                }

                return null;
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Erro ao buscar documento {Id} no Chroma", id);
                return null;
            }
        }

        /// <summary>
        /// Fetches every document whose <c>project_id</c> metadata equals <paramref name="projectId"/>.
        /// </summary>
        /// <param name="projectId">Project to list.</param>
        /// <returns>The documents, each with a fixed score of 1.0; empty on failure.</returns>
        public async Task<List<VectorSearchResult>> GetDocumentsByProjectAsync(string projectId)
        {
            try
            {
                var query = new
                {
                    where = new { project_id = projectId },
                    include = new[] { "documents", "metadatas" }
                };

                using var response = await PostJsonAsync(CollectionPath("get"), query);

                if (!response.IsSuccessStatusCode)
                {
                    var error = await response.Content.ReadAsStringAsync();
                    _logger.LogError("Erro ao buscar documentos do projeto {ProjectId}: {Error}", projectId, error);
                    return new List<VectorSearchResult>();
                }

                var result = await response.Content.ReadAsStringAsync();
                var getResult = JsonSerializer.Deserialize<ChromaGetResult>(result);

                var results = new List<VectorSearchResult>();
                var documents = getResult?.documents;
                var ids = getResult?.ids;

                if (documents is null || ids is null)
                {
                    return results;
                }

                // Only walk positions present in both parallel arrays.
                var count = Math.Min(documents.Length, ids.Length);
                for (var i = 0; i < count; i++)
                {
                    results.Add(new VectorSearchResult
                    {
                        Id = ids[i],
                        Content = documents[i] ?? "",
                        Score = 1.0, // every document of the project is returned, unranked
                        Metadata = getResult!.metadatas?.ElementAtOrDefault(i)
                    });
                }

                return results;
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Erro ao buscar documentos do projeto {ProjectId} no Chroma", projectId);
                return new List<VectorSearchResult>();
            }
        }

        /// <summary>
        /// Counts documents in the collection, optionally restricted to one project.
        /// </summary>
        /// <param name="projectId">When set, sent as a <c>project_id</c> filter.</param>
        /// <returns>The count reported by the server, or 0 on any failure.</returns>
        public async Task<int> GetDocumentCountAsync(string? projectId = null)
        {
            try
            {
                var query = new
                {
                    where = projectId != null ? new { project_id = projectId } : null
                };

                using var response = await PostJsonAsync(CollectionPath("count"), query);

                if (!response.IsSuccessStatusCode)
                {
                    _logger.LogWarning("Erro ao contar documentos no Chroma");
                    return 0;
                }

                var result = await response.Content.ReadAsStringAsync();
                return ParseCount(result);
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Erro ao contar documentos no Chroma");
                return 0;
            }
        }

        // ========================================
        // HEALTH CHECK AND METRICS
        // ========================================

        /// <summary>
        /// Calls the heartbeat endpoint.
        /// </summary>
        /// <returns><c>true</c> on a success status; <c>false</c> otherwise or on error.</returns>
        public async Task<bool> IsHealthyAsync()
        {
            try
            {
                using var response = await _httpClient.GetAsync("/api/v1/heartbeat");
                return response.IsSuccessStatusCode;
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Erro no health check do Chroma");
                return false;
            }
        }

        /// <summary>
        /// Collects provider, connection and collection information plus the total document count.
        /// </summary>
        /// <returns>
        /// A dictionary of stats; on failure a dictionary holding <c>provider</c>, <c>error</c>
        /// and <c>status</c>.
        /// </returns>
        public async Task<Dictionary<string, object>> GetStatsAsync()
        {
            try
            {
                var stats = new Dictionary<string, object>
                {
                    ["provider"] = "Chroma",
                    ["collection"] = _collectionName,
                    ["host"] = _settings.Host,
                    ["port"] = _settings.Port
                };

                // Try to attach whatever the server reports about the collection.
                using var response = await _httpClient.GetAsync(
                    $"{CollectionsPath}/{Uri.EscapeDataString(_collectionName)}");

                if (response.IsSuccessStatusCode)
                {
                    var content = await response.Content.ReadAsStringAsync();
                    var collectionInfo = JsonSerializer.Deserialize<Dictionary<string, object>>(content);

                    if (collectionInfo != null)
                    {
                        stats["collection_info"] = collectionInfo;
                    }
                }

                stats["total_documents"] = await GetDocumentCountAsync();

                return stats;
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Erro ao obter stats do Chroma");
                return new Dictionary<string, object>
                {
                    ["provider"] = "Chroma",
                    ["error"] = ex.Message,
                    ["status"] = "error"
                };
            }
        }

        // ========================================
        // PRIVATE HELPERS
        // ========================================

        /// <summary>
        /// Builds the request path for an operation on the configured collection.
        /// </summary>
        /// <remarks>
        /// NOTE(review): the collection name is used as the path segment. Some Chroma versions
        /// appear to expect the collection id (UUID) in item-level routes - verify against the
        /// deployed server.
        /// </remarks>
        private string CollectionPath(string operation) =>
            $"{CollectionsPath}/{Uri.EscapeDataString(_collectionName)}/{operation}";

        /// <summary>
        /// Serializes <paramref name="payload"/> as JSON and POSTs it; the caller owns (and must
        /// dispose) the returned response.
        /// </summary>
        private async Task<HttpResponseMessage> PostJsonAsync(string path, object payload)
        {
            // Serialize by runtime type so anonymous-type payloads keep all their properties.
            var json = JsonSerializer.Serialize(payload, payload.GetType());
            using var body = new StringContent(json, Encoding.UTF8, "application/json");
            return await _httpClient.PostAsync(path, body);
        }

        /// <summary>
        /// Throws an <see cref="HttpRequestException"/> carrying the response body when the
        /// response is not a success.
        /// </summary>
        private static async Task ThrowIfFailedAsync(HttpResponseMessage response, string message)
        {
            if (response.IsSuccessStatusCode)
            {
                return;
            }

            var error = await response.Content.ReadAsStringAsync();
            throw new HttpRequestException($"{message}: {error}", null, response.StatusCode);
        }

        /// <summary>
        /// Builds the metadata dictionary stored with a document; caller-supplied entries win
        /// over the built-in keys.
        /// </summary>
        private static Dictionary<string, object> BuildMetadata(
            string title,
            string projectId,
            string timestampKey,
            Dictionary<string, object>? metadata)
        {
            var combined = new Dictionary<string, object>
            {
                ["title"] = title,
                ["project_id"] = projectId,
                [timestampKey] = DateTime.UtcNow.ToString("O")
            };

            if (metadata != null)
            {
                foreach (var kvp in metadata)
                {
                    combined[kvp.Key] = kvp.Value;
                }
            }

            return combined;
        }

        /// <summary>
        /// Reads a count response that is either a bare JSON number or an object with a
        /// <c>count</c> property.
        /// </summary>
        /// <remarks>
        /// NOTE(review): the bare-number shape is handled because some server versions appear to
        /// return it - confirm against the deployed server.
        /// </remarks>
        private static int ParseCount(string json)
        {
            using var document = JsonDocument.Parse(json);
            var root = document.RootElement;

            return root.ValueKind switch
            {
                JsonValueKind.Number => root.TryGetInt32(out var value) ? value : 0,
                JsonValueKind.Object => root.Deserialize<ChromaCountResult>()?.count ?? 0,
                _ => 0
            };
        }

        /// <summary>
        /// Lists the collection names known to the server. Both response shapes are accepted:
        /// an array of strings and an array of objects with a <c>name</c> property.
        /// </summary>
        /// <returns>The names, or an empty array on any failure.</returns>
        private async Task<string[]> GetCollectionsAsync()
        {
            try
            {
                using var response = await _httpClient.GetAsync(CollectionsPath);

                if (!response.IsSuccessStatusCode)
                {
                    _logger.LogWarning("Erro ao obter collections: {StatusCode}", response.StatusCode);
                    return Array.Empty<string>();
                }

                var content = await response.Content.ReadAsStringAsync();

                try
                {
                    // Inspect the JSON shape directly instead of attempting one deserialization
                    // and falling back to another from inside a catch block.
                    using var document = JsonDocument.Parse(content);
                    var root = document.RootElement;

                    if (root.ValueKind == JsonValueKind.Null)
                    {
                        return Array.Empty<string>();
                    }

                    if (root.ValueKind != JsonValueKind.Array)
                    {
                        _logger.LogWarning("Não foi possível parsear lista de collections");
                        return Array.Empty<string>();
                    }

                    var names = new List<string>(root.GetArrayLength());

                    foreach (var element in root.EnumerateArray())
                    {
                        switch (element.ValueKind)
                        {
                            case JsonValueKind.String:
                                names.Add(element.GetString() ?? "");
                                break;

                            case JsonValueKind.Object:
                                var hasName = element.TryGetProperty("name", out var name)
                                    && name.ValueKind == JsonValueKind.String;
                                names.Add(hasName ? name.GetString() ?? "" : "");
                                break;

                            default:
                                _logger.LogWarning("Não foi possível parsear lista de collections");
                                return Array.Empty<string>();
                        }
                    }

                    return names.ToArray();
                }
                catch (JsonException)
                {
                    _logger.LogWarning("Não foi possível parsear lista de collections");
                    return Array.Empty<string>();
                }
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Erro ao buscar collections");
                return Array.Empty<string>();
            }
        }

        /// <summary>
        /// Creates the configured collection. A plain create is tried first, then a create with
        /// <c>get_or_create</c>; if both are rejected the collection is assumed to exist.
        /// </summary>
        private async Task CreateCollectionAsync()
        {
            var collection = new
            {
                name = _collectionName,
                metadata = new
                {
                    description = "RAG Collection",
                    created_at = DateTime.UtcNow.ToString("O")
                }
            };

            // First approach: plain create.
            using var response = await PostJsonAsync(CollectionsPath, collection);

            if (!response.IsSuccessStatusCode)
            {
                _logger.LogWarning("Método POST falhou, tentando abordagem alternativa");

                // Second approach: let the server return the collection if it already exists.
                var createPayload = new
                {
                    name = _collectionName,
                    metadata = new
                    {
                        description = "RAG Collection",
                        created_at = DateTime.UtcNow.ToString("O")
                    },
                    get_or_create = true
                };

                using var createResponse = await PostJsonAsync(CollectionsPath, createPayload);

                if (!createResponse.IsSuccessStatusCode)
                {
                    var error = await createResponse.Content.ReadAsStringAsync();
                    _logger.LogError("Erro ao criar collection: {Error}", error);

                    // Last resort: carry on as if the collection already exists.
                    _logger.LogWarning("Assumindo que collection {CollectionName} já existe", _collectionName);
                    return;
                }
            }

            _logger.LogInformation("Collection {CollectionName} criada/verificada com sucesso", _collectionName);
        }

        /// <summary>
        /// Builds the metadata filter sent as <c>where</c>.
        /// </summary>
        /// <returns>
        /// <c>null</c> when there is nothing to filter on, the single condition when there is
        /// exactly one, or an <c>$and</c> of one-key conditions when there are several.
        /// </returns>
        private object? BuildWhereClause(string? projectId, Dictionary<string, object>? filters)
        {
            var conditions = new Dictionary<string, object>();

            if (!string.IsNullOrEmpty(projectId))
            {
                conditions["project_id"] = projectId;
            }

            if (filters != null)
            {
                foreach (var filter in filters)
                {
                    conditions[filter.Key] = filter.Value;
                }
            }

            if (conditions.Count == 0)
            {
                return null;
            }

            if (conditions.Count == 1)
            {
                return conditions;
            }

            // Chroma's where-filter validation is understood to accept a single top-level key
            // only, so several conditions are combined explicitly with $and.
            return new Dictionary<string, object>
            {
                ["$and"] = conditions
                    .Select(c => new Dictionary<string, object> { [c.Key] = c.Value })
                    .ToList()
            };
        }

        /// <summary>
        /// Converts the first (and only) query of a Chroma query response into results, keeping
        /// hits whose similarity reaches <paramref name="threshold"/>.
        /// </summary>
        /// <returns>Matching hits ordered by descending score.</returns>
        private List<VectorSearchResult> ParseQueryResults(ChromaQueryResult? queryResult, double threshold)
        {
            var results = new List<VectorSearchResult>();

            var documents = queryResult?.documents?.FirstOrDefault();
            if (documents is null || documents.Length == 0)
            {
                return results;
            }

            var ids = queryResult!.ids?.FirstOrDefault();
            var distances = queryResult.distances?.FirstOrDefault();
            var metadatas = queryResult.metadatas?.FirstOrDefault();

            for (var i = 0; i < documents.Length; i++)
            {
                // A missing distance is treated as 1.0, i.e. similarity 0.
                var distance = distances is not null && i < distances.Length ? distances[i] : 1.0;

                // Chroma returns distances; convert to a similarity as 1 - distance.
                // NOTE(review): this is only a bounded similarity if the collection uses a
                // metric whose distances stay within [0, 1] - confirm the collection's metric.
                var similarity = 1.0 - distance;

                if (similarity >= threshold)
                {
                    results.Add(new VectorSearchResult
                    {
                        Id = ids?.ElementAtOrDefault(i) ?? "",
                        Content = documents[i],
                        Score = similarity,
                        Metadata = metadatas?.ElementAtOrDefault(i)
                    });
                }
            }

            return results.OrderByDescending(r => r.Score).ToList();
        }
    }

    // ========================================
    // DTOs FOR THE CHROMA API
    // ========================================

    /// <summary>
    /// Response of the <c>query</c> endpoint: one inner array per query embedding.
    /// </summary>
    public class ChromaQueryResult
    {
        public string[][] ids { get; set; } = Array.Empty<string[]>();
        public string[][] documents { get; set; } = Array.Empty<string[]>();
        public double[][]? distances { get; set; }
        public Dictionary<string, object>[][]? metadatas { get; set; }
    }

    /// <summary>
    /// Response of the <c>get</c> endpoint: parallel arrays indexed by document.
    /// </summary>
    public class ChromaGetResult
    {
        public string[] ids { get; set; } = Array.Empty<string>();
        public string[] documents { get; set; } = Array.Empty<string>();
        public Dictionary<string, object>[]? metadatas { get; set; }
    }

    /// <summary>
    /// Object-shaped response of the <c>count</c> endpoint.
    /// </summary>
    public class ChromaCountResult
    {
        public int count { get; set; }
    }
}