ChatRAG/Data/TextData.cs
2025-06-15 21:34:47 -03:00

288 lines
9.8 KiB
C#

using ChatRAG.Data;
using ChatRAG.Models;
using ChatRAG.Services.Contracts;
using Microsoft.SemanticKernel;
using Microsoft.SemanticKernel.Embeddings;
using System.Text;
#pragma warning disable SKEXP0001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed.
namespace ChatApi.Data
{
public class TextData : ITextDataService
{
/// <summary>Service used to generate the embedding vector for each stored text.</summary>
private readonly ITextEmbeddingGenerationService _textEmbeddingGenerationService;

/// <summary>Repository through which this class reads, creates, updates and removes documents.</summary>
private readonly TextDataRepository _textDataService;

/// <summary>
/// Builds the service from its two collaborators.
/// </summary>
/// <param name="textEmbeddingGenerationService">Embedding generator used when saving documents.</param>
/// <param name="textDataService">Repository that persists the documents.</param>
public TextData(ITextEmbeddingGenerationService textEmbeddingGenerationService, TextDataRepository textDataService)
{
    (_textEmbeddingGenerationService, _textDataService) = (textEmbeddingGenerationService, textDataService);
}

/// <summary>Provider name reported by this implementation; always "MongoDB".</summary>
public string ProviderName
{
    get { return "MongoDB"; }
}
// ========================================
// MÉTODOS ORIGINAIS (já implementados)
// ========================================
/// <summary>
/// Splits a text into sections at lines that start with <c>**</c> and stores each
/// section through <see cref="SalvarNoMongoDB(string, string, string)"/>.
/// </summary>
/// <remarks>
/// Each section is saved under its own heading (with the <c>**</c> markers and carriage
/// returns removed), and the trailing section after the last heading is saved as well.
/// </remarks>
/// <param name="textoCompleto">Full text; its first line is used as the initial section title.</param>
/// <param name="projectId">Project identifier stored with every section.</param>
/// <exception cref="ArgumentNullException"><paramref name="textoCompleto"/> is null.</exception>
public async Task SalvarTextoComEmbeddingNoMongoDB(string textoCompleto, string projectId)
{
    ArgumentNullException.ThrowIfNull(textoCompleto);

    // Sections are stored one at a time, in document order.
    foreach (var (sectionTitle, sectionText) in SplitIntoSections(textoCompleto))
    {
        await SalvarNoMongoDB(sectionTitle, sectionText, projectId);
    }
}

/// <summary>
/// Breaks <paramref name="text"/> into (title, content) pairs, starting a new section at
/// every line that begins with <c>**</c> (optionally preceded by a carriage return).
/// </summary>
/// <param name="text">Text to split; lines are separated by <c>\n</c>.</param>
/// <returns>
/// The sections in order. Each content value is the cleaned title, then ": ", a line break
/// and the accumulated body lines.
/// </returns>
private static List<(string Title, string Content)> SplitIntoSections(string text)
{
    var sections = new List<(string Title, string Content)>();
    var lines = text.Split('\n');
    var currentTitle = lines[0];
    var body = new StringBuilder();

    // Emits the section accumulated so far and resets the body buffer.
    void CloseSection()
    {
        var cleanTitle = currentTitle.Replace("**", "").Replace("\r", "");
        sections.Add((cleanTitle, cleanTitle + ": " + Environment.NewLine + body.ToString()));
        body.Clear();
    }

    foreach (var line in lines)
    {
        var isHeading = line.StartsWith("**", StringComparison.Ordinal)
            || line.StartsWith("\r**", StringComparison.Ordinal);

        if (!isHeading)
        {
            body.AppendLine(line);
            continue;
        }

        // NOTE: a heading that arrives while the body is still empty does not replace
        // the current title; only a heading that closes a section becomes the new title.
        if (body.Length > 0)
        {
            CloseSection();
            currentTitle = line;
        }
    }

    // Flush the trailing section. Whitespace-only leftovers are skipped so that an
    // empty or blank input does not produce an empty document.
    if (!string.IsNullOrWhiteSpace(body.ToString()))
    {
        CloseSection();
    }

    return sections;
}
/// <summary>
/// Saves a text without a caller-supplied id by forwarding to the overload that takes
/// an id, passing <c>null</c> for it.
/// </summary>
/// <param name="titulo">Document title.</param>
/// <param name="texto">Document body.</param>
/// <param name="projectId">Project identifier stored with the document.</param>
public async Task SalvarNoMongoDB(string titulo, string texto, string projectId)
    => await SalvarNoMongoDB(null, titulo, texto, projectId);
/// <summary>
/// Generates an embedding for the title and text, then creates the document or, when a
/// document with the given id is already found, updates it.
/// </summary>
/// <param name="id">Document id; when null a new GUID string is used and the document is created.</param>
/// <param name="titulo">Document title.</param>
/// <param name="texto">Document body.</param>
/// <param name="projectId">Project identifier stored with the document.</param>
public async Task SalvarNoMongoDB(string? id, string titulo, string texto, string projectId)
{
    // The embedding is computed over the title and the body together.
    var textToEmbed = $"**{titulo}** \n\n {texto}";
    var vector = await _textEmbeddingGenerationService.GenerateEmbeddingAsync(textToEmbed);

    // Convert each component to double, the element type of the stored array.
    var storedVector = Array.ConvertAll(vector.ToArray(), component => (double)component);

    // Only look the document up when the caller supplied an id.
    var existing = id != null ? await GetById(id) : null;

    // When an existing document was found, id is necessarily non-null, so the
    // fallback GUID only applies on the create path.
    var documento = new TextoComEmbedding
    {
        Id = id ?? Guid.NewGuid().ToString(),
        Titulo = titulo,
        Conteudo = texto,
        ProjetoId = projectId,
        Embedding = storedVector
    };

    if (existing == null)
    {
        await _textDataService.CreateAsync(documento);
        return;
    }

    await _textDataService.UpdateAsync(id!, documento);
}
/// <summary>Returns every document the repository's parameterless <c>GetAsync</c> yields.</summary>
public async Task<IEnumerable<TextoComEmbedding>> GetAll()
    => await _textDataService.GetAsync();
/// <summary>Returns the documents the repository reports for the given project id.</summary>
/// <param name="porjectId">Project identifier forwarded to the repository.</param>
// NOTE(review): the "Porject" spelling is part of the public name and parameter name, so it is kept as-is.
public async Task<IEnumerable<TextoComEmbedding>> GetByPorjectId(string porjectId)
    => await _textDataService.GetByProjectIdAsync(porjectId);
/// <summary>Looks a document up by id through the repository.</summary>
/// <param name="id">Document id forwarded to the repository.</param>
/// <returns>
/// The repository result, passed through the null-forgiving operator.
/// NOTE(review): callers in this class still null-check the result, so a missing
/// document presumably comes back as null despite the non-nullable return type — confirm.
/// </returns>
public async Task<TextoComEmbedding> GetById(string id)
{
    var found = await _textDataService.GetAsync(id);
    return found!;
}
// ========================================
// MÉTODOS NOVOS DA INTERFACE (implementação completa)
// ========================================
/// <summary>
/// Saves a document, generating a GUID string id when the input carries none.
/// </summary>
/// <param name="document">Title, content, project id and optional id to store.</param>
/// <returns>The id the document was saved under.</returns>
public async Task<string> SaveDocumentAsync(DocumentInput document)
{
    var documentId = document.Id;
    documentId ??= Guid.NewGuid().ToString();

    await SalvarNoMongoDB(documentId, document.Title, document.Content, document.ProjectId);
    return documentId;
}
/// <summary>
/// Saves the given title, content and project id under <paramref name="id"/>; the
/// save routine updates the document when it already exists and creates it otherwise.
/// </summary>
/// <param name="id">Id of the document to write.</param>
/// <param name="document">New title, content and project id.</param>
public async Task UpdateDocumentAsync(string id, DocumentInput document)
    => await SalvarNoMongoDB(id, document.Title, document.Content, document.ProjectId);
/// <summary>Removes the document with the given id through the repository.</summary>
/// <param name="id">Id of the document to remove.</param>
public async Task DeleteDocumentAsync(string id)
    => await _textDataService.RemoveAsync(id);
/// <summary>Reports whether a document with the given id can be loaded.</summary>
/// <param name="id">Id of the document to look for.</param>
/// <returns>
/// True when the lookup returns a document; false when it returns null or throws.
/// </returns>
public async Task<bool> DocumentExistsAsync(string id)
{
    try
    {
        return await GetById(id) != null;
    }
    catch
    {
        // Any lookup failure is reported as "does not exist".
        return false;
    }
}
/// <summary>Loads one document by id and maps it to a <see cref="DocumentOutput"/>.</summary>
/// <param name="id">Id of the document to load.</param>
/// <returns>The mapped document, or null when it is not found or the lookup or mapping throws.</returns>
public async Task<DocumentOutput?> GetDocumentAsync(string id)
{
    try
    {
        var doc = await GetById(id);
        return doc == null ? null : ToDocumentOutput(doc);
    }
    catch
    {
        // Any failure is reported to the caller as "no document".
        return null;
    }
}

/// <summary>Loads every document of a project and maps each to a <see cref="DocumentOutput"/>.</summary>
/// <param name="projectId">Project identifier to filter by.</param>
/// <returns>The mapped documents, in the order the repository returned them.</returns>
public async Task<List<DocumentOutput>> GetDocumentsByProjectAsync(string projectId)
{
    var docs = await GetByPorjectId(projectId);
    return docs.Select(ToDocumentOutput).ToList();
}

/// <summary>
/// Single mapping from the stored entity to the output DTO, shared by the document
/// getters so both expose exactly the same shape.
/// </summary>
/// <param name="doc">Stored document to map.</param>
/// <returns>The output DTO with provider metadata attached.</returns>
private static DocumentOutput ToDocumentOutput(TextoComEmbedding doc)
{
    // The timestamps are set to the moment of mapping, not read from the stored
    // document. The clock is read once so CreatedAt and UpdatedAt are always equal.
    var now = DateTime.UtcNow;

    return new DocumentOutput
    {
        Id = doc.Id,
        Title = doc.Titulo,
        Content = doc.Conteudo,
        ProjectId = doc.ProjetoId,
        Embedding = doc.Embedding,
        CreatedAt = now,
        UpdatedAt = now,
        Metadata = new Dictionary<string, object>
        {
            ["source"] = "MongoDB",
            ["has_embedding"] = doc.Embedding != null,
            ["embedding_size"] = doc.Embedding?.Length ?? 0
        }
    };
}
/// <summary>Counts documents, either across all projects or for a single project.</summary>
/// <param name="projectId">Project to count; null or empty counts every document.</param>
/// <returns>The number of documents loaded for the selected scope.</returns>
public async Task<int> GetDocumentCountAsync(string? projectId = null)
{
    // The count is taken over the loaded documents, not via a dedicated count query.
    IEnumerable<TextoComEmbedding> docs = string.IsNullOrEmpty(projectId)
        ? await GetAll()
        : await GetByPorjectId(projectId);

    return docs.Count();
}
/// <summary>Saves the documents one after another, in list order.</summary>
/// <param name="documents">Documents to save.</param>
/// <returns>The ids the documents were saved under, in the same order as the input.</returns>
public async Task<List<string>> SaveDocumentsBatchAsync(List<DocumentInput> documents)
{
    var savedIds = new List<string>();

    foreach (var input in documents)
    {
        // Each save is awaited before the next one starts.
        savedIds.Add(await SaveDocumentAsync(input));
    }

    return savedIds;
}
/// <summary>Deletes the documents one after another, in list order.</summary>
/// <param name="ids">Ids of the documents to delete.</param>
public async Task DeleteDocumentsBatchAsync(List<string> ids)
{
    foreach (var documentId in ids)
    {
        // Each delete is awaited before the next one starts.
        await DeleteDocumentAsync(documentId);
    }
}
/// <summary>
/// Builds a statistics and health report over all stored documents.
/// </summary>
/// <returns>
/// On success, a dictionary with document totals, embedding coverage, average content
/// length and per-project counts, with "health" set to "ok". If anything throws, a
/// reduced dictionary with "health" set to "error" and the exception message.
/// </returns>
public async Task<Dictionary<string, object>> GetProviderStatsAsync()
{
    try
    {
        // Load the documents once and materialize them: every figure below is derived
        // from the same snapshot, so the total and the per-document stats cannot disagree.
        var allDocs = (await GetAll()).ToList();
        var totalDocs = allDocs.Count;

        var docsWithEmbedding = allDocs.Count(d => d.Embedding != null && d.Embedding.Length > 0);
        var avgContentLength = totalDocs > 0 ? allDocs.Average(d => d.Conteudo?.Length ?? 0) : 0;

        // Normalize a missing project id before grouping so that null and a literal
        // "unknown" project share one bucket instead of colliding as dictionary keys.
        var projectStats = allDocs
            .GroupBy(d => d.ProjetoId ?? "unknown")
            .ToDictionary(g => g.Key, g => g.Count());

        return new Dictionary<string, object>
        {
            ["provider"] = "MongoDB",
            ["total_documents"] = totalDocs,
            ["documents_with_embedding"] = docsWithEmbedding,
            ["embedding_coverage"] = totalDocs > 0 ? (double)docsWithEmbedding / totalDocs : 0,
            ["average_content_length"] = Math.Round(avgContentLength, 1),
            ["projects_count"] = projectStats.Count,
            ["documents_by_project"] = projectStats,
            ["health"] = "ok",
            ["last_check"] = DateTime.UtcNow,
            ["connection_status"] = "connected"
        };
    }
    catch (Exception ex)
    {
        // A failure is reported inside the result rather than thrown to the caller.
        return new Dictionary<string, object>
        {
            ["provider"] = "MongoDB",
            ["health"] = "error",
            ["error"] = ex.Message,
            ["last_check"] = DateTime.UtcNow,
            ["connection_status"] = "error"
        };
    }
}
}
}
#pragma warning restore SKEXP0001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed.