288 lines
9.8 KiB
C#
288 lines
9.8 KiB
C#
using ChatRAG.Data;
|
|
using ChatRAG.Models;
|
|
using ChatRAG.Services.Contracts;
|
|
using Microsoft.SemanticKernel;
|
|
using Microsoft.SemanticKernel.Embeddings;
|
|
using System.Text;
|
|
|
|
#pragma warning disable SKEXP0001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed.
|
|
|
|
namespace ChatApi.Data
|
|
{
|
|
/// <summary>
/// <see cref="ITextDataService"/> implementation that stores text documents together with
/// their embedding vectors through a <see cref="TextDataRepository"/>. It identifies itself
/// as the "MongoDB" provider via <see cref="ProviderName"/>.
/// </summary>
public class TextData : ITextDataService
{
    // Bucket used in the statistics for documents that have no project id.
    private const string UnknownProjectKey = "unknown";

    private readonly ITextEmbeddingGenerationService _textEmbeddingGenerationService;
    private readonly TextDataRepository _textDataService;

    /// <summary>Creates the service from its embedding generator and repository.</summary>
    /// <param name="textEmbeddingGenerationService">Generator used to embed each document before it is stored.</param>
    /// <param name="textDataService">Repository used for all persistence operations.</param>
    public TextData(ITextEmbeddingGenerationService textEmbeddingGenerationService, TextDataRepository textDataService)
    {
        _textEmbeddingGenerationService = textEmbeddingGenerationService;
        _textDataService = textDataService;
    }

    /// <summary>Name of the storage provider reported by this service.</summary>
    public string ProviderName => "MongoDB";

    // ========================================
    // ORIGINAL METHODS (already implemented)
    // ========================================

    /// <summary>
    /// Splits <paramref name="textoCompleto"/> into sections and stores each section as its own
    /// document. A line starting with <c>**</c> is a section heading; the lines that follow it,
    /// up to the next heading, form the section body. Lines before the first heading are stored
    /// under the first line of the text.
    /// </summary>
    /// <param name="textoCompleto">Full text to split and store.</param>
    /// <param name="projectId">Project id assigned to every stored section.</param>
    /// <exception cref="ArgumentNullException"><paramref name="textoCompleto"/> is null.</exception>
    public async Task SalvarTextoComEmbeddingNoMongoDB(string textoCompleto, string projectId)
    {
        if (textoCompleto is null)
        {
            throw new ArgumentNullException(nameof(textoCompleto));
        }

        // Nothing meaningful to embed or store.
        if (string.IsNullOrWhiteSpace(textoCompleto))
        {
            return;
        }

        var sections = new List<(string Title, string Body)>();
        string[] lines = textoCompleto.Split('\n');

        var currentTitle = CleanHeading(lines[0]);
        var body = new StringBuilder();

        foreach (string rawLine in lines)
        {
            // Input may use CRLF line endings; drop the stray carriage returns left by the split.
            var line = rawLine.Trim('\r');

            if (line.StartsWith("**", StringComparison.Ordinal))
            {
                if (body.Length > 0)
                {
                    sections.Add((currentTitle, body.ToString()));
                    body.Clear();
                }

                // Always switch to the new heading, even when the previous one had no body,
                // so the following lines are never attributed to a stale title.
                currentTitle = CleanHeading(line);
            }
            else
            {
                body.AppendLine(line);
            }
        }

        // Flush the last section; no heading follows it to trigger the flush inside the loop.
        if (body.Length > 0)
        {
            sections.Add((currentTitle, body.ToString()));
        }

        foreach (var (sectionTitle, sectionBody) in sections)
        {
            // The stored text keeps the "<title>: <newline><body>" layout.
            var texto = sectionTitle + ": " + Environment.NewLine + sectionBody;
            await SalvarNoMongoDB(sectionTitle, texto, projectId);
        }
    }

    /// <summary>Stores a new document under a freshly generated id.</summary>
    /// <param name="titulo">Document title.</param>
    /// <param name="texto">Document content.</param>
    /// <param name="projectId">Owning project id.</param>
    public async Task SalvarNoMongoDB(string titulo, string texto, string projectId)
    {
        await SalvarNoMongoDB(null, titulo, texto, projectId);
    }

    /// <summary>
    /// Creates or updates a document. When <paramref name="id"/> is null a new id is generated;
    /// otherwise the document with that id is updated if it exists and created if it does not.
    /// </summary>
    /// <param name="id">Document id, or null to generate one.</param>
    /// <param name="titulo">Document title.</param>
    /// <param name="texto">Document content.</param>
    /// <param name="projectId">Owning project id.</param>
    public async Task SalvarNoMongoDB(string? id, string titulo, string texto, string projectId)
    {
        await UpsertAsync(id, id != null, titulo, texto, projectId);
    }

    /// <summary>Returns every stored document.</summary>
    public async Task<IEnumerable<TextoComEmbedding>> GetAll()
    {
        return await _textDataService.GetAsync();
    }

    /// <summary>Returns the documents that belong to the given project.</summary>
    /// <param name="porjectId">Project id to filter by.</param>
    public async Task<IEnumerable<TextoComEmbedding>> GetByPorjectId(string porjectId)
    {
        return await _textDataService.GetByProjectIdAsync(porjectId);
    }

    /// <summary>
    /// Looks up a single document by id. The repository result is passed through with the
    /// null-forgiving operator, and callers in this class check the result for null, so treat
    /// the return value as possibly null despite the signature.
    /// </summary>
    /// <param name="id">Document id.</param>
    public async Task<TextoComEmbedding> GetById(string id)
    {
        return (await _textDataService.GetAsync(id))!;
    }

    // ========================================
    // NEW INTERFACE METHODS (full implementation)
    // ========================================

    /// <summary>
    /// Saves a document, generating an id when <c>document.Id</c> is null.
    /// </summary>
    /// <param name="document">Document to save.</param>
    /// <returns>The id the document was stored under.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="document"/> is null.</exception>
    public async Task<string> SaveDocumentAsync(DocumentInput document)
    {
        if (document is null)
        {
            throw new ArgumentNullException(nameof(document));
        }

        var callerSuppliedId = document.Id != null;
        var id = document.Id ?? Guid.NewGuid().ToString();

        // A freshly generated id cannot already be stored, so skip the existence lookup for it.
        await UpsertAsync(id, callerSuppliedId, document.Title, document.Content, document.ProjectId);
        return id;
    }

    /// <summary>
    /// Replaces the document stored under <paramref name="id"/>; creates it if it does not exist.
    /// </summary>
    /// <param name="id">Id of the document to update.</param>
    /// <param name="document">New title, content and project id.</param>
    /// <exception cref="ArgumentNullException"><paramref name="document"/> is null.</exception>
    public async Task UpdateDocumentAsync(string id, DocumentInput document)
    {
        if (document is null)
        {
            throw new ArgumentNullException(nameof(document));
        }

        await SalvarNoMongoDB(id, document.Title, document.Content, document.ProjectId);
    }

    /// <summary>Removes the document with the given id.</summary>
    /// <param name="id">Id of the document to remove.</param>
    public async Task DeleteDocumentAsync(string id)
    {
        await _textDataService.RemoveAsync(id);
    }

    /// <summary>
    /// Reports whether a document with the given id exists. Best effort: any failure during
    /// the lookup is reported as "does not exist" rather than thrown.
    /// </summary>
    /// <param name="id">Document id.</param>
    public async Task<bool> DocumentExistsAsync(string id)
    {
        try
        {
            return await GetById(id) != null;
        }
        catch
        {
            // Deliberate best effort: a failed lookup is treated as a missing document.
            return false;
        }
    }

    /// <summary>
    /// Returns the document with the given id, or null when it is missing. Best effort: any
    /// failure during the lookup also yields null.
    /// </summary>
    /// <param name="id">Document id.</param>
    public async Task<DocumentOutput?> GetDocumentAsync(string id)
    {
        try
        {
            var doc = await GetById(id);
            return doc == null ? null : ToDocumentOutput(doc);
        }
        catch
        {
            // Deliberate best effort: a failed lookup is treated as a missing document.
            return null;
        }
    }

    /// <summary>Returns all documents of a project, mapped to <see cref="DocumentOutput"/>.</summary>
    /// <param name="projectId">Project id to filter by.</param>
    public async Task<List<DocumentOutput>> GetDocumentsByProjectAsync(string projectId)
    {
        var docs = await GetByPorjectId(projectId);
        return docs.Select(doc => ToDocumentOutput(doc)).ToList();
    }

    /// <summary>
    /// Counts documents, either for one project or, when <paramref name="projectId"/> is null
    /// or empty, across all projects. The count is taken over the fetched documents.
    /// </summary>
    /// <param name="projectId">Optional project id to restrict the count to.</param>
    public async Task<int> GetDocumentCountAsync(string? projectId = null)
    {
        var docs = string.IsNullOrEmpty(projectId)
            ? await GetAll()
            : await GetByPorjectId(projectId);

        return docs.Count();
    }

    /// <summary>Saves the documents one after another, in order.</summary>
    /// <param name="documents">Documents to save.</param>
    /// <returns>The ids of the saved documents, in input order.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="documents"/> is null.</exception>
    public async Task<List<string>> SaveDocumentsBatchAsync(List<DocumentInput> documents)
    {
        if (documents is null)
        {
            throw new ArgumentNullException(nameof(documents));
        }

        var ids = new List<string>(documents.Count);

        foreach (var doc in documents)
        {
            ids.Add(await SaveDocumentAsync(doc));
        }

        return ids;
    }

    /// <summary>Deletes the documents with the given ids one after another.</summary>
    /// <param name="ids">Ids of the documents to delete.</param>
    /// <exception cref="ArgumentNullException"><paramref name="ids"/> is null.</exception>
    public async Task DeleteDocumentsBatchAsync(List<string> ids)
    {
        if (ids is null)
        {
            throw new ArgumentNullException(nameof(ids));
        }

        foreach (var id in ids)
        {
            await DeleteDocumentAsync(id);
        }
    }

    /// <summary>
    /// Builds a statistics and health summary for this provider. On failure the returned
    /// dictionary carries <c>health = "error"</c> and the exception message instead of throwing.
    /// </summary>
    public async Task<Dictionary<string, object>> GetProviderStatsAsync()
    {
        try
        {
            // Fetch once and materialize, so every figure below comes from the same snapshot
            // and the source sequence is not enumerated repeatedly.
            var allDocs = (await GetAll()).ToList();
            var totalDocs = allDocs.Count;

            var docsWithEmbedding = allDocs.Count(d => d.Embedding != null && d.Embedding.Length > 0);
            var avgContentLength = totalDocs > 0 ? allDocs.Average(d => d.Conteudo?.Length ?? 0) : 0;

            // Normalize the key before grouping: documents without a project id share a bucket
            // with any project literally named "unknown" instead of producing a duplicate key.
            var projectStats = allDocs
                .GroupBy(d => d.ProjetoId ?? UnknownProjectKey)
                .ToDictionary(g => g.Key, g => g.Count());

            return new Dictionary<string, object>
            {
                ["provider"] = "MongoDB",
                ["total_documents"] = totalDocs,
                ["documents_with_embedding"] = docsWithEmbedding,
                ["embedding_coverage"] = totalDocs > 0 ? (double)docsWithEmbedding / totalDocs : 0,
                ["average_content_length"] = Math.Round(avgContentLength, 1),
                ["projects_count"] = projectStats.Count,
                ["documents_by_project"] = projectStats,
                ["health"] = "ok",
                ["last_check"] = DateTime.UtcNow,
                ["connection_status"] = "connected"
            };
        }
        catch (Exception ex)
        {
            return new Dictionary<string, object>
            {
                ["provider"] = "MongoDB",
                ["health"] = "error",
                ["error"] = ex.Message,
                ["last_check"] = DateTime.UtcNow,
                ["connection_status"] = "error"
            };
        }
    }

    /// <summary>Strips the <c>**</c> markers and carriage returns from a heading line.</summary>
    private static string CleanHeading(string heading)
    {
        return heading.Replace("**", "").Replace("\r", "");
    }

    /// <summary>
    /// Maps a stored document to its output shape. The stored document exposes no timestamps
    /// here, so CreatedAt and UpdatedAt are both set to the current UTC time.
    /// </summary>
    private static DocumentOutput ToDocumentOutput(TextoComEmbedding doc)
    {
        var now = DateTime.UtcNow;

        return new DocumentOutput
        {
            Id = doc.Id,
            Title = doc.Titulo,
            Content = doc.Conteudo,
            ProjectId = doc.ProjetoId,
            Embedding = doc.Embedding,
            CreatedAt = now,
            UpdatedAt = now,
            Metadata = new Dictionary<string, object>
            {
                ["source"] = "MongoDB",
                ["has_embedding"] = doc.Embedding != null,
                ["embedding_size"] = doc.Embedding?.Length ?? 0
            }
        };
    }

    /// <summary>
    /// Embeds the document and writes it: an update when <paramref name="checkExisting"/> is
    /// true and a document with <paramref name="id"/> is found, otherwise a create.
    /// </summary>
    private async Task UpsertAsync(string? id, bool checkExisting, string titulo, string texto, string projectId)
    {
        // The embedding covers the title and the text together.
        var conteudo = $"**{titulo}** \n\n {texto}";
        var embedding = await _textEmbeddingGenerationService.GenerateEmbeddingAsync(conteudo);

        // Convert the float vector to doubles for storage.
        var embeddingArray = embedding.ToArray().Select(e => (double)e).ToArray();

        var exists = (checkExisting && id != null) ? await GetById(id) : null;

        var documento = new TextoComEmbedding
        {
            Id = id ?? Guid.NewGuid().ToString(),
            Titulo = titulo,
            Conteudo = texto,
            ProjetoId = projectId,
            Embedding = embeddingArray
        };

        if (exists == null)
        {
            await _textDataService.CreateAsync(documento);
        }
        else
        {
            await _textDataService.UpdateAsync(id!, documento);
        }
    }
}
|
|
}
|
|
|
|
#pragma warning restore SKEXP0001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed. |