107 lines
3.6 KiB
C#
107 lines
3.6 KiB
C#
using ChatRAG.Models;
|
|
using ChatRAG.Repositories;
|
|
using Microsoft.SemanticKernel;
|
|
using Microsoft.SemanticKernel.Embeddings;
|
|
using System.Text;
|
|
|
|
#pragma warning disable SKEXP0001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed.
|
|
|
|
namespace ChatApi.Data
|
|
{
|
|
/// <summary>
/// Splits a text into titled sections, generates an embedding for each section
/// and stores the result through <see cref="TextDataService"/>.
/// </summary>
public class TextData
{
    // Prefix that marks a line as a section heading (markdown-style bold).
    private const string MarcadorDeTitulo = "**";

    private readonly ITextEmbeddingGenerationService _textEmbeddingGenerationService;
    private readonly TextDataService _textDataService;

    /// <summary>
    /// Creates the helper with the embedding generator and the storage service it delegates to.
    /// </summary>
    /// <param name="textEmbeddingGenerationService">Service used to generate the embedding of each text.</param>
    /// <param name="textDataService">Service used to read, create and update stored documents.</param>
    public TextData(ITextEmbeddingGenerationService textEmbeddingGenerationService, TextDataService textDataService)
    {
        _textEmbeddingGenerationService = textEmbeddingGenerationService;
        _textDataService = textDataService;
    }

    /// <summary>
    /// Splits <paramref name="textoCompleto"/> into sections and saves each one with its embedding.
    /// </summary>
    /// <remarks>
    /// A line starting with <c>**</c> begins a new section and becomes its title; the first
    /// line of the text is the title of the first section. Each section is stored under its
    /// own title (heading markers removed), and its content is prefixed with
    /// <c>"&lt;title&gt;: "</c> followed by a line break. Sections without any body line are
    /// skipped. Blank input stores nothing.
    /// </remarks>
    /// <param name="textoCompleto">Full text to split and store.</param>
    /// <exception cref="ArgumentNullException"><paramref name="textoCompleto"/> is null.</exception>
    public async Task SalvarTextoComEmbeddingNoMongoDB(string textoCompleto)
    {
        ArgumentNullException.ThrowIfNull(textoCompleto);

        foreach (var (titulo, conteudo) in DividirEmSecoes(textoCompleto))
        {
            await SalvarNoMongoDB(titulo, conteudo);
        }
    }

    /// <summary>
    /// Saves a new document with the given title and text.
    /// </summary>
    /// <param name="titulo">Title stored with the document.</param>
    /// <param name="texto">Text stored with the document and used to generate the embedding.</param>
    public async Task SalvarNoMongoDB(string titulo, string texto)
    {
        await SalvarNoMongoDB(null, titulo, texto);
    }

    /// <summary>
    /// Generates the embedding for <paramref name="texto"/> and creates or updates the document.
    /// </summary>
    /// <remarks>
    /// The document is updated only when <paramref name="id"/> is not null and a document with
    /// that id is found; in every other case a new document is created without setting its id.
    /// </remarks>
    /// <param name="id">Id of the document to update, or null to always create a new one.</param>
    /// <param name="titulo">Title stored with the document.</param>
    /// <param name="texto">Text stored with the document and used to generate the embedding.</param>
    public async Task SalvarNoMongoDB(string? id, string titulo, string texto)
    {
        // Generate the embedding for the text.
        var embedding = await _textEmbeddingGenerationService.GenerateEmbeddingAsync(texto);

        // Convert each embedding component to double for the stored document.
        var embeddingArray = embedding.ToArray().Select(e => (double)e).ToArray();

        // Written as a single condition so the compiler knows id is non-null in the update branch.
        if (id is null || await GetById(id) is null)
        {
            var novoDocumento = new TextoComEmbedding
            {
                Titulo = titulo,
                Conteudo = texto,
                Embedding = embeddingArray
            };

            await _textDataService.CreateAsync(novoDocumento);
        }
        else
        {
            var documentoAtualizado = new TextoComEmbedding
            {
                Id = id,
                Titulo = titulo,
                Conteudo = texto,
                Embedding = embeddingArray
            };

            await _textDataService.UpdateAsync(id, documentoAtualizado);
        }
    }

    /// <summary>
    /// Returns every stored document.
    /// </summary>
    public async Task<IEnumerable<TextoComEmbedding>> GetAll()
    {
        return await _textDataService.GetAsync();
    }

    /// <summary>
    /// Returns the stored document with the given id.
    /// </summary>
    /// <param name="id">Id of the document to look up.</param>
    /// <returns>
    /// The result of the storage service lookup. <see cref="SalvarNoMongoDB(string?, string, string)"/>
    /// treats a null result as "not found".
    /// </returns>
    public async Task<TextoComEmbedding> GetById(string id)
    {
        return await _textDataService.GetAsync(id);
    }

    // Splits the text into (title, content) pairs, one per heading-delimited section.
    private static List<(string Titulo, string Conteudo)> DividirEmSecoes(string textoCompleto)
    {
        var secoes = new List<(string Titulo, string Conteudo)>();

        if (string.IsNullOrWhiteSpace(textoCompleto))
        {
            return secoes;
        }

        // Accept both CRLF and LF so lines do not keep a stray trailing '\r'.
        string[] linhas = textoCompleto.Split(new[] { "\r\n", "\n" }, StringSplitOptions.None);

        var tituloAtual = linhas[0];
        var corpo = new StringBuilder();

        foreach (string linha in linhas)
        {
            if (EhLinhaDeTitulo(linha))
            {
                // Close the section collected so far, then start a new one under this heading.
                AdicionarSecao(secoes, tituloAtual, corpo);
                corpo.Clear();
                tituloAtual = linha;
            }
            else
            {
                corpo.AppendLine(linha);
            }
        }

        // The last section has no heading after it, so it must be closed explicitly.
        AdicionarSecao(secoes, tituloAtual, corpo);

        return secoes;
    }

    // Tells whether the line is a section heading; a leading '\r' before the marker is tolerated.
    private static bool EhLinhaDeTitulo(string linha)
    {
        return linha.StartsWith(MarcadorDeTitulo, StringComparison.Ordinal)
            || linha.StartsWith("\r" + MarcadorDeTitulo, StringComparison.Ordinal);
    }

    // Appends the section to the list unless its body is empty.
    private static void AdicionarSecao(List<(string Titulo, string Conteudo)> secoes, string tituloBruto, StringBuilder corpo)
    {
        if (corpo.Length == 0)
        {
            return;
        }

        var titulo = tituloBruto.Replace(MarcadorDeTitulo, "").Replace("\r", "");
        secoes.Add((titulo, titulo + ": " + Environment.NewLine + corpo.ToString()));
    }
}
|
|
}
|
|
#pragma warning restore SKEXP0001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed.
|