ChatRAG/Tools/MigrationService.cs
2025-06-15 21:34:47 -03:00

265 lines
10 KiB
C#
Raw Permalink Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

using ChatRAG.Contracts.VectorSearch;
using ChatRAG.Data;
using ChatRAG.Models;
using ChatRAG.Services.Contracts;
using ChatRAG.Services.Migration;
using ChatRAG.Services.SearchVectors;
using ChatRAG.Settings.ChatRAG.Configuration;
using ChatRAG.Settings;
using Microsoft.Extensions.Options;
using System.Diagnostics;
namespace ChatRAG.Services.Migration
{
public class MigrationService
{
private readonly ILogger<MigrationService> _logger;
private readonly IServiceProvider _serviceProvider;

/// <summary>
/// Creates the migration service.
/// </summary>
/// <param name="logger">Logger used to report migration and rollback progress and errors.</param>
/// <param name="serviceProvider">Provider from which the concrete MongoDB and Qdrant data services are resolved.</param>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="logger"/> or <paramref name="serviceProvider"/> is null.
/// </exception>
public MigrationService(
    ILogger<MigrationService> logger,
    IServiceProvider serviceProvider)
{
    // Fail fast at construction instead of surfacing a NullReferenceException
    // in the middle of a migration run.
    _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    _serviceProvider = serviceProvider ?? throw new ArgumentNullException(nameof(serviceProvider));
}
/// <summary>
/// Migrates every document from MongoDB to Qdrant, grouped by project and written in batches.
/// </summary>
/// <remarks>
/// A batch that fails to save is logged, recorded in the result's error list and skipped; the
/// migration then continues with the next batch. Exceptions raised inside the migration are
/// caught and reported through the returned result rather than propagated.
/// </remarks>
/// <param name="validateData">
/// When true, the migrated data is validated after the copy and the outcome is stored in the
/// result's <c>ValidationResult</c>. A failed validation is logged as a warning only.
/// </param>
/// <param name="batchSize">
/// Maximum number of documents sent to Qdrant per save call. Must be greater than zero;
/// otherwise a failed result is returned without touching either store.
/// </param>
/// <returns>
/// A result carrying document counts, duration, collected errors and the optional validation
/// outcome. <c>Success</c> is true only when every batch was saved without error.
/// </returns>
public async Task<MigrationResult> MigrateFromMongoToQdrantAsync(
    bool validateData = true,
    int batchSize = 50)
{
    var stopwatch = Stopwatch.StartNew();
    var result = new MigrationResult { StartTime = DateTime.UtcNow };
    try
    {
        // A non-positive batch size would keep the batching loop from advancing
        // (or divide by zero when computing the batch number), so reject it up front.
        if (batchSize <= 0)
        {
            stopwatch.Stop();
            result.Duration = stopwatch.Elapsed;
            result.Success = false;
            result.Message = $"Erro na migração: batchSize deve ser maior que zero (recebido: {batchSize})";
            result.Errors.Add(result.Message);
            _logger.LogError("❌ batchSize inválido: {BatchSize}", batchSize);
            return result;
        }

        _logger.LogInformation("🚀 Iniciando migração MongoDB → Qdrant...");

        // Resolve the concrete services for each store.
        var mongoService = CreateMongoService();
        var qdrantService = CreateQdrantService();

        // 1. Export the data from MongoDB.
        _logger.LogInformation("📤 Exportando dados do MongoDB...");
        var mongoDocuments = await mongoService.GetAll();
        var documentsList = mongoDocuments.ToList();
        result.TotalDocuments = documentsList.Count;
        _logger.LogInformation("✅ {Count} documentos encontrados no MongoDB", result.TotalDocuments);

        if (documentsList.Count == 0)
        {
            _logger.LogWarning("⚠️ Nenhum documento encontrado no MongoDB");
            // Record the elapsed time on this path too, like every other exit.
            stopwatch.Stop();
            result.Duration = stopwatch.Elapsed;
            result.Success = true;
            result.Message = "Migração concluída - nenhum documento para migrar";
            return result;
        }

        // 2. Group by project so each project is migrated as a unit.
        var documentsByProject = documentsList.GroupBy(d => d.ProjetoId).ToList();
        _logger.LogInformation("📁 Documentos organizados em {ProjectCount} projetos", documentsByProject.Count);

        // 3. Migrate each project in batches.
        var failedBatches = 0;
        foreach (var projectGroup in documentsByProject)
        {
            var projectId = projectGroup.Key;
            var projectDocs = projectGroup.ToList();
            _logger.LogInformation("📂 Migrando projeto {ProjectId}: {DocCount} documentos",
                projectId, projectDocs.Count);

            for (int i = 0; i < projectDocs.Count; i += batchSize)
            {
                var batchNumber = (i / batchSize) + 1;
                // GetRange copies only this slice; Skip/Take would re-walk the list
                // from the start for every batch.
                var batch = projectDocs.GetRange(i, Math.Min(batchSize, projectDocs.Count - i));
                try
                {
                    await MigrateBatch(batch, qdrantService);
                    result.MigratedDocuments += batch.Count;
                    _logger.LogDebug("✅ Lote {BatchNum}: {BatchCount} documentos migrados",
                        batchNumber, batch.Count);
                }
                catch (Exception ex)
                {
                    // Best effort: record the failure and carry on with the next batch.
                    failedBatches++;
                    _logger.LogError(ex, "❌ Erro no lote {BatchNum} do projeto {ProjectId}",
                        batchNumber, projectId);
                    result.Errors.Add($"Projeto {projectId}, lote {batchNumber}: {ex.Message}");
                }
            }
        }

        // 4. Validation (when requested).
        if (validateData)
        {
            _logger.LogInformation("🔍 Validando dados migrados...");
            var validationResult = await ValidateMigration(mongoService, qdrantService);
            result.ValidationResult = validationResult;
            if (!validationResult.IsValid)
            {
                _logger.LogWarning("⚠️ Validação encontrou inconsistências: {Issues}",
                    string.Join(", ", validationResult.Issues));
            }
            else
            {
                _logger.LogInformation("✅ Validação passou - dados consistentes");
            }
        }

        stopwatch.Stop();
        result.Duration = stopwatch.Elapsed;

        // Do not report success when some batches were skipped because of errors.
        result.Success = failedBatches == 0;
        if (result.Success)
        {
            result.Message = $"Migração concluída: {result.MigratedDocuments}/{result.TotalDocuments} documentos";
            _logger.LogInformation("🎉 Migração concluída em {Duration}s: {MigratedCount}/{TotalCount} documentos",
                result.Duration.TotalSeconds, result.MigratedDocuments, result.TotalDocuments);
        }
        else
        {
            result.Message = $"Migração concluída com erros: {result.MigratedDocuments}/{result.TotalDocuments} documentos, {failedBatches} lote(s) com falha";
            _logger.LogWarning("⚠️ Migração concluída com erros em {Duration}s: {MigratedCount}/{TotalCount} documentos, {FailedBatches} lote(s) com falha",
                result.Duration.TotalSeconds, result.MigratedDocuments, result.TotalDocuments, failedBatches);
        }
        return result;
    }
    catch (Exception ex)
    {
        // Anything outside the per-batch handling is fatal for this run.
        stopwatch.Stop();
        result.Duration = stopwatch.Elapsed;
        result.Success = false;
        result.Message = $"Erro na migração: {ex.Message}";
        result.Errors.Add(ex.ToString());
        _logger.LogError(ex, "💥 Erro fatal na migração");
        return result;
    }
}
/// <summary>
/// Rollback: deletes every document currently stored in Qdrant, in batches of 100 ids.
/// </summary>
/// <returns>
/// true when the rollback finished or there was nothing to remove; false when any exception
/// occurred (the exception is logged, not propagated).
/// </returns>
public async Task<bool> RollbackQdrantAsync()
{
    const int deleteBatchSize = 100;
    try
    {
        _logger.LogWarning("🔄 Iniciando rollback - removendo dados do Qdrant...");
        var qdrant = CreateQdrantService();

        // Load everything and keep only the ids to delete.
        var storedDocuments = await qdrant.GetAll();
        var idsToDelete = storedDocuments.Select(doc => doc.Id).ToList();
        if (idsToDelete.Count == 0)
        {
            _logger.LogInformation(" Nenhum documento encontrado no Qdrant para rollback");
            return true;
        }

        // Delete slice by slice.
        var batchNumber = 0;
        for (var offset = 0; offset < idsToDelete.Count; offset += deleteBatchSize)
        {
            batchNumber++;
            var sliceLength = Math.Min(deleteBatchSize, idsToDelete.Count - offset);
            var slice = idsToDelete.GetRange(offset, sliceLength);
            await qdrant.DeleteDocumentsBatchAsync(slice);
            _logger.LogDebug("🗑️ Lote {BatchNum}: {BatchCount} documentos removidos",
                batchNumber, slice.Count);
        }

        _logger.LogInformation("✅ Rollback concluído: {Count} documentos removidos do Qdrant", idsToDelete.Count);
        return true;
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "❌ Erro no rollback");
        return false;
    }
}
// ========================================
// MÉTODOS AUXILIARES
// ========================================
/// <summary>
/// Converts a batch of MongoDB documents into <c>DocumentInput</c> items and stores them
/// through a single <c>SaveDocumentsBatchAsync</c> call on the given service.
/// </summary>
/// <param name="batch">Source documents to convert and save.</param>
/// <param name="qdrantService">Destination service that receives the converted batch.</param>
private async Task MigrateBatch(List<ChatRAG.Models.TextoComEmbedding> batch, ITextDataService qdrantService)
{
    // Capture one timestamp for the whole batch so CreatedAt, UpdatedAt and
    // migration_date agree exactly instead of drifting by a few ticks per read.
    var migratedAt = DateTime.UtcNow;
    var migrationDate = migratedAt.ToString("O");

    var documents = batch.Select(doc => new DocumentInput
    {
        Id = doc.Id,
        Title = doc.Titulo,
        Content = doc.Conteudo,
        ProjectId = doc.ProjetoId,
        CreatedAt = migratedAt,
        UpdatedAt = migratedAt,
        // Provenance metadata; optional source fields fall back to an empty string.
        Metadata = new Dictionary<string, object>
        {
            ["migrated_from"] = "mongodb",
            ["migration_date"] = migrationDate,
            ["original_id"] = doc.Id,
            ["project_name"] = doc.ProjetoNome ?? "",
            ["document_type"] = doc.TipoDocumento ?? "",
            ["category"] = doc.Categoria ?? ""
        }
    }).ToList();

    await qdrantService.SaveDocumentsBatchAsync(documents);
}
/// <summary>
/// Checks the migrated data by comparing the document counts of both stores and by
/// comparing title and content of the first 10 MongoDB documents with their Qdrant copies.
/// </summary>
/// <param name="mongoService">Service reading the source (MongoDB) data.</param>
/// <param name="qdrantService">Service reading the destination (Qdrant) data.</param>
/// <returns>
/// A result whose <c>Issues</c> lists every inconsistency found; <c>IsValid</c> is true only
/// when no issue was recorded. An exception during validation is recorded as an issue.
/// </returns>
private async Task<ValidationResult> ValidateMigration(ITextDataService mongoService, ITextDataService qdrantService)
{
    var result = new ValidationResult();
    try
    {
        // Count comparison.
        var sourceCount = await mongoService.GetDocumentCountAsync();
        var targetCount = await qdrantService.GetDocumentCountAsync();
        if (sourceCount != targetCount)
        {
            result.Issues.Add($"Contagem divergente: MongoDB({sourceCount}) vs Qdrant({targetCount})");
        }

        // Spot check: the first 10 source documents (not a random sample).
        var allSourceDocs = await mongoService.GetAll();
        foreach (var sourceDoc in allSourceDocs.Take(10).ToList())
        {
            var migratedDoc = await qdrantService.GetDocumentAsync(sourceDoc.Id);
            if (migratedDoc == null)
            {
                result.Issues.Add($"Documento {sourceDoc.Id} não encontrado no Qdrant");
                continue;
            }

            var sameTitle = migratedDoc.Title == sourceDoc.Titulo;
            var sameContent = migratedDoc.Content == sourceDoc.Conteudo;
            if (!(sameTitle && sameContent))
            {
                result.Issues.Add($"Conteúdo divergente no documento {sourceDoc.Id}");
            }
        }

        result.IsValid = !result.Issues.Any();
    }
    catch (Exception ex)
    {
        result.Issues.Add($"Erro na validação: {ex.Message}");
        result.IsValid = false;
    }

    return result;
}
/// <summary>
/// Resolves the MongoDB-backed data service by its concrete type, so MongoDB is used
/// regardless of the configured provider.
/// </summary>
/// <returns>The resolved service, exposed as <c>ITextDataService</c>.</returns>
// NOTE(review): the type lives under ChatApi.Data while the rest of this file uses
// ChatRAG.* namespaces — confirm this is the intended type.
private ITextDataService CreateMongoService() =>
    _serviceProvider.GetRequiredService<ChatApi.Data.TextData>();
/// <summary>
/// Resolves the Qdrant-backed data service by its concrete type, so Qdrant is used
/// regardless of the configured provider.
/// </summary>
/// <returns>The resolved service, exposed as <c>ITextDataService</c>.</returns>
private ITextDataService CreateQdrantService() =>
    _serviceProvider.GetRequiredService<ChatRAG.Services.TextServices.QdrantTextDataService>();
}
}