// VideoStudy/VideoStudy.API/Services/AnalysisService.cs
// 2026-02-09 18:31:08 -03:00
//
// 335 lines
// 14 KiB
// C#

using System.Diagnostics;
using System.Text.RegularExpressions;
using System.Text.Json;
using Microsoft.SemanticKernel;
using Microsoft.SemanticKernel.ChatCompletion;
using QuestPDF.Fluent;
using QuestPDF.Helpers;
using QuestPDF.Infrastructure;
using PuppeteerSharp;
using VideoStudy.Shared;
namespace VideoStudy.API.Services;
/// <summary>
/// Turns a video URL into a step-by-step tutorial: downloads subtitles with yt-dlp,
/// asks the configured chat model to structure them into sections, captures video
/// frames with a headless browser (PuppeteerSharp) and renders a PDF with QuestPDF.
/// </summary>
/// <remarks>
/// NOTE(review): <see cref="_debugSteps"/> is per-instance state that is cleared at the start of
/// every <see cref="AnalyzeVideoAsync"/> call. If this service is registered as a singleton,
/// concurrent requests would interleave their debug steps — confirm the DI lifetime.
/// </remarks>
public class AnalysisService
{
    /// <summary>Maximum number of transcript characters embedded in the LLM prompt.</summary>
    private const int MaxTranscriptChars = 15000;

    /// <summary>Upper bound, in milliseconds, for a single in-page video seek.</summary>
    private const int SeekTimeoutMs = 30000;

    /// <summary>Matches a well-formed screenshot tag and captures its HH:MM:SS timestamp.</summary>
    private static readonly Regex ScreenshotTimestampRegex =
        new(@"\[SCREENSHOT:\s*(\d{2}:\d{2}:\d{2})\]", RegexOptions.Compiled);

    /// <summary>Matches any screenshot tag (well-formed or not) so it can be removed from section text.</summary>
    private static readonly Regex ScreenshotTagRegex =
        new(@"\[SCREENSHOT:[^\]]*\]", RegexOptions.Compiled);

    /// <summary>Matches inline WebVTT markup such as timing tags and <c>&lt;c&gt;</c> spans.</summary>
    private static readonly Regex VttMarkupRegex = new(@"<[^>]*>", RegexOptions.Compiled);

    private readonly Kernel _kernel;
    private readonly ILogger<AnalysisService> _logger;
    private readonly List<string> _debugSteps = new();

    /// <summary>Background Chromium download started by the constructor; never faults.</summary>
    private readonly Task _chromiumReady;

    /// <summary>
    /// Creates the service, selects the QuestPDF community license and starts downloading
    /// Chromium in the background.
    /// </summary>
    /// <param name="kernel">Semantic Kernel instance providing the chat completion service.</param>
    /// <param name="logger">Logger for diagnostics.</param>
    public AnalysisService(Kernel kernel, ILogger<AnalysisService> logger)
    {
        _kernel = kernel;
        _logger = logger;
        QuestPDF.Settings.License = LicenseType.Community;

        // Keep the task instead of firing and forgetting: screenshot capture awaits it so the
        // browser is never launched while the download is still in progress.
        _chromiumReady = Task.Run(() => EnsureChromiumAsync());
    }

    /// <summary>Downloads Chromium if needed. Failures are logged, never thrown.</summary>
    private async Task EnsureChromiumAsync()
    {
        try
        {
            var browserFetcher = new BrowserFetcher();
            await browserFetcher.DownloadAsync();
            _logger.LogInformation("Chromium ready.");
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Failed to download Chromium.");
        }
    }

    /// <summary>Logs a message and appends it, time-stamped, to the debug steps returned to the caller.</summary>
    private void AddLog(string message)
    {
        // Constant template + argument: the message is runtime text, not a logging template.
        _logger.LogInformation("{Message}", message);
        _debugSteps.Add($"[{DateTime.Now:HH:mm:ss}] {message}");
    }

    /// <summary>
    /// Resolves the yt-dlp executable. Windows: looks for <c>yt-dlp.exe</c> in the current directory,
    /// the app base directory and its <c>Binaries</c> folder. Other platforms: looks for the
    /// platform-specific binary under <c>Binaries</c> and marks it executable.
    /// </summary>
    /// <returns>The full path when a bundled binary exists; otherwise <c>"yt-dlp"</c> (resolved via PATH).</returns>
    private string GetYtDlpPath()
    {
        const string fallback = "yt-dlp";
        var baseDir = AppDomain.CurrentDomain.BaseDirectory;

        if (System.Runtime.InteropServices.RuntimeInformation.IsOSPlatform(System.Runtime.InteropServices.OSPlatform.Windows))
        {
            const string exeName = "yt-dlp.exe";
            string[] candidates =
            {
                Path.Combine(Directory.GetCurrentDirectory(), exeName),
                Path.Combine(baseDir, exeName),
                Path.Combine(baseDir, "Binaries", exeName)
            };
            return candidates.FirstOrDefault(File.Exists) ?? fallback;
        }

        var isMac = System.Runtime.InteropServices.RuntimeInformation.IsOSPlatform(System.Runtime.InteropServices.OSPlatform.OSX);
        var fullPath = Path.Combine(baseDir, "Binaries", isMac ? "yt-dlp_macos" : "yt-dlp_linux");
        if (!File.Exists(fullPath))
        {
            return fallback;
        }

        EnsureExecutable(fullPath);
        return fullPath;
    }

    /// <summary>Best-effort <c>chmod +x</c> on a bundled binary; failures are logged and ignored.</summary>
    private void EnsureExecutable(string path)
    {
        try
        {
            var startInfo = new ProcessStartInfo("chmod") { UseShellExecute = false, CreateNoWindow = true };
            startInfo.ArgumentList.Add("+x");
            startInfo.ArgumentList.Add(path);
            using var chmod = Process.Start(startInfo);
            chmod?.WaitForExit();
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "Could not mark {Path} as executable.", path);
        }
    }

    /// <summary>
    /// Runs an external program to completion, capturing stdout and stderr.
    /// </summary>
    /// <param name="fileName">Executable to start.</param>
    /// <param name="arguments">Arguments, passed individually so no shell-style quoting is involved.</param>
    /// <param name="workingDirectory">Optional working directory.</param>
    /// <returns>Exit code and the complete stdout / stderr text.</returns>
    /// <exception cref="InvalidOperationException">The process could not be started.</exception>
    private static async Task<(int ExitCode, string StdOut, string StdErr)> RunProcessAsync(
        string fileName, IEnumerable<string> arguments, string? workingDirectory = null)
    {
        var startInfo = new ProcessStartInfo
        {
            FileName = fileName,
            RedirectStandardOutput = true,
            RedirectStandardError = true,
            UseShellExecute = false,
            CreateNoWindow = true
        };
        if (!string.IsNullOrEmpty(workingDirectory))
        {
            startInfo.WorkingDirectory = workingDirectory;
        }
        foreach (var argument in arguments)
        {
            startInfo.ArgumentList.Add(argument);
        }

        using var proc = Process.Start(startInfo)
            ?? throw new InvalidOperationException($"Failed to start process '{fileName}'.");

        // Drain both pipes while waiting: a redirected stream that is never read fills its
        // buffer, the child blocks on write, and WaitForExitAsync would never complete.
        var stdOutTask = proc.StandardOutput.ReadToEndAsync();
        var stdErrTask = proc.StandardError.ReadToEndAsync();
        await proc.WaitForExitAsync();
        return (proc.ExitCode, await stdOutTask, await stdErrTask);
    }

    /// <summary>
    /// Runs the full pipeline for one video: transcript, LLM tutorial generation,
    /// screenshot capture and PDF rendering.
    /// </summary>
    /// <param name="request">Video URL and target language.</param>
    /// <returns>
    /// A response with <c>Status = "success"</c> and the generated artefacts, or
    /// <c>Status = "error"</c> with the failure message. Failures inside the pipeline are
    /// reported through the response rather than thrown.
    /// </returns>
    public async Task<AnalysisResponse> AnalyzeVideoAsync(AnalysisRequest request)
    {
        _debugSteps.Clear();
        var tempDir = Path.Combine(Path.GetTempPath(), "VideoStudy", Guid.NewGuid().ToString());
        Directory.CreateDirectory(tempDir);
        AddLog($"📁 Inciando processamento em: {tempDir}");
        string rawLlmResponse = "";
        try
        {
            // --- Step 1: Transcription ---
            AddLog("🌐 Obtendo transcrição via yt-dlp...");
            var transcript = await GetTranscriptViaYtDlpAsync(request.VideoUrl, request.Language, tempDir);
            if (string.IsNullOrWhiteSpace(transcript))
            {
                throw new InvalidOperationException("Não foi possível obter a transcrição do vídeo.");
            }
            AddLog($"✅ Transcrição obtida ({transcript.Length} caracteres).");

            // --- Step 2: Intelligence ---
            AddLog("🧠 Enviando transcrição para o Groq (LLM)...");
            var (tutorialSections, rawJson) = await GenerateTutorialContentAsync(transcript, request.Language);
            rawLlmResponse = rawJson;

            await WriteDebugFileAsync(request.VideoUrl, rawJson, tutorialSections);

            // An unparsable or empty LLM answer must not be reported as a successful tutorial.
            if (tutorialSections.Count == 0)
            {
                throw new InvalidOperationException("A IA não retornou nenhuma seção de tutorial válida.");
            }

            // --- Step 3: Image Capture ---
            var sectionsWithImages = tutorialSections.Where(s => !string.IsNullOrEmpty(s.ImageTimestamp)).ToList();
            if (sectionsWithImages.Count > 0)
            {
                AddLog($"📸 Capturando {sectionsWithImages.Count} prints usando Puppeteer (Direct Bypass)...");
                await CaptureScreenshotsWithPuppeteerAsync(request.VideoUrl, tutorialSections, tempDir);
            }
            else
            {
                AddLog("⚠️ Nenhuma tag [SCREENSHOT] foi gerada pela IA.");
            }

            // --- Step 4: PDF Generation ---
            AddLog("📄 Gerando PDF final com QuestPDF...");
            var pdfBytes = GeneratePdf(request.VideoUrl, tutorialSections);
            AddLog("🎉 Processamento concluído com sucesso!");

            return new AnalysisResponse
            {
                Status = "success",
                VideoTitle = request.VideoUrl,
                Transcript = transcript,
                TutorialSections = tutorialSections,
                PdfData = pdfBytes,
                DebugSteps = new List<string>(_debugSteps),
                RawLlmResponse = rawLlmResponse,
                Analysis = "Tutorial gerado com sucesso!"
            };
        }
        catch (Exception ex)
        {
            AddLog($"❌ ERRO: {ex.Message}");
            return new AnalysisResponse
            {
                Status = "error",
                ErrorMessage = ex.Message,
                DebugSteps = new List<string>(_debugSteps),
                RawLlmResponse = rawLlmResponse
            };
        }
        finally
        {
            if (Directory.Exists(tempDir))
            {
                try
                {
                    Directory.Delete(tempDir, true);
                }
                catch (Exception ex)
                {
                    // Cleanup is best-effort; a leftover temp directory must not mask the result.
                    _logger.LogDebug(ex, "Could not delete temporary directory {TempDir}.", tempDir);
                }
            }
        }
    }

    /// <summary>
    /// Writes the raw LLM answer and parsed sections to <c>DEBUG_LAST_RESPONSE.md</c> in the current
    /// directory. Purely diagnostic: I/O failures are logged and do not abort the analysis.
    /// </summary>
    private async Task WriteDebugFileAsync(string videoUrl, string rawJson, IEnumerable<TutorialSection> sections)
    {
        var debugFile = Path.Combine(Directory.GetCurrentDirectory(), "DEBUG_LAST_RESPONSE.md");
        var debugContent = $"# Debug Groq Response\n\nURL: {videoUrl}\n\n## Raw JSON\n```json\n{rawJson}\n```\n\n## Sections\n" +
            string.Join("\n\n", sections.Select(s => $"### {s.Title}\n{s.Content}\n**Timestamp:** {s.ImageTimestamp}"));
        try
        {
            await File.WriteAllTextAsync(debugFile, debugContent);
            AddLog($"📝 Arquivo de debug gerado: {debugFile}");
        }
        catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
        {
            _logger.LogWarning(ex, "Could not write debug file {DebugFile}.", debugFile);
        }
    }

    /// <summary>
    /// Downloads subtitles (manual or auto-generated) for the video as WebVTT into
    /// <paramref name="outputDir"/> and returns them as plain text.
    /// </summary>
    /// <param name="url">Video URL handed to yt-dlp.</param>
    /// <param name="language">Preferred subtitle language; English is always requested as well.</param>
    /// <param name="outputDir">Working directory where yt-dlp writes the <c>.vtt</c> files.</param>
    /// <returns>The transcript text, or an empty string when no subtitle file was produced.</returns>
    private async Task<string> GetTranscriptViaYtDlpAsync(string url, string language, string outputDir)
    {
        var subLangs = string.IsNullOrWhiteSpace(language) ? "en" : $"{language},en";

        // Individual arguments plus the "--" terminator: a URL can neither break out of quoting
        // nor be interpreted as a yt-dlp option.
        var arguments = new[]
        {
            "--skip-download", "--write-sub", "--write-auto-sub",
            "--sub-lang", subLangs,
            "--sub-format", "vtt",
            "--output", "%(title)s",
            "--", url
        };

        var (exitCode, _, stdErr) = await RunProcessAsync(GetYtDlpPath(), arguments, outputDir);
        if (exitCode != 0)
        {
            _logger.LogWarning("yt-dlp exited with code {ExitCode} while fetching subtitles: {StdErr}", exitCode, stdErr);
        }

        var vttFiles = Directory.GetFiles(outputDir, "*.vtt");

        // Both the requested language and English may have been written; prefer the requested one
        // instead of whatever the file system enumerates first.
        var vttFile = vttFiles.FirstOrDefault(f => f.EndsWith($".{language}.vtt", StringComparison.OrdinalIgnoreCase))
            ?? vttFiles.FirstOrDefault();
        if (vttFile == null)
        {
            return string.Empty;
        }

        return ParseVttToText(await File.ReadAllTextAsync(vttFile));
    }

    /// <summary>
    /// Flattens WebVTT content into a single line of text: header, NOTE/STYLE/REGION blocks,
    /// cue identifiers and timing lines are dropped, inline markup is stripped, entities are
    /// decoded, and each distinct text line is kept once (first occurrence wins).
    /// </summary>
    /// <param name="vttContent">Raw content of a <c>.vtt</c> file.</param>
    /// <returns>The caption lines joined by single spaces.</returns>
    private string ParseVttToText(string vttContent)
    {
        var lines = vttContent.Split('\n');
        var textLines = new List<string>();
        var seen = new HashSet<string>(StringComparer.Ordinal);
        var skippingBlock = false; // inside WEBVTT header / NOTE / STYLE / REGION block
        var inCue = false;         // a timing line was seen since the last empty line

        for (var i = 0; i < lines.Length; i++)
        {
            var raw = lines[i].TrimEnd('\r');
            if (raw.Length == 0)
            {
                // Only a truly empty line terminates a block; whitespace-only payload lines do not.
                skippingBlock = false;
                inCue = false;
                continue;
            }

            var line = raw.Trim();
            if (line.Length == 0 || skippingBlock)
            {
                continue;
            }

            if (line.Contains("-->", StringComparison.Ordinal))
            {
                inCue = true;
                continue;
            }

            if (!inCue)
            {
                if (IsVttMetadataBlockStart(line))
                {
                    skippingBlock = true;
                    continue;
                }

                // A line directly followed by a timing line is a cue identifier, not caption text.
                if (i + 1 < lines.Length && lines[i + 1].Contains("-->", StringComparison.Ordinal))
                {
                    continue;
                }
            }

            // Strip markup first, then decode entities, so an escaped "&lt;b&gt;" stays literal text.
            var text = System.Net.WebUtility.HtmlDecode(VttMarkupRegex.Replace(line, "")).Trim();
            if (text.Length > 0 && seen.Add(text))
            {
                textLines.Add(text);
            }
        }

        return string.Join(" ", textLines);
    }

    /// <summary>True when the line opens a WebVTT block that carries no caption text.</summary>
    private static bool IsVttMetadataBlockStart(string line) =>
        line.StartsWith("WEBVTT", StringComparison.Ordinal)
        || line == "NOTE"
        || line.StartsWith("NOTE ", StringComparison.Ordinal)
        || line.StartsWith("NOTE\t", StringComparison.Ordinal)
        || line.StartsWith("STYLE", StringComparison.Ordinal)
        || line.StartsWith("REGION", StringComparison.Ordinal);

    /// <summary>
    /// Asks the chat model to convert the transcript into tutorial sections and parses its JSON answer.
    /// </summary>
    /// <param name="transcript">Plain-text transcript; only the first <see cref="MaxTranscriptChars"/> characters are sent.</param>
    /// <param name="language">Language the tutorial should be written in.</param>
    /// <returns>
    /// The parsed sections (empty when the answer is not the expected JSON) and the model's
    /// answer with any surrounding Markdown code fence removed.
    /// </returns>
    private async Task<(List<TutorialSection> sections, string rawJson)> GenerateTutorialContentAsync(string transcript, string language)
    {
        var chatService = _kernel.GetRequiredService<IChatCompletionService>();
        var prompt = $@"
Converta a transcrição abaixo em um Tutorial Passo a Passo em {language}.
REGRAS:
1. Divida em passos lógicos.
2. Identifique onde um print da tela é necessário e insira [SCREENSHOT: HH:MM:SS].
3. Retorne APENAS JSON.
JSON:
{{
""sections"": [
{{ ""title"": ""Passo 1"", ""content"": ""Texto... [SCREENSHOT: 00:01:20]"" }}
]
}}
Transcrição: {transcript[..Math.Min(transcript.Length, MaxTranscriptChars)]}";

        var result = await chatService.GetChatMessageContentAsync(prompt);
        var json = StripCodeFence(result.Content ?? "{}");

        var sections = new List<TutorialSection>();
        try
        {
            using var doc = JsonDocument.Parse(json);
            var root = doc.RootElement;
            if (root.ValueKind == JsonValueKind.Object
                && root.TryGetProperty("sections", out var sectionArray)
                && sectionArray.ValueKind == JsonValueKind.Array)
            {
                foreach (var el in sectionArray.EnumerateArray())
                {
                    if (el.ValueKind != JsonValueKind.Object)
                    {
                        continue;
                    }

                    var content = GetStringOrEmpty(el, "content");
                    sections.Add(new TutorialSection
                    {
                        Title = GetStringOrEmpty(el, "title"),
                        // Remove every screenshot tag, whatever its spacing, so none leaks into the PDF.
                        Content = ScreenshotTagRegex.Replace(content, "").Trim(),
                        ImageTimestamp = ExtractTimestamp(content)
                    });
                }
            }
            else
            {
                AddLog("⚠️ Resposta da IA não contém a lista \"sections\".");
            }
        }
        catch (JsonException ex)
        {
            _logger.LogWarning(ex, "LLM response is not valid JSON.");
            AddLog($"⚠️ Resposta da IA não é um JSON válido: {ex.Message}");
        }

        return (sections, json);
    }

    /// <summary>Removes a surrounding Markdown code fence (with optional language tag) from the text.</summary>
    private static string StripCodeFence(string text)
    {
        var json = text.Trim();
        if (!json.StartsWith("```", StringComparison.Ordinal))
        {
            return json;
        }

        var firstNewline = json.IndexOf('\n');
        if (firstNewline > 0)
        {
            json = json[(firstNewline + 1)..];
        }
        if (json.EndsWith("```", StringComparison.Ordinal))
        {
            json = json[..^3];
        }
        return json.Trim();
    }

    /// <summary>Reads a string property, returning an empty string when it is missing or not a string.</summary>
    private static string GetStringOrEmpty(JsonElement element, string propertyName) =>
        element.TryGetProperty(propertyName, out var value) && value.ValueKind == JsonValueKind.String
            ? value.GetString() ?? ""
            : "";

    /// <summary>
    /// Extracts the timestamp of the first <c>[SCREENSHOT: HH:MM:SS]</c> tag in the text.
    /// </summary>
    /// <returns>The timestamp as <c>HH:MM:SS</c>, or <c>null</c> when no well-formed tag is present.</returns>
    private string? ExtractTimestamp(string text)
    {
        var match = ScreenshotTimestampRegex.Match(text);
        return match.Success ? match.Groups[1].Value : null;
    }

    /// <summary>
    /// Asks yt-dlp (<c>-g -f b</c>) for the direct media URL of the video.
    /// </summary>
    /// <returns>The first URL printed by yt-dlp, or <c>null</c> when none was produced or yt-dlp could not be started.</returns>
    private async Task<string?> GetRawVideoStreamUrlAsync(string videoUrl)
    {
        try
        {
            var (exitCode, stdOut, stdErr) = await RunProcessAsync(
                GetYtDlpPath(), new[] { "-g", "-f", "b", "--", videoUrl });
            if (exitCode != 0)
            {
                _logger.LogWarning("yt-dlp exited with code {ExitCode} while resolving the stream URL: {StdErr}", exitCode, stdErr);
            }

            return stdOut
                .Split('\n', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries)
                .FirstOrDefault();
        }
        catch (Exception ex) when (ex is InvalidOperationException or System.ComponentModel.Win32Exception)
        {
            // Screenshots are optional; report "no URL" instead of failing the whole analysis.
            _logger.LogError(ex, "Could not run yt-dlp to resolve the stream URL.");
            return null;
        }
    }

    /// <summary>
    /// Captures one frame per section that carries an <c>ImageTimestamp</c> and stores the JPEG
    /// bytes in the section's <c>ImageData</c>. The direct media URL is loaded in a bare
    /// <c>&lt;video&gt;</c> element inside headless Chromium, sought to each timestamp and screenshotted.
    /// </summary>
    /// <param name="videoUrl">Original video URL (resolved to a direct stream via yt-dlp).</param>
    /// <param name="sections">All tutorial sections; only those with a timestamp are processed.</param>
    /// <param name="outputDir">Directory where intermediate <c>snap_*.jpg</c> files are written.</param>
    /// <remarks>Best-effort: failures are recorded in the debug steps and leave <c>ImageData</c> unset.</remarks>
    private async Task CaptureScreenshotsWithPuppeteerAsync(string videoUrl, List<TutorialSection> sections, string outputDir)
    {
        var sectionsWithImages = sections.Where(s => !string.IsNullOrEmpty(s.ImageTimestamp)).ToList();
        if (sectionsWithImages.Count == 0)
        {
            return;
        }

        AddLog("🔍 Obtendo link direto do vídeo (Bypass YouTube Player)...");
        var rawVideoUrl = await GetRawVideoStreamUrlAsync(videoUrl);
        if (string.IsNullOrEmpty(rawVideoUrl))
        {
            AddLog("❌ Falha ao obter link direto.");
            return;
        }

        // The constructor started the Chromium download; make sure it has finished before launching.
        await _chromiumReady;

        try
        {
            await using var browser = await Puppeteer.LaunchAsync(new LaunchOptions
            {
                Headless = true,
                Args = new[] { "--no-sandbox", "--window-size=1280,720" }
            });
            await using var page = await browser.NewPageAsync();
            await page.SetViewportAsync(new ViewPortOptions { Width = 1280, Height = 720 });

            // The URL comes from an external tool: encode it before embedding it in an attribute.
            var encodedSrc = System.Net.WebUtility.HtmlEncode(rawVideoUrl);
            var html = $@"<html><body style='margin:0;background:black;overflow:hidden;'><video id='v' width='1280' height='720' muted><source src='{encodedSrc}' type='video/mp4'></video></body></html>";
            await page.SetContentAsync(html);
            await page.WaitForSelectorAsync("video");

            foreach (var section in sectionsWithImages)
            {
                if (!TimeSpan.TryParse(section.ImageTimestamp, System.Globalization.CultureInfo.InvariantCulture, out var ts))
                {
                    AddLog($"⚠️ Timestamp inválido ignorado: {section.ImageTimestamp}");
                    continue;
                }

                var sec = (int)ts.TotalSeconds;
                AddLog($"🌐 Renderizando frame: {section.ImageTimestamp}...");

                // Seeking only works once metadata is available, and 'seeked' never fires when the
                // stream fails to load. Wait for metadata, reject on load errors, and bound the wait
                // so the promise always settles.
                await page.EvaluateFunctionAsync(@"(s, timeoutMs) => {
                    return new Promise((resolve, reject) => {
                        const v = document.getElementById('v');
                        const src = v.querySelector('source');
                        const timer = setTimeout(() => reject(new Error('Timed out seeking video')), timeoutMs);
                        const fail = () => { clearTimeout(timer); reject(new Error('Video failed to load')); };
                        const seek = () => {
                            v.addEventListener('seeked', () => { clearTimeout(timer); resolve(true); }, { once: true });
                            v.currentTime = s;
                        };
                        v.addEventListener('error', fail, { once: true });
                        if (src) src.addEventListener('error', fail, { once: true });
                        if (v.readyState >= 1) seek();
                        else v.addEventListener('loadedmetadata', seek, { once: true });
                    });
                }", sec, SeekTimeoutMs);

                // Give the compositor a moment to paint the sought frame.
                await Task.Delay(500);

                var path = Path.Combine(outputDir, $"snap_{sec}.jpg");
                await page.ScreenshotAsync(path, new ScreenshotOptions { Type = ScreenshotType.Jpeg, Quality = 90 });
                if (File.Exists(path))
                {
                    section.ImageData = await File.ReadAllBytesAsync(path);
                }
            }
        }
        catch (Exception ex)
        {
            AddLog($"❌ Erro Puppeteer: {ex.Message}");
        }
    }

    /// <summary>
    /// Renders the tutorial as a PDF: a header, the source URL, then each section's title,
    /// text and (when captured) screenshot, with a page-numbered footer.
    /// </summary>
    /// <param name="videoUrl">Source URL printed below the header.</param>
    /// <param name="sections">Sections to render, in order.</param>
    /// <returns>The PDF document bytes.</returns>
    private byte[] GeneratePdf(string videoUrl, List<TutorialSection> sections)
    {
        var document = Document.Create(container =>
        {
            container.Page(page =>
            {
                page.Margin(2, Unit.Centimetre);
                page.DefaultTextStyle(x => x.FontSize(11).FontFamily("Arial"));
                page.Header().Text("Video Tutorial").SemiBold().FontSize(24).FontColor(Colors.Blue.Medium);

                page.Content().PaddingVertical(1, Unit.Centimetre).Column(column =>
                {
                    column.Item().Text($"Fonte: {videoUrl}").Italic().FontSize(10).FontColor(Colors.Grey.Medium);
                    column.Item().PaddingBottom(20);

                    foreach (var section in sections)
                    {
                        column.Item().Text(section.Title).Bold().FontSize(16);
                        column.Item().Text(text => { text.Span(section.Content); });
                        if (section.ImageData != null)
                        {
                            column.Item().PaddingVertical(15).Image(section.ImageData).FitWidth();
                        }
                        column.Item().PaddingBottom(20);
                    }
                });

                page.Footer().AlignCenter().Text(x =>
                {
                    x.Span("Gerado por VideoStudy.app - ");
                    x.CurrentPageNumber();
                });
            });
        });

        return document.GeneratePdf();
    }
}