using System.Diagnostics;
using System.Text.RegularExpressions;
using System.Text.Json;
using Microsoft.SemanticKernel;
using Microsoft.SemanticKernel.ChatCompletion;
using QuestPDF.Fluent;
using QuestPDF.Helpers;
using QuestPDF.Infrastructure;
using PuppeteerSharp;
using VideoStudy.Shared;

namespace VideoStudy.API.Services;

/// <summary>
/// Turns a video URL into a step-by-step tutorial: downloads the subtitles with yt-dlp,
/// asks the chat-completion service to structure them into sections, captures frames
/// with a headless browser for sections that request a screenshot, and renders a PDF.
/// </summary>
public class AnalysisService
{
    /// <summary>Executable name used when no bundled yt-dlp binary is found (resolved via PATH).</summary>
    private const string FallbackYtDlp = "yt-dlp";

    /// <summary>Maximum number of transcript characters embedded in the LLM prompt.</summary>
    private const int MaxTranscriptChars = 15000;

    /// <summary>Matches a well-formed screenshot tag and captures its HH:MM:SS timestamp.</summary>
    private static readonly Regex ScreenshotTimestampRegex =
        new(@"\[SCREENSHOT:\s*(\d{2}:\d{2}:\d{2})\]", RegexOptions.Compiled);

    /// <summary>Matches any screenshot tag (well-formed or not) so it can be removed from the text.</summary>
    private static readonly Regex ScreenshotTagRegex =
        new(@"\[SCREENSHOT:[^\]]*\]", RegexOptions.Compiled);

    /// <summary>Matches inline markup inside subtitle cue text.</summary>
    private static readonly Regex VttMarkupRegex = new(@"<[^>]*>", RegexOptions.Compiled);

    private readonly Kernel _kernel;
    private readonly ILogger<AnalysisService> _logger;
    private readonly List<string> _debugSteps = new();

    /// <summary>Completes when the background Chromium download has finished (successfully or not).</summary>
    private readonly Task _chromiumReady;

    /// <summary>
    /// Creates the service, sets the QuestPDF community license and starts downloading
    /// Chromium in the background so construction is not blocked by the download.
    /// </summary>
    /// <param name="kernel">Kernel from which the chat-completion service is resolved.</param>
    /// <param name="logger">Logger used for diagnostics.</param>
    public AnalysisService(Kernel kernel, ILogger<AnalysisService> logger)
    {
        _kernel = kernel;
        _logger = logger;
        QuestPDF.Settings.License = LicenseType.Community;

        // Keep the task so the screenshot step can wait for it instead of racing the download.
        _chromiumReady = Task.Run(() => EnsureChromiumAsync());
    }

    /// <summary>Downloads Chromium for Puppeteer; failures are logged and never thrown.</summary>
    private async Task EnsureChromiumAsync()
    {
        try
        {
            var browserFetcher = new BrowserFetcher();
            await browserFetcher.DownloadAsync();
            _logger.LogInformation("Chromium ready.");
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Failed to download Chromium.");
        }
    }

    /// <summary>Writes a message to the logger and appends it, time-stamped, to the debug steps.</summary>
    /// <param name="message">Text to record.</param>
    private void AddLog(string message)
    {
        // Constant template: the message may contain braces (paths, JSON, exception text).
        _logger.LogInformation("{Message}", message);
        _debugSteps.Add($"[{DateTime.Now:HH:mm:ss}] {message}");
    }

    /// <summary>
    /// Resolves the yt-dlp executable. On Windows it probes the current directory, the
    /// application base directory and its "Binaries" folder; elsewhere it uses the
    /// platform-specific binary in "Binaries" and marks it executable. Falls back to
    /// plain "yt-dlp" when no bundled binary exists.
    /// </summary>
    /// <returns>Full path of a bundled binary, or "yt-dlp".</returns>
    private string GetYtDlpPath()
    {
        var baseDir = AppDomain.CurrentDomain.BaseDirectory;

        if (System.Runtime.InteropServices.RuntimeInformation.IsOSPlatform(System.Runtime.InteropServices.OSPlatform.Windows))
        {
            const string exeName = "yt-dlp.exe";
            var candidates = new[]
            {
                Path.Combine(Directory.GetCurrentDirectory(), exeName),
                Path.Combine(baseDir, exeName),
                Path.Combine(baseDir, "Binaries", exeName)
            };
            return candidates.FirstOrDefault(candidate => File.Exists(candidate)) ?? FallbackYtDlp;
        }

        var executableName =
            System.Runtime.InteropServices.RuntimeInformation.IsOSPlatform(System.Runtime.InteropServices.OSPlatform.OSX)
                ? "yt-dlp_macos"
                : "yt-dlp_linux";
        var fullPath = Path.Combine(baseDir, "Binaries", executableName);
        if (!File.Exists(fullPath))
        {
            return FallbackYtDlp;
        }

        try
        {
            // ArgumentList avoids quoting problems with unusual characters in the path.
            var chmodInfo = new ProcessStartInfo("chmod") { UseShellExecute = false, CreateNoWindow = true };
            chmodInfo.ArgumentList.Add("+x");
            chmodInfo.ArgumentList.Add(fullPath);
            using var chmod = Process.Start(chmodInfo);
            chmod?.WaitForExit();
        }
        catch (Exception ex)
        {
            // Best effort: the binary may already be executable.
            _logger.LogWarning(ex, "Could not mark {Path} as executable.", fullPath);
        }

        return fullPath;
    }

    /// <summary>
    /// Runs yt-dlp with the given arguments and captures its output. Both output streams
    /// are drained while the process runs so it cannot block on a full pipe.
    /// </summary>
    /// <param name="workingDirectory">Working directory for the process, or null to inherit.</param>
    /// <param name="arguments">Arguments, passed individually (no shell quoting involved).</param>
    /// <returns>Exit code and captured output, or null when the process could not be started.</returns>
    private async Task<(int ExitCode, string StdOut, string StdErr)?> RunYtDlpAsync(
        string? workingDirectory,
        params string[] arguments)
    {
        var startInfo = new ProcessStartInfo
        {
            FileName = GetYtDlpPath(),
            RedirectStandardOutput = true,
            RedirectStandardError = true,
            UseShellExecute = false,
            CreateNoWindow = true
        };
        if (!string.IsNullOrEmpty(workingDirectory))
        {
            startInfo.WorkingDirectory = workingDirectory;
        }
        foreach (var argument in arguments)
        {
            startInfo.ArgumentList.Add(argument);
        }

        using var proc = Process.Start(startInfo);
        if (proc is null)
        {
            return null;
        }

        var stdOutTask = proc.StandardOutput.ReadToEndAsync();
        var stdErrTask = proc.StandardError.ReadToEndAsync();
        await proc.WaitForExitAsync();
        return (proc.ExitCode, await stdOutTask, await stdErrTask);
    }

    /// <summary>
    /// Runs the whole pipeline for one request: transcript, LLM tutorial, screenshots, PDF.
    /// Failures are reported through the returned response rather than thrown.
    /// </summary>
    /// <param name="request">Video URL and target language.</param>
    /// <returns>
    /// A response with status "success" and the generated artifacts, or status "error"
    /// with the error message; both carry the collected debug steps.
    /// </returns>
    public async Task<AnalysisResponse> AnalyzeVideoAsync(AnalysisRequest request)
    {
        _debugSteps.Clear();
        var tempDir = Path.Combine(Path.GetTempPath(), "VideoStudy", Guid.NewGuid().ToString());
        Directory.CreateDirectory(tempDir);
        AddLog($"📁 Inciando processamento em: {tempDir}");
        string rawLlmResponse = "";

        try
        {
            // --- Step 1: Transcription ---
            AddLog("🌐 Obtendo transcrição via yt-dlp...");
            var transcript = await GetTranscriptViaYtDlpAsync(request.VideoUrl, request.Language, tempDir);
            if (string.IsNullOrWhiteSpace(transcript))
            {
                throw new InvalidOperationException("Não foi possível obter a transcrição do vídeo.");
            }
            AddLog($"✅ Transcrição obtida ({transcript.Length} caracteres).");

            // --- Step 2: Intelligence ---
            AddLog("🧠 Enviando transcrição para o Groq (LLM)...");
            var (tutorialSections, rawJson) = await GenerateTutorialContentAsync(transcript, request.Language);
            rawLlmResponse = rawJson;
            await WriteDebugFileAsync(request.VideoUrl, rawJson, tutorialSections);

            // --- Step 3: Image Capture ---
            var sectionsWithImages = tutorialSections.Where(s => !string.IsNullOrEmpty(s.ImageTimestamp)).ToList();
            if (sectionsWithImages.Any())
            {
                AddLog($"📸 Capturando {sectionsWithImages.Count} prints usando Puppeteer (Direct Bypass)...");
                await CaptureScreenshotsWithPuppeteerAsync(request.VideoUrl, tutorialSections, tempDir);
            }
            else
            {
                AddLog("⚠️ Nenhuma tag [SCREENSHOT] foi gerada pela IA.");
            }

            // --- Step 4: PDF Generation ---
            AddLog("📄 Gerando PDF final com QuestPDF...");
            var pdfBytes = GeneratePdf(request.VideoUrl, tutorialSections);
            AddLog("🎉 Processamento concluído com sucesso!");

            return new AnalysisResponse
            {
                Status = "success",
                VideoTitle = request.VideoUrl,
                Transcript = transcript,
                TutorialSections = tutorialSections,
                PdfData = pdfBytes,
                DebugSteps = new List<string>(_debugSteps),
                RawLlmResponse = rawLlmResponse,
                Analysis = "Tutorial gerado com sucesso!"
            };
        }
        catch (Exception ex)
        {
            AddLog($"❌ ERRO: {ex.Message}");
            return new AnalysisResponse
            {
                Status = "error",
                ErrorMessage = ex.Message,
                DebugSteps = new List<string>(_debugSteps),
                RawLlmResponse = rawLlmResponse
            };
        }
        finally
        {
            if (Directory.Exists(tempDir))
            {
                try
                {
                    Directory.Delete(tempDir, true);
                }
                catch (Exception ex)
                {
                    // Cleanup is best effort; a leftover temp folder must not fail the request.
                    _logger.LogWarning(ex, "Could not delete temporary directory {TempDir}.", tempDir);
                }
            }
        }
    }

    /// <summary>
    /// Saves the raw LLM answer and the parsed sections to DEBUG_LAST_RESPONSE.md in the
    /// current directory. A write failure is logged and does not abort the analysis.
    /// </summary>
    private async Task WriteDebugFileAsync(string videoUrl, string rawJson, List<TutorialSection> sections)
    {
        var debugFile = Path.Combine(Directory.GetCurrentDirectory(), "DEBUG_LAST_RESPONSE.md");
        var debugContent =
            $"# Debug Groq Response\n\nURL: {videoUrl}\n\n## Raw JSON\n```json\n{rawJson}\n```\n\n## Sections\n" +
            string.Join("\n\n", sections.Select(s => $"### {s.Title}\n{s.Content}\n**Timestamp:** {s.ImageTimestamp}"));

        try
        {
            await File.WriteAllTextAsync(debugFile, debugContent);
            AddLog($"📝 Arquivo de debug gerado: {debugFile}");
        }
        catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
        {
            AddLog($"⚠️ Não foi possível gravar o arquivo de debug: {ex.Message}");
        }
    }

    /// <summary>
    /// Downloads the video's subtitles (manual or automatic) as WebVTT into
    /// <paramref name="outputDir"/> and converts them to plain text.
    /// </summary>
    /// <param name="url">Video URL.</param>
    /// <param name="language">Preferred subtitle language; English is requested as a second option.</param>
    /// <param name="outputDir">Directory in which yt-dlp writes the subtitle files.</param>
    /// <returns>The transcript text, or an empty string when no subtitle file was produced.</returns>
    private async Task<string> GetTranscriptViaYtDlpAsync(string url, string language, string outputDir)
    {
        var result = await RunYtDlpAsync(
            outputDir,
            "--skip-download",
            "--write-sub",
            "--write-auto-sub",
            "--sub-lang", $"{language},en",
            "--sub-format", "vtt",
            "--output", "%(title)s",
            "--", // end of options: the URL can never be interpreted as a flag
            url);

        if (result is not { } run)
        {
            AddLog("❌ Não foi possível iniciar o yt-dlp.");
            return string.Empty;
        }
        if (run.ExitCode != 0)
        {
            AddLog($"⚠️ yt-dlp terminou com código {run.ExitCode}: {run.StdErr.Trim()}");
        }

        var vttFiles = Directory.GetFiles(outputDir, "*.vtt");
        // Prefer the requested language over the English fallback when both were downloaded.
        var vttFile =
            vttFiles.FirstOrDefault(f => f.EndsWith($".{language}.vtt", StringComparison.OrdinalIgnoreCase))
            ?? vttFiles.FirstOrDefault();
        if (vttFile is null)
        {
            return string.Empty;
        }

        return ParseVttToText(await File.ReadAllTextAsync(vttFile));
    }

    /// <summary>
    /// Extracts the spoken text from WebVTT content: drops the header block, NOTE lines,
    /// cue timing lines and inline markup, and keeps each distinct text line only once.
    /// </summary>
    /// <param name="vttContent">Raw WebVTT file content.</param>
    /// <returns>The distinct text lines joined by single spaces.</returns>
    private string ParseVttToText(string vttContent)
    {
        var textLines = new List<string>();
        var seen = new HashSet<string>();
        var inHeader = false;

        foreach (var rawLine in vttContent.Split('\n'))
        {
            var line = rawLine.Trim();

            if (line.StartsWith("WEBVTT", StringComparison.Ordinal))
            {
                // Header metadata (e.g. "Kind:" / "Language:") runs until the first blank line.
                inHeader = true;
                continue;
            }
            if (line.Length == 0)
            {
                inHeader = false;
                continue;
            }
            if (inHeader
                || line.StartsWith("NOTE", StringComparison.Ordinal)
                || line.Contains("-->", StringComparison.Ordinal))
            {
                continue;
            }

            line = System.Net.WebUtility.HtmlDecode(VttMarkupRegex.Replace(line, "")).Trim();
            if (line.Length == 0)
            {
                continue; // the line held markup only
            }

            if (seen.Add(line))
            {
                textLines.Add(line);
            }
        }

        return string.Join(" ", textLines);
    }

    /// <summary>
    /// Asks the chat-completion service to turn the transcript into tutorial sections and
    /// parses the JSON answer. Parsing is best effort: an unusable answer yields an empty
    /// list and a logged warning.
    /// </summary>
    /// <param name="transcript">Transcript text; only the first 15000 characters are sent.</param>
    /// <param name="language">Language the tutorial should be written in.</param>
    /// <returns>The parsed sections and the JSON text (code fences removed) that was parsed.</returns>
    private async Task<(List<TutorialSection> sections, string rawJson)> GenerateTutorialContentAsync(
        string transcript,
        string language)
    {
        var chatService = _kernel.GetRequiredService<IChatCompletionService>();
        var excerpt = transcript[..Math.Min(transcript.Length, MaxTranscriptChars)];

        var prompt = $@"
Converta a transcrição abaixo em um Tutorial Passo a Passo em {language}.

REGRAS:
1. Divida em passos lógicos.
2. Identifique onde um print da tela é necessário e insira [SCREENSHOT: HH:MM:SS].
3. Retorne APENAS JSON.

JSON:
{{
  ""sections"": [
    {{ ""title"": ""Passo 1"", ""content"": ""Texto... [SCREENSHOT: 00:01:20]"" }}
  ]
}}

Transcrição:
{excerpt}";

        var result = await chatService.GetChatMessageContentAsync(prompt);
        var json = result.Content?.Trim() ?? "{}";

        // Strip a Markdown code fence (```json ... ```) if the model wrapped its answer in one.
        if (json.StartsWith("```", StringComparison.Ordinal))
        {
            var firstLineEnd = json.IndexOf('\n');
            if (firstLineEnd > 0)
            {
                json = json[(firstLineEnd + 1)..];
            }
            if (json.EndsWith("```", StringComparison.Ordinal))
            {
                json = json[..^3];
            }
            json = json.Trim();
        }

        var sections = new List<TutorialSection>();
        try
        {
            using var doc = JsonDocument.Parse(json);
            var root = doc.RootElement;
            if (root.ValueKind == JsonValueKind.Object
                && root.TryGetProperty("sections", out var sectionsElement)
                && sectionsElement.ValueKind == JsonValueKind.Array)
            {
                foreach (var element in sectionsElement.EnumerateArray())
                {
                    var content = ReadString(element, "content");
                    sections.Add(new TutorialSection
                    {
                        Title = ReadString(element, "title"),
                        // Remove every tag, whatever its spacing, so none leaks into the PDF text.
                        Content = ScreenshotTagRegex.Replace(content, "").Trim(),
                        ImageTimestamp = ExtractTimestamp(content)
                    });
                }
            }
            else
            {
                AddLog("⚠️ A resposta do LLM não contém a lista \"sections\".");
            }
        }
        catch (JsonException ex)
        {
            AddLog($"⚠️ A resposta do LLM não é um JSON válido: {ex.Message}");
        }

        return (sections, json);
    }

    /// <summary>Reads a string property from a JSON object, returning "" when absent or not a string.</summary>
    private static string ReadString(JsonElement element, string propertyName)
    {
        if (element.ValueKind == JsonValueKind.Object
            && element.TryGetProperty(propertyName, out var value)
            && value.ValueKind == JsonValueKind.String)
        {
            return value.GetString() ?? "";
        }
        return "";
    }

    /// <summary>Returns the HH:MM:SS timestamp of the first screenshot tag in the text, or null.</summary>
    /// <param name="text">Section content as returned by the LLM.</param>
    private string? ExtractTimestamp(string text)
    {
        var match = ScreenshotTimestampRegex.Match(text);
        return match.Success ? match.Groups[1].Value : null;
    }

    /// <summary>Asks yt-dlp (-g -f b) for the direct media URL of the video.</summary>
    /// <param name="videoUrl">Video page URL.</param>
    /// <returns>The first URL printed by yt-dlp, or null when none was produced.</returns>
    private async Task<string?> GetRawVideoStreamUrl(string videoUrl)
    {
        var result = await RunYtDlpAsync(null, "-g", "-f", "b", "--", videoUrl);
        if (result is not { } run)
        {
            return null;
        }

        return run.StdOut
            .Split('\n', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries)
            .FirstOrDefault();
    }

    /// <summary>
    /// Captures one frame per section that has a timestamp: loads the direct media URL in a
    /// bare video page in headless Chromium, seeks to each timestamp and stores a JPEG
    /// screenshot in the section's ImageData. Browser errors are logged, not thrown.
    /// </summary>
    /// <param name="videoUrl">Video page URL.</param>
    /// <param name="sections">Tutorial sections; those with a timestamp receive image data.</param>
    /// <param name="outputDir">Directory in which the screenshot files are written.</param>
    private async Task CaptureScreenshotsWithPuppeteerAsync(string videoUrl, List<TutorialSection> sections, string outputDir)
    {
        var sectionsWithImages = sections.Where(s => !string.IsNullOrEmpty(s.ImageTimestamp)).ToList();
        if (!sectionsWithImages.Any())
        {
            return;
        }

        AddLog("🔍 Obtendo link direto do vídeo (Bypass YouTube Player)...");
        var rawVideoUrl = await GetRawVideoStreamUrl(videoUrl);
        if (string.IsNullOrEmpty(rawVideoUrl))
        {
            AddLog("❌ Falha ao obter link direto.");
            return;
        }

        try
        {
            // The download was started in the constructor; launching before it ends would fail.
            await _chromiumReady;

            await using var browser = await Puppeteer.LaunchAsync(new LaunchOptions
            {
                Headless = true,
                Args = new[] { "--no-sandbox", "--window-size=1280,720" }
            });
            await using var page = await browser.NewPageAsync();
            await page.SetViewportAsync(new ViewPortOptions { Width = 1280, Height = 720 });

            // Minimal page holding only the video; the seek script looks it up by id "v".
            var html = $@"<!DOCTYPE html>
<html>
<body style=""margin:0;background:#000;"">
<video id=""v"" src=""{System.Net.WebUtility.HtmlEncode(rawVideoUrl)}"" muted preload=""auto"" style=""width:100vw;height:100vh;object-fit:contain;""></video>
</body>
</html>";
            await page.SetContentAsync(html);
            await page.WaitForSelectorAsync("video");

            foreach (var section in sectionsWithImages)
            {
                if (!TimeSpan.TryParse(section.ImageTimestamp, System.Globalization.CultureInfo.InvariantCulture, out var ts))
                {
                    continue;
                }

                var sec = (int)ts.TotalSeconds;
                AddLog($"🌐 Renderizando frame: {section.ImageTimestamp}...");

                // Seeking before metadata is available raises no 'seeked' event, so wait for it first.
                await page.EvaluateFunctionAsync(@"(s) => {
                    return new Promise(resolve => {
                        const v = document.getElementById('v');
                        const seek = () => {
                            v.addEventListener('seeked', resolve, { once: true });
                            v.currentTime = s;
                        };
                        if (v.readyState >= 1) { seek(); }
                        else { v.addEventListener('loadedmetadata', seek, { once: true }); }
                    });
                }", sec);

                // Give the renderer a moment to paint the sought frame.
                await Task.Delay(500);

                var path = Path.Combine(outputDir, $"snap_{sec}.jpg");
                await page.ScreenshotAsync(path, new ScreenshotOptions { Type = ScreenshotType.Jpeg, Quality = 90 });
                if (File.Exists(path))
                {
                    section.ImageData = await File.ReadAllBytesAsync(path);
                }
            }
        }
        catch (Exception ex)
        {
            AddLog($"❌ Erro Puppeteer: {ex.Message}");
        }
    }

    /// <summary>
    /// Renders the tutorial as a PDF: a header, the source URL, then each section's title,
    /// text and (when captured) screenshot, with a page-numbered footer.
    /// </summary>
    /// <param name="videoUrl">Source URL printed below the header.</param>
    /// <param name="sections">Sections to render, in order.</param>
    /// <returns>The PDF file content.</returns>
    private byte[] GeneratePdf(string videoUrl, List<TutorialSection> sections)
    {
        var document = Document.Create(container =>
        {
            container.Page(page =>
            {
                page.Margin(2, Unit.Centimetre);
                page.DefaultTextStyle(x => x.FontSize(11).FontFamily("Arial"));

                page.Header().Text("Video Tutorial").SemiBold().FontSize(24).FontColor(Colors.Blue.Medium);

                page.Content().PaddingVertical(1, Unit.Centimetre).Column(column =>
                {
                    column.Item().Text($"Fonte: {videoUrl}").Italic().FontSize(10).FontColor(Colors.Grey.Medium);
                    column.Item().PaddingBottom(20);

                    foreach (var section in sections)
                    {
                        column.Item().Text(section.Title).Bold().FontSize(16);
                        column.Item().Text(text => { text.Span(section.Content); });
                        if (section.ImageData != null)
                        {
                            column.Item().PaddingVertical(15).Image(section.ImageData).FitWidth();
                        }
                        column.Item().PaddingBottom(20);
                    }
                });

                page.Footer().AlignCenter().Text(x =>
                {
                    x.Span("Gerado por VideoStudy.app - ");
                    x.CurrentPageNumber();
                });
            });
        });

        return document.GeneratePdf();
    }
}