using System.Diagnostics;
using System.Linq;
using System.Security.Cryptography; // Added for MD5 hash
using System.Text.Json;
using System.Text.RegularExpressions;
using Microsoft.SemanticKernel;
using Microsoft.SemanticKernel.ChatCompletion;
using PuppeteerSharp;
using QuestPDF.Fluent;
using QuestPDF.Helpers;
using QuestPDF.Infrastructure;
using SkiaSharp;
using VideoStudy.Shared;

namespace VideoStudy.API.Services;

// NOTE(review): the reviewed text of this file had its generic type arguments stripped
// (e.g. `ILogger`, `List`, `Task`, `GetRequiredService()`), so they were reconstructed from usage:
// ILogger<AnalysisService>, List<string>, List<TutorialSection>, Task<VideoInfo>,
// Task<AnalysisResponse>, Task<string?>, IChatCompletionService, EvaluateFunctionAsync<double>.
// Confirm against version control before merging.

/// <summary>
/// Turns a video URL into a study document: fetches metadata and subtitles with yt-dlp,
/// asks the chat-completion service to structure the transcript into sections, captures
/// frames with a headless browser at the timestamps the model tagged, and renders a PDF.
/// </summary>
public class AnalysisService
{
    /// <summary>Maximum number of transcript characters embedded in the LLM prompt.</summary>
    private const int MaxTranscriptChars = 20000;

    /// <summary>Matches a <c>[SCREENSHOT: HH:MM:SS]</c> tag; group 1 is the timestamp.</summary>
    private static readonly Regex ScreenshotTagRegex =
        new(@"\[SCREENSHOT:\s*(\d{2}:\d{2}:\d{2})\]", RegexOptions.Compiled);

    /// <summary>Matches inline markup tags inside WebVTT cue text.</summary>
    private static readonly Regex VttTagRegex = new(@"<[^>]*>", RegexOptions.Compiled);

    /// <summary>Greedy match from the first '{' to the last '}' of the LLM answer.</summary>
    private static readonly Regex JsonBlockRegex = new(@"\{[\s\S]*\}", RegexOptions.Singleline | RegexOptions.Compiled);

    /// <summary>Language codes mapped to the language name used in the prompt.</summary>
    private static readonly Dictionary<string, string> LanguageNames = new()
    {
        { "en", "English" },
        { "pt", "Portuguese (Brazilian)" },
        { "es", "Spanish" },
        { "fr", "French" },
        { "de", "German" },
        { "it", "Italian" },
        { "ja", "Japanese" },
        { "ko", "Korean" },
        { "zh", "Chinese" }
    };

    private readonly Kernel _kernel;
    private readonly ILogger<AnalysisService> _logger;

    // NOTE(review): this list is per-instance and cleared at the start of every analysis.
    // If the service is registered as a singleton, concurrent requests would share it — confirm the DI lifetime.
    private readonly List<string> _debugSteps = new();

    /// <summary>
    /// Stores the dependencies, selects the QuestPDF community license and starts a
    /// background download of the Chromium build used for screenshots.
    /// </summary>
    /// <param name="kernel">Semantic Kernel instance providing the chat-completion service.</param>
    /// <param name="logger">Logger for diagnostic output.</param>
    public AnalysisService(Kernel kernel, ILogger<AnalysisService> logger)
    {
        _kernel = kernel;
        _logger = logger;
        QuestPDF.Settings.License = LicenseType.Community;

        // Fire-and-forget: failures are logged and do not prevent the service from being constructed.
        _ = Task.Run(async () =>
        {
            try
            {
                var browserFetcher = new BrowserFetcher();
                await browserFetcher.DownloadAsync();
                _logger.LogInformation("Chromium ready.");
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Failed to download Chromium.");
            }
        });
    }

    /// <summary>Writes a message to the logger and appends it, time-stamped, to the debug steps.</summary>
    private void AddLog(string message)
    {
        // Constant template: messages can contain braces (titles, JSON) that must not be parsed as placeholders.
        _logger.LogInformation("{Message}", message);
        _debugSteps.Add($"[{DateTime.Now:HH:mm:ss}] {message}");
    }

    /// <summary>
    /// Locates the yt-dlp executable. On Windows it probes the current directory, the app base
    /// directory and its "Binaries" folder; elsewhere it looks for the platform binary in
    /// "Binaries" and marks it executable. Falls back to "yt-dlp" (resolved through PATH).
    /// </summary>
    private string GetYtDlpPath()
    {
        const string fallback = "yt-dlp";
        var baseDir = AppDomain.CurrentDomain.BaseDirectory;

        if (System.Runtime.InteropServices.RuntimeInformation.IsOSPlatform(System.Runtime.InteropServices.OSPlatform.Windows))
        {
            const string exeName = "yt-dlp.exe";
            var probes = new[]
            {
                Path.Combine(Directory.GetCurrentDirectory(), exeName),
                Path.Combine(baseDir, exeName),
                Path.Combine(baseDir, "Binaries", exeName)
            };
            return probes.FirstOrDefault(File.Exists) ?? fallback;
        }

        var executableName = System.Runtime.InteropServices.RuntimeInformation.IsOSPlatform(System.Runtime.InteropServices.OSPlatform.OSX)
            ? "yt-dlp_macos"
            : "yt-dlp_linux";
        var fullPath = Path.Combine(baseDir, "Binaries", executableName);
        if (!File.Exists(fullPath))
        {
            return fallback;
        }

        try
        {
            // Argument list (not a formatted string) so paths with spaces or quotes are passed intact.
            using var chmod = Process.Start("chmod", new[] { "+x", fullPath });
            chmod?.WaitForExit();
        }
        catch (Exception ex)
        {
            // Best effort: the binary may already be executable.
            _logger.LogDebug(ex, "Could not mark {Path} as executable.", fullPath);
        }

        return fullPath;
    }

    /// <summary>
    /// Runs yt-dlp with the given arguments and returns its exit code and captured output.
    /// </summary>
    /// <param name="workingDirectory">Working directory for the process, or null to inherit.</param>
    /// <param name="arguments">Individual command-line arguments; each is passed as one argv entry.</param>
    /// <exception cref="InvalidOperationException">The process could not be started.</exception>
    private async Task<(int ExitCode, string StdOut, string StdErr)> RunYtDlpAsync(string? workingDirectory, params string[] arguments)
    {
        var startInfo = new ProcessStartInfo
        {
            FileName = GetYtDlpPath(),
            RedirectStandardOutput = true,
            RedirectStandardError = true,
            UseShellExecute = false,
            CreateNoWindow = true
        };
        if (!string.IsNullOrEmpty(workingDirectory))
        {
            startInfo.WorkingDirectory = workingDirectory;
        }

        // ArgumentList escapes each entry, so caller-supplied values (URL, language)
        // cannot break out of their argument and inject extra yt-dlp options.
        foreach (var argument in arguments)
        {
            startInfo.ArgumentList.Add(argument);
        }

        using var proc = Process.Start(startInfo) ?? throw new InvalidOperationException("Failed to start yt-dlp");

        // Drain both pipes concurrently: reading one to the end while the other fills its
        // buffer (or never reading a redirected pipe) can block the child process forever.
        var stdoutTask = proc.StandardOutput.ReadToEndAsync();
        var stderrTask = proc.StandardError.ReadToEndAsync();
        await Task.WhenAll(stdoutTask, stderrTask);
        await proc.WaitForExitAsync();

        return (proc.ExitCode, await stdoutTask, await stderrTask);
    }

    /// <summary>
    /// Queries yt-dlp for the title, channel, duration and thumbnail of a video.
    /// </summary>
    /// <param name="url">Video URL.</param>
    /// <returns>Video metadata; missing fields default to "Unknown", zero duration or an empty thumbnail.</returns>
    /// <exception cref="ArgumentException"><paramref name="url"/> is null or blank.</exception>
    /// <exception cref="InvalidOperationException">yt-dlp could not be started or exited with a non-zero code.</exception>
    public async Task<VideoInfo> GetVideoInfoAsync(string url)
    {
        if (string.IsNullOrWhiteSpace(url))
        {
            throw new ArgumentException("A video URL is required.", nameof(url));
        }

        // "--" ends option parsing so the URL can never be interpreted as an option.
        var (exitCode, output, error) = await RunYtDlpAsync(
            null,
            "--print", "title",
            "--print", "channel",
            "--print", "duration",
            "--print", "thumbnail",
            "--", url);

        if (exitCode != 0)
        {
            throw new InvalidOperationException($"yt-dlp error: {error}");
        }

        var lines = output.Trim().Split('\n', StringSplitOptions.RemoveEmptyEntries);
        var title = lines.Length > 0 ? lines[0].Trim() : "Unknown";
        var channel = lines.Length > 1 ? lines[1].Trim() : "Unknown";
        var durationStr = lines.Length > 2 ? lines[2].Trim() : "0";
        var thumbnail = lines.Length > 3 ? lines[3].Trim() : "";

        var parsed = double.TryParse(
            durationStr,
            System.Globalization.NumberStyles.Any,
            System.Globalization.CultureInfo.InvariantCulture,
            out var durationSeconds);

        // TimeSpan.FromSeconds rejects NaN and overflows on infinity; treat anything unusable as "unknown" (0).
        if (!parsed || !double.IsFinite(durationSeconds) || durationSeconds < 0)
        {
            durationSeconds = 0;
        }

        return new VideoInfo
        {
            Title = title,
            Author = channel,
            Duration = TimeSpan.FromSeconds(durationSeconds),
            Url = url,
            ThumbnailUrl = thumbnail
        };
    }

    /// <summary>
    /// Runs the full pipeline for one request: metadata, transcript, LLM structuring,
    /// timestamp validation, screenshot capture and PDF generation.
    /// </summary>
    /// <param name="request">Video URL plus input/output language.</param>
    /// <returns>
    /// A response with <c>Status = "success"</c> and the generated artefacts, or
    /// <c>Status = "error"</c> with the failure message. Debug steps are included in both cases.
    /// </returns>
    public async Task<AnalysisResponse> AnalyzeVideoAsync(AnalysisRequest request)
    {
        _debugSteps.Clear();
        var tempDir = Path.Combine(Path.GetTempPath(), "VideoStudy", Guid.NewGuid().ToString());
        string rawLlmResponse = "";

        try
        {
            Directory.CreateDirectory(tempDir);
            AddLog($"📁 Inciando processamento em: {tempDir}");

            // Get video info early to use duration for timestamp validation.
            // Done inside the try so a yt-dlp failure yields an error response and the temp dir is cleaned up.
            AddLog("ℹ️ Obtendo informações do vídeo para validação de timestamps...");
            var videoInfo = await GetVideoInfoAsync(request.VideoUrl);
            AddLog($"✅ Duração do vídeo (via yt-dlp): {videoInfo.Duration:hh\\:mm\\:ss}");

            // --- Step 1: Transcription ---
            AddLog("🌐 Obtendo transcrição via yt-dlp...");
            var (transcript, originalTitle) = await GetTranscriptViaYtDlpAsync(request.VideoUrl, request.Language, tempDir);
            if (string.IsNullOrWhiteSpace(transcript))
            {
                throw new InvalidOperationException("Não foi possível obter a transcrição do vídeo.");
            }
            AddLog($"✅ Transcrição obtida: '{originalTitle}' ({transcript.Length} chars).");

            // --- Step 2: Intelligence ---
            AddLog("🧠 Enviando transcrição para o Groq (LLM)...");
            var (tutorialSections, rawJson, category, docTitle) = await GenerateTutorialContentAsync(
                transcript, originalTitle, request.Language, request.OutputLanguage, videoInfo.Duration);
            rawLlmResponse = rawJson;

            // Save debug MD (best effort: a read-only working directory must not fail the analysis).
            var debugFile = Path.Combine(Directory.GetCurrentDirectory(), "DEBUG_LAST_RESPONSE.md");
            var debugContent = $"# {docTitle} ({category})\n\nSource: {originalTitle}\n\n## Raw JSON\n```json\n{rawJson}\n```\n";
            try
            {
                await File.WriteAllTextAsync(debugFile, debugContent);
                AddLog($"📝 Arquivo de debug gerado: {debugFile}");
            }
            catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
            {
                _logger.LogWarning(ex, "Could not write debug file {DebugFile}.", debugFile);
            }

            // --- Validate and adjust Image Timestamps ---
            AddLog("⏳ Validando timestamps de imagem gerados pela IA...");
            foreach (var section in tutorialSections)
            {
                if (!TimeSpan.TryParse(section.ImageTimestamp, System.Globalization.CultureInfo.InvariantCulture, out var imageTs))
                {
                    continue;
                }

                // Allow a 5-second buffer past the reported end; anything later is considered invalid.
                if (imageTs.TotalSeconds > videoInfo.Duration.TotalSeconds + 5)
                {
                    AddLog($" ⚠️ Timestamp de imagem ({section.ImageTimestamp}) para '{section.Title}' excede a duração do vídeo ({videoInfo.Duration:hh\\:mm\\:ss}). Ajustando para null para evitar erros de captura.");
                    section.ImageTimestamp = null; // Skip the screenshot for this section.
                }
            }
            AddLog("✅ Validação de timestamps concluída.");

            // --- Step 3: Image Capture ---
            var sectionsWithImages = tutorialSections.Where(s => !string.IsNullOrEmpty(s.ImageTimestamp)).ToList();
            if (sectionsWithImages.Count > 0)
            {
                AddLog($"📸 Capturando {sectionsWithImages.Count} prints usando Puppeteer (Direct Bypass)...");
                await CaptureScreenshotsWithPuppeteerAsync(request.VideoUrl, tutorialSections, tempDir, videoInfo.Duration);
            }
            else
            {
                AddLog("⚠️ Nenhuma tag [SCREENSHOT] foi gerada pela IA, ou todas foram invalidadas.");
            }

            // --- Step 4: PDF Generation ---
            AddLog("📄 Gerando PDF final com QuestPDF...");
            var pdfBytes = GeneratePdf(docTitle, request.VideoUrl, tutorialSections, category);

            AddLog("🎉 Processamento concluído com sucesso!");
            return new AnalysisResponse
            {
                Status = "success",
                VideoTitle = originalTitle,
                DocumentTitle = docTitle,
                Category = category,
                Transcript = transcript,
                TutorialSections = tutorialSections,
                PdfData = pdfBytes,
                DebugSteps = new List<string>(_debugSteps),
                RawLlmResponse = rawLlmResponse,
                Analysis = "Tutorial gerado com sucesso!"
            };
        }
        catch (Exception ex)
        {
            AddLog($"❌ ERRO: {ex.Message}");
            return new AnalysisResponse
            {
                Status = "error",
                ErrorMessage = ex.Message,
                DebugSteps = new List<string>(_debugSteps),
                RawLlmResponse = rawLlmResponse
            };
        }
        finally
        {
            if (Directory.Exists(tempDir))
            {
                try
                {
                    Directory.Delete(tempDir, true);
                }
                catch (Exception ex)
                {
                    // Cleanup is best effort; a locked file must not mask the real result.
                    _logger.LogDebug(ex, "Could not delete temp directory {TempDir}.", tempDir);
                }
            }
        }
    }

    /// <summary>
    /// Fetches the video title and its subtitles (manual or automatic, WebVTT) with yt-dlp
    /// and flattens the subtitles into plain text.
    /// </summary>
    /// <param name="url">Video URL.</param>
    /// <param name="language">Preferred subtitle language code; "en" is always requested as well.</param>
    /// <param name="outputDir">Directory yt-dlp writes the subtitle files into.</param>
    /// <returns>The transcript (empty when no subtitle file was produced) and the title ("Video Analysis" when unavailable).</returns>
    private async Task<(string transcript, string title)> GetTranscriptViaYtDlpAsync(string url, string language, string outputDir)
    {
        // 1. Fetch Title
        var (_, titleOutput, _) = await RunYtDlpAsync(null, "--print", "title", "--", url);
        var title = titleOutput.Trim();
        if (string.IsNullOrEmpty(title))
        {
            title = "Video Analysis";
        }

        // 2. Fetch Subs ("%(id)s" keeps file names free of characters from the title)
        var (exitCode, _, error) = await RunYtDlpAsync(
            outputDir,
            "--skip-download",
            "--write-sub",
            "--write-auto-sub",
            "--sub-lang", $"{language},en",
            "--sub-format", "vtt",
            "--output", "%(id)s",
            "--", url);

        if (exitCode != 0)
        {
            _logger.LogWarning("yt-dlp subtitle download exited with code {ExitCode}: {Error}", exitCode, error);
        }

        var vttFiles = Directory.GetFiles(outputDir, "*.vtt");
        Array.Sort(vttFiles, StringComparer.Ordinal);

        // Both the requested language and "en" may have been downloaded; enumeration order is
        // unspecified, so explicitly prefer the requested language and only then fall back.
        string? vttFile = null;
        if (!string.IsNullOrWhiteSpace(language))
        {
            var languageMarker = $".{language}";
            vttFile = vttFiles.FirstOrDefault(f => Path.GetFileName(f).Contains(languageMarker, StringComparison.OrdinalIgnoreCase));
        }
        vttFile ??= vttFiles.FirstOrDefault();

        if (vttFile is null)
        {
            return (string.Empty, title);
        }

        return (ParseVttToText(await File.ReadAllTextAsync(vttFile)), title);
    }

    /// <summary>
    /// Flattens WebVTT content into one space-separated string: header, NOTE and cue-timing
    /// lines are dropped, inline tags are removed, and each distinct text line is kept once.
    /// </summary>
    private static string ParseVttToText(string vttContent)
    {
        var textLines = new List<string>();
        var seen = new HashSet<string>();

        foreach (var rawLine in vttContent.Split('\n'))
        {
            var line = rawLine.Trim();
            if (string.IsNullOrWhiteSpace(line)
                || line.StartsWith("WEBVTT", StringComparison.Ordinal)
                || line.StartsWith("NOTE", StringComparison.Ordinal)
                || line.Contains("-->", StringComparison.Ordinal))
            {
                continue;
            }

            line = VttTagRegex.Replace(line, "");

            // HashSet.Add returns false for duplicates (auto-captions repeat lines across cues).
            if (seen.Add(line))
            {
                textLines.Add(line);
            }
        }

        return string.Join(" ", textLines);
    }

    /// <summary>
    /// Sends the transcript to the chat-completion service and parses the JSON answer into sections.
    /// </summary>
    /// <param name="transcript">Plain-text transcript; only the first <see cref="MaxTranscriptChars"/> characters are sent.</param>
    /// <param name="originalTitle">Original video title; also the fallback document title.</param>
    /// <param name="inputLanguage">Language code of the transcript; used when no output language is given.</param>
    /// <param name="outputLanguage">Language code the document should be written in, or null/blank.</param>
    /// <param name="videoDuration">Video duration communicated to the model as the upper bound for timestamps.</param>
    /// <returns>
    /// Parsed sections (possibly empty or partial when the answer is malformed), the JSON text
    /// extracted from the answer, the category (default "OTHER") and the document title.
    /// </returns>
    private async Task<(List<TutorialSection> sections, string rawJson, string category, string docTitle)> GenerateTutorialContentAsync(
        string transcript, string originalTitle, string inputLanguage, string? outputLanguage, TimeSpan videoDuration)
    {
        var outputLang = string.IsNullOrWhiteSpace(outputLanguage) ? inputLanguage : outputLanguage;
        var outputLangName = LanguageNames.GetValueOrDefault(outputLang, outputLang);

        var chatService = _kernel.GetRequiredService<IChatCompletionService>();

        // Pre-format the duration with the invariant culture so the prompt is stable across server locales.
        string formattedVideoDuration = videoDuration.ToString("hh\\:mm\\:ss", System.Globalization.CultureInfo.InvariantCulture);
        string totalSecondsVideoDuration = videoDuration.TotalSeconds.ToString(System.Globalization.CultureInfo.InvariantCulture);
        string transcriptExcerpt = transcript[..Math.Min(transcript.Length, MaxTranscriptChars)];

        // NOTE(review): the wording below is unchanged; only the line breaks were re-flowed because
        // the reviewed text had lost them — confirm against the original layout if it matters.
        var prompt = $@"
Você é um Editor Chefe e Analista de Conteúdo Sênior. Receberá:
A) TÍTULO ORIGINAL: {originalTitle}
B) TRANSCRIÇÃO: {transcriptExcerpt}
C) DURAÇÃO TOTAL DO VÍDEO: {formattedVideoDuration} ({totalSecondsVideoDuration} segundos)

SUA MISSÃO:
1. **Classificar** o vídeo em: 'TUTORIAL', 'MEETING', 'LECTURE' ou 'OTHER'.
2. **Criar um Título Profissional**:
- Use o TÍTULO ORIGINAL como base.
- Remova clickbaits, emojis e CAPS LOCK excessivo.
- O título deve parecer o de um documento técnico ou ata oficial.
3. **Estruturar o Conteúdo**:
- Converta o conteúdo em um texto educativo e denso.
- Identifique momentos visuais críticos e insira `[SCREENSHOT: HH:MM:SS]` no final do parágrafo correspondente.
- **IMPORTANTE:** Os timestamps `HH:MM:SS` para os `[SCREENSHOT]` **NÃO DEVEM, EM HIPÓTESE ALGUMA, EXCEDER A DURAÇÃO TOTAL DO VÍDEO** ({formattedVideoDuration}). Se um momento visual crítico ocorrer perto do final do vídeo, use um timestamp que esteja dentro da duração total.

**IMPORTANTE: Todo o texto de saída (documentTitle, títulos das seções e conteúdo) DEVE ser escrito em {outputLangName}.**

SAÍDA JSON OBRIGATÓRIA:
{{
""category"": ""TUTORIAL | MEETING | LECTURE | OTHER"",
""documentTitle"": ""Título Profissional Gerado"",
""sections"": [
{{ ""title"": ""Título da Seção"", ""content"": ""Texto explicativo detalhado... [SCREENSHOT: 00:05:30]"" }}
]
}}";

        var result = await chatService.GetChatMessageContentAsync(prompt);
        var json = result.Content?.Trim() ?? "{}";

        // Extract JSON from LLM response — handles text before/after the JSON block.
        var jsonMatch = JsonBlockRegex.Match(json);
        if (jsonMatch.Success)
        {
            json = jsonMatch.Value;
        }

        var sections = new List<TutorialSection>();
        string category = "OTHER";
        string docTitle = originalTitle;

        try
        {
            using var doc = JsonDocument.Parse(json);
            var root = doc.RootElement;

            var parsedCategory = GetStringOrNull(root, "category");
            if (parsedCategory is not null)
            {
                category = parsedCategory;
            }

            var parsedTitle = GetStringOrNull(root, "documentTitle");
            if (parsedTitle is not null)
            {
                docTitle = parsedTitle;
            }

            if (root.TryGetProperty("sections", out var sectionsEl) && sectionsEl.ValueKind == JsonValueKind.Array)
            {
                foreach (var el in sectionsEl.EnumerateArray())
                {
                    if (el.ValueKind != JsonValueKind.Object)
                    {
                        continue;
                    }

                    var content = GetStringOrNull(el, "content") ?? "";
                    sections.Add(new TutorialSection
                    {
                        Title = GetStringOrNull(el, "title") ?? "",
                        // Remove every tag with the same pattern used to find them, so variants such as
                        // "[SCREENSHOT:00:01:02]" or a second tag do not leak into the PDF text.
                        Content = ScreenshotTagRegex.Replace(content, "").Trim(),
                        ImageTimestamp = ExtractTimestamp(content)
                    });
                }
            }
        }
        catch (Exception ex) when (ex is JsonException or InvalidOperationException)
        {
            // Still best effort (whatever was parsed so far is returned), but no longer silent.
            _logger.LogWarning(ex, "Could not fully parse the LLM response as the expected JSON document.");
        }

        return (sections, json, category, docTitle);
    }

    /// <summary>Returns the named property when it exists and is a JSON string; otherwise null.</summary>
    private static string? GetStringOrNull(JsonElement element, string propertyName)
    {
        if (element.ValueKind == JsonValueKind.Object
            && element.TryGetProperty(propertyName, out var property)
            && property.ValueKind == JsonValueKind.String)
        {
            return property.GetString();
        }

        return null;
    }

    /// <summary>Returns the timestamp of the first <c>[SCREENSHOT: HH:MM:SS]</c> tag in the text, or null.</summary>
    private string? ExtractTimestamp(string text)
    {
        var match = ScreenshotTagRegex.Match(text);
        return match.Success ? match.Groups[1].Value : null;
    }

    /// <summary>
    /// Asks yt-dlp (<c>-g -f b</c>) for the direct media URL of the video.
    /// </summary>
    /// <returns>The first non-empty line printed by yt-dlp, or null when nothing was printed.</returns>
    private async Task<string?> GetRawVideoStreamUrl(string videoUrl)
    {
        var (_, output, _) = await RunYtDlpAsync(null, "-g", "-f", "b", "--", videoUrl);

        return output
            .Split('\n', StringSplitOptions.RemoveEmptyEntries)
            .Select(line => line.Trim())
            .FirstOrDefault(line => line.Length > 0);
    }

    /// <summary>
    /// Captures one frame per section that has an image timestamp. For each timestamp it tries
    /// T-1, T and T+1 seconds and keeps the candidate with the highest clarity score in
    /// <c>ImageData</c>. Failures are logged and leave <c>ImageData</c> null; nothing is thrown.
    /// </summary>
    /// <param name="videoUrl">Page URL of the video; resolved to a direct stream URL first.</param>
    /// <param name="sections">All sections; those with a timestamp are processed and updated in place.</param>
    /// <param name="outputDir">Not used by the current implementation (frames stay in memory).</param>
    /// <param name="videoDuration">Video duration; candidates more than 5 s past it are skipped.</param>
    private async Task CaptureScreenshotsWithPuppeteerAsync(string videoUrl, List<TutorialSection> sections, string outputDir, TimeSpan videoDuration)
    {
        var sectionsWithImages = sections.Where(s => !string.IsNullOrEmpty(s.ImageTimestamp)).ToList();
        if (sectionsWithImages.Count == 0)
        {
            return;
        }

        AddLog("🔍 Obtendo link direto do vídeo (Bypass YouTube Player)...");
        var rawVideoUrl = await GetRawVideoStreamUrl(videoUrl);

        if (string.IsNullOrEmpty(rawVideoUrl))
        {
            AddLog("❌ Falha ao obter link direto. As capturas de tela serão ignoradas.");
            return;
        }

        try
        {
            using var browser = await Puppeteer.LaunchAsync(new LaunchOptions
            {
                Headless = true,
                Args = new[] { "--no-sandbox", "--window-size=1280,720" }
            });
            using var page = await browser.NewPageAsync();
            await page.SetViewportAsync(new ViewPortOptions { Width = 1280, Height = 720 });

            // NOTE(review): the page markup was missing from the reviewed text (the literal was a single
            // space, so the selector wait below could never succeed). This is a minimal reconstruction
            // providing the #raw-player element the rest of the method relies on — confirm against history.
            // The URL is HTML-encoded so '&' and quotes cannot break out of the attribute.
            var encodedVideoUrl = System.Net.WebUtility.HtmlEncode(rawVideoUrl);
            var htmlContent = $@"<!DOCTYPE html>
<html>
<body style=""margin:0;background:#000;"">
<video id=""raw-player"" src=""{encodedVideoUrl}"" muted preload=""auto"" style=""width:100vw;height:100vh;object-fit:contain;""></video>
</body>
</html>";

            await page.SetContentAsync(htmlContent);
            await page.WaitForSelectorAsync("#raw-player");

            // Use the passed videoDuration instead of evaluating it from the page.
            AddLog($"🎥 Duração total do vídeo (passada): {videoDuration:hh\\:mm\\:ss}");

            // "Best of 3" capture window: T-1, T, T+1 seconds.
            var timeOffsets = new[] { -1, 0, 1 };

            foreach (var section in sectionsWithImages)
            {
                // ImageTimestamp might be null due to earlier validation.
                if (string.IsNullOrEmpty(section.ImageTimestamp))
                {
                    AddLog($"📸 Pulando captura para seção '{section.Title}' pois o timestamp foi invalidado.");
                    continue;
                }

                if (!TimeSpan.TryParse(section.ImageTimestamp, System.Globalization.CultureInfo.InvariantCulture, out var ts))
                {
                    continue;
                }

                AddLog($"📸 Processando captura para {section.ImageTimestamp} ('{section.Title}')");

                var targetSeconds = (int)ts.TotalSeconds;
                var candidates = new List<(byte[] ImageData, double Score, int Time, string Hash)>();

                foreach (var offset in timeOffsets)
                {
                    var captureTime = Math.Max(0, targetSeconds + offset);

                    // Ensure captureTime does not exceed video duration (small buffer for safety).
                    if (captureTime > videoDuration.TotalSeconds + 5)
                    {
                        AddLog($" - ⚠️ Tempo de captura {captureTime}s excede a duração do vídeo ({videoDuration.TotalSeconds:F2}s). Ignorando este candidato.");
                        continue;
                    }

                    try
                    {
                        AddLog($" - Solicitando seek para {captureTime}s (candidato para {section.ImageTimestamp}).");

                        // 1. Seek to the target time.
                        await page.EvaluateFunctionAsync(
                            @"(s) => {{ const video = document.getElementById('raw-player'); video.currentTime = s; }}",
                            captureTime);

                        // 2. Wait until the seek has finished AND enough data is available. Checking
                        //    readyState alone can pass immediately with the previous frame still shown.
                        await page.WaitForFunctionAsync(
                            "() => { const v = document.getElementById('raw-player'); return !v.seeking && v.readyState >= 3; }",
                            new WaitForFunctionOptions { Timeout = 10000 });

                        var actualCurrentTime = await page.EvaluateFunctionAsync<double>("() => document.getElementById('raw-player').currentTime");
                        AddLog($" -> Buscado para {captureTime}s. CurrentTime real pós-seek: {actualCurrentTime:F2}s.");

                        // 3. Capture screenshot into memory.
                        var screenshotData = await page.ScreenshotDataAsync(new ScreenshotOptions { Type = ScreenshotType.Jpeg, Quality = 90 });

                        // MD5 is used only as a fingerprint in the debug log (to spot identical frames), not for security.
                        var hash = Convert.ToHexString(MD5.HashData(screenshotData)).ToLowerInvariant();
                        AddLog($" -> Imagem Candidata ({captureTime}s): {screenshotData.Length} bytes, Hash MD5: {hash}");

                        // 4. Score the image.
                        var score = CalculateImageClarityScore(screenshotData);
                        AddLog($" - Candidato {captureTime}s: Score de claridade = {score:F2}");

                        // Scores <= 0 mean an undecodable or perfectly uniform frame.
                        if (score > 0)
                        {
                            candidates.Add((screenshotData, score, captureTime, hash));
                        }
                    }
                    catch (WaitTaskTimeoutException)
                    {
                        AddLog($" - ⚠️ Timeout esperando pelo frame em {captureTime}s. Ignorando.");
                    }
                    catch (Exception ex)
                    {
                        AddLog($" - ❌ Erro ao capturar frame em {captureTime}s: {ex.Message}");
                    }
                }

                if (candidates.Count > 0)
                {
                    // Select the best image based on the highest score (first one wins ties).
                    var bestCandidate = candidates.OrderByDescending(c => c.Score).First();
                    section.ImageData = bestCandidate.ImageData;
                    AddLog($" => ✅ Selecionado frame de {bestCandidate.Time}s (Score: {bestCandidate.Score:F2}, Hash: {bestCandidate.Hash}) para o timestamp {section.ImageTimestamp}.");
                }
                else
                {
                    AddLog($" => ❌ Falha ao capturar um frame válido para {section.ImageTimestamp}. O PDF usará um placeholder.");
                    section.ImageData = null; // Ensure it's null if all attempts fail.
                }
            }
        }
        catch (Exception ex)
        {
            AddLog($"❌ Erro irrecuperável no Puppeteer: {ex.Message}. As capturas de tela restantes serão abortadas.");
        }
    }

    /// <summary>
    /// Scores an encoded image by the standard deviation of its per-pixel luma
    /// (0.299 R + 0.587 G + 0.114 B). A uniform image scores 0.
    /// </summary>
    /// <param name="imageBytes">Encoded image data.</param>
    /// <returns>The standard deviation, or -1 when the data is empty, cannot be decoded, or scoring fails.</returns>
    private double CalculateImageClarityScore(byte[] imageBytes)
    {
        if (imageBytes == null || imageBytes.Length == 0)
        {
            return -1.0;
        }

        try
        {
            // Use SKBitmap.Decode for robust image format handling.
            using var image = SKBitmap.Decode(imageBytes);
            if (image == null || image.Width == 0 || image.Height == 0)
            {
                _logger.LogWarning("SkiaSharp failed to decode image or image is empty.");
                return -1.0;
            }

            var brightnessValues = new List<float>(image.Width * image.Height);

            // Using GetPixel is simpler than handling the Pixels array for this use case.
            for (int y = 0; y < image.Height; y++)
            {
                for (int x = 0; x < image.Width; x++)
                {
                    var p = image.GetPixel(x, y);
                    // Luma calculation (standard formula for perceived brightness).
                    var brightness = (p.Red * 0.299f) + (p.Green * 0.587f) + (p.Blue * 0.114f);
                    brightnessValues.Add(brightness);
                }
            }

            if (brightnessValues.Count == 0)
            {
                return 0.0;
            }

            var avg = brightnessValues.Average();
            var sumOfSquares = brightnessValues.Sum(b => Math.Pow(b - avg, 2));
            var stdDev = Math.Sqrt(sumOfSquares / brightnessValues.Count);

            // A very low standard deviation indicates a uniform image (likely black, white, or single color).
            return stdDev;
        }
        catch (Exception ex)
        {
            // If SkiaSharp throws an exception, it's a bad image.
            _logger.LogError(ex, "Error calculating image clarity score.");
            return -1.0;
        }
    }

    /// <summary>
    /// Renders the sections as a PDF: a header with title and colour-coded category, the source
    /// URL, then each section's title, text and image (or a grey placeholder when a timestamp
    /// exists but no frame was captured), and a footer with the page number.
    /// </summary>
    /// <returns>The PDF file content.</returns>
    private byte[] GeneratePdf(string docTitle, string videoUrl, List<TutorialSection> sections, string category)
    {
        var categoryColor = category switch
        {
            "TUTORIAL" => Colors.Green.Medium,
            "MEETING" => Colors.Orange.Medium,
            "LECTURE" => Colors.Purple.Medium,
            _ => Colors.Blue.Medium
        };

        var document = Document.Create(container =>
        {
            container.Page(page =>
            {
                page.Margin(2, Unit.Centimetre);
                page.DefaultTextStyle(x => x.FontSize(11).FontFamily("Segoe UI").Fallback(f => f.FontFamily("Microsoft YaHei")));

                page.Header().Column(c =>
                {
                    c.Item().Row(row =>
                    {
                        row.RelativeItem().Text(docTitle).SemiBold().FontSize(20).FontColor(Colors.Black);
                        row.ConstantItem(100).AlignRight().Text(category).Bold().FontSize(10).FontColor(categoryColor);
                    });
                    c.Item().PaddingTop(5).LineHorizontal(1).LineColor(Colors.Grey.Lighten2);
                });

                page.Content().PaddingVertical(1, Unit.Centimetre).Column(column =>
                {
                    column.Item().Text($"Fonte: {videoUrl}").Italic().FontSize(9).FontColor(Colors.Grey.Medium);
                    column.Item().PaddingBottom(20);

                    foreach (var section in sections)
                    {
                        column.Item().Text(section.Title).Bold().FontSize(14).FontColor(categoryColor);
                        column.Item().Text(text => { text.Span(section.Content); });

                        if (section.ImageData != null)
                        {
                            column.Item().PaddingVertical(10).Image(section.ImageData).FitWidth();
                        }
                        else if (!string.IsNullOrEmpty(section.ImageTimestamp))
                        {
                            // Placeholder for missing image (graceful degradation).
                            column.Item().PaddingVertical(10)
                                .Background(Colors.Grey.Lighten3)
                                .Height(100)
                                .AlignCenter()
                                .AlignMiddle()
                                .Text($"[Imagem Indisponível: {section.ImageTimestamp}]")
                                .FontSize(10)
                                .FontColor(Colors.Grey.Darken2);
                        }

                        column.Item().PaddingBottom(15);
                    }
                });

                page.Footer().AlignCenter().Text(x =>
                {
                    x.Span("Gerado por VideoStudy.app - ");
                    x.CurrentPageNumber();
                });
            });
        });

        return document.GeneratePdf();
    }
}