using System.Diagnostics;
using System.Globalization;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Text.Json;
using System.Text.RegularExpressions;
using Microsoft.SemanticKernel;
using Microsoft.SemanticKernel.ChatCompletion;
using QuestPDF.Fluent;
using QuestPDF.Helpers;
using QuestPDF.Infrastructure;
using VideoStudy.Shared;

namespace VideoStudy.API.Services;

/// <summary>
/// Turns a video URL into a structured study document: fetches metadata and subtitles with
/// yt-dlp, asks the configured chat model to structure the transcript, captures frames with
/// ffmpeg and renders the result as a PDF with QuestPDF.
/// </summary>
public class AnalysisService
{
    /// <summary>Maximum number of transcript characters embedded in the LLM prompt.</summary>
    private const int MaxTranscriptCharsForPrompt = 25000;

    /// <summary>Maximum number of ffmpeg processes running at the same time.</summary>
    private const int MaxParallelScreenshots = 2;

    /// <summary>Length, in seconds, of each paragraph of the readable transcript.</summary>
    private const int ParagraphSeconds = 60;

    /// <summary>How many directory levels are inspected when looking for a bundled yt-dlp.</summary>
    private const int YtDlpSearchDepth = 7;

    private static readonly Regex MarkupTagRegex = new(@"<[^>]*>", RegexOptions.Compiled);
    private static readonly Regex WhitespaceRunRegex = new(@"\s{2,}", RegexOptions.Compiled);
    private static readonly Regex JsonObjectRegex = new(@"\{[\s\S]*\}", RegexOptions.Compiled);

    // Single pattern used both to extract and to remove screenshot markers, so that every
    // marker that yields a timestamp is also stripped from the section text.
    private static readonly Regex ScreenshotMarkerRegex =
        new(@"\[SCREENSHOT:\s*(\d{2}:\d{2}:\d{2})\]", RegexOptions.Compiled);

    private readonly Kernel _kernel;
    private readonly ILogger<AnalysisService> _logger;
    private readonly IConfiguration _configuration;

    /// <summary>Creates the service and selects the QuestPDF community license.</summary>
    public AnalysisService(Kernel kernel, ILogger<AnalysisService> logger, IConfiguration configuration)
    {
        _kernel = kernel;
        _logger = logger;
        _configuration = configuration;
        QuestPDF.Settings.License = LicenseType.Community;
    }

    /// <summary>
    /// Locates the platform-specific yt-dlp executable by walking up from the application base
    /// directory (checking each directory and its "Binaries" sub-folder). Falls back to the
    /// bare command name so the executable is resolved through PATH.
    /// </summary>
    private static string GetYtDlpPath()
    {
        string exeName = RuntimeInformation.IsOSPlatform(OSPlatform.Windows) ? "yt-dlp.exe"
            : RuntimeInformation.IsOSPlatform(OSPlatform.OSX) ? "yt-dlp_macos"
            : "yt-dlp_linux";

        var dir = new DirectoryInfo(AppDomain.CurrentDomain.BaseDirectory);
        for (int i = 0; i < YtDlpSearchDepth; i++)
        {
            var path = Path.Combine(dir.FullName, exeName);
            if (File.Exists(path))
            {
                return path;
            }

            var binPath = Path.Combine(dir.FullName, "Binaries", exeName);
            if (File.Exists(binPath))
            {
                return binPath;
            }

            if (dir.Parent == null)
            {
                break;
            }

            dir = dir.Parent;
        }

        return "yt-dlp"; // fallback to PATH
    }

    /// <summary>
    /// Builds a yt-dlp start info. Arguments go through <see cref="ProcessStartInfo.ArgumentList"/>
    /// so each value is escaped individually; caller-supplied values (URL, language) can therefore
    /// never break out of their argument and inject extra options.
    /// </summary>
    /// <param name="arguments">Arguments appended after the optional cookies option.</param>
    private ProcessStartInfo CreateYtDlpStartInfo(params string[] arguments)
    {
        var psi = new ProcessStartInfo { FileName = GetYtDlpPath() };

        var browser = _configuration["YtDlp:CookiesBrowser"];
        if (!string.IsNullOrWhiteSpace(browser))
        {
            psi.ArgumentList.Add("--cookies-from-browser");
            psi.ArgumentList.Add(browser.Trim());
        }

        foreach (var argument in arguments)
        {
            psi.ArgumentList.Add(argument);
        }

        return psi;
    }

    /// <summary>
    /// Runs a child process to completion while draining stdout and stderr concurrently.
    /// Draining both streams is required: a child that fills a redirected pipe nobody reads
    /// blocks forever, and so does the parent waiting for it to exit.
    /// </summary>
    /// <param name="psi">Start info; redirection and shell settings are overwritten here.</param>
    /// <param name="ct">Cancels the wait and kills the child process tree.</param>
    /// <returns>The exit code and the complete stdout/stderr text.</returns>
    private async Task<(int ExitCode, string StdOut, string StdErr)> RunProcessAsync(ProcessStartInfo psi, CancellationToken ct)
    {
        psi.RedirectStandardOutput = true;
        psi.RedirectStandardError = true;
        psi.UseShellExecute = false;
        psi.CreateNoWindow = true;

        using var process = Process.Start(psi)
            ?? throw new InvalidOperationException($"Não foi possível iniciar o processo '{psi.FileName}'.");

        try
        {
            var stdoutTask = process.StandardOutput.ReadToEndAsync(ct);
            var stderrTask = process.StandardError.ReadToEndAsync(ct);
            await Task.WhenAll(stdoutTask, stderrTask);
            await process.WaitForExitAsync(ct);
            return (process.ExitCode, await stdoutTask, await stderrTask);
        }
        catch (OperationCanceledException)
        {
            // Disposing a Process does not terminate it; kill explicitly so a cancelled
            // request does not leave yt-dlp/ffmpeg running in the background.
            TryKill(process);
            throw;
        }
    }

    /// <summary>Best-effort termination of a child process and its descendants.</summary>
    private void TryKill(Process process)
    {
        try
        {
            if (!process.HasExited)
            {
                process.Kill(entireProcessTree: true);
            }
        }
        catch (Exception ex)
        {
            _logger.LogDebug(ex, "Não foi possível encerrar o processo filho.");
        }
    }

    /// <summary>Best-effort removal of the per-request temporary directory.</summary>
    private void TryDeleteDirectory(string path)
    {
        try
        {
            if (Directory.Exists(path))
            {
                Directory.Delete(path, recursive: true);
            }
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "Não foi possível remover o diretório temporário {TempDir}", path);
        }
    }

    /// <summary>
    /// Runs the full analysis pipeline and streams progress events. The last event is either
    /// an error event or a completion event carrying the <see cref="AnalysisResult"/>.
    /// Stage failures are reported as an error event rather than thrown.
    /// </summary>
    /// <param name="request">Video URL, languages and optional user context.</param>
    /// <param name="cancellationToken">Cancels the pipeline and its child processes.</param>
    public async IAsyncEnumerable<AnalysisEvent> AnalyzeVideoAsync(
        AnalysisRequest request,
        [EnumeratorCancellation] CancellationToken cancellationToken = default)
    {
        var tempDir = Path.Combine(Path.GetTempPath(), "VideoStudy", Guid.NewGuid().ToString());
        Directory.CreateDirectory(tempDir);

        string? errorMessage = null;
        AnalysisResult? finalResult = null;

        // try/finally (without catch) is allowed around yield return and guarantees the
        // temporary directory is removed even when the consumer stops enumerating early.
        try
        {
            yield return new AnalysisEvent { ProgressPercentage = 5, Message = "Obtendo informações do vídeo..." };

            VideoInfo? videoInfo = null;
            try
            {
                videoInfo = await GetVideoInfoAsync(request.VideoUrl, cancellationToken);
            }
            catch (Exception ex)
            {
                errorMessage = $"Erro ao acessar o YouTube: {ex.Message}";
            }

            if (errorMessage == null && videoInfo != null)
            {
                yield return new AnalysisEvent { ProgressPercentage = 10, Message = $"Analisando: {videoInfo.Title}" };
                yield return new AnalysisEvent { ProgressPercentage = 15, Message = "Obtendo transcrição..." };

                string? transcript = null;
                string? transcriptReadable = null;
                try
                {
                    (transcript, transcriptReadable) =
                        await GetTranscriptViaYtDlpAsync(request.VideoUrl, request.Language, tempDir, cancellationToken);
                    if (string.IsNullOrWhiteSpace(transcript))
                    {
                        errorMessage = "O vídeo não possui transcrição disponível.";
                    }
                }
                catch (Exception ex)
                {
                    errorMessage = $"Erro na transcrição: {ex.Message}";
                }

                if (errorMessage == null && transcript != null)
                {
                    yield return new AnalysisEvent { ProgressPercentage = 40, Message = "IA estruturando conteúdo..." };

                    List<TutorialSection>? sections = null;
                    string? rawJson = null, category = null, docTitle = null, summary = null;
                    try
                    {
                        var aiResult = await GenerateTutorialContentAsync(
                            transcript, videoInfo, request.Language, request.OutputLanguage, request.UserContext, cancellationToken);
                        sections = aiResult.sections;
                        rawJson = aiResult.rawJson;
                        category = aiResult.category;
                        docTitle = aiResult.docTitle;
                        summary = aiResult.summary;
                    }
                    catch (Exception ex)
                    {
                        errorMessage = $"IA Indisponível: {ex.Message}. Verifique a chave do Groq.";
                    }

                    if (errorMessage == null && sections != null)
                    {
                        var sectionsWithImages = sections.Where(s => !string.IsNullOrEmpty(s.ImageTimestamp)).ToList();
                        if (sectionsWithImages.Count > 0)
                        {
                            yield return new AnalysisEvent
                            {
                                ProgressPercentage = 70,
                                Message = $"Capturando {sectionsWithImages.Count} imagens com FFmpeg..."
                            };

                            try
                            {
                                await CaptureScreenshotsAsync(request.VideoUrl, sectionsWithImages, videoInfo.Duration, cancellationToken);
                            }
                            catch (Exception ex)
                            {
                                // Screenshots are optional: the document is still produced without them.
                                _logger.LogWarning(ex, "Falha na captura de screenshots — continuando sem imagens.");
                            }
                        }

                        yield return new AnalysisEvent { ProgressPercentage = 90, Message = "Gerando documento PDF..." };

                        try
                        {
                            var pdfBytes = GeneratePdf(docTitle!, summary!, request.VideoUrl, sections, category!, transcriptReadable);
                            finalResult = new AnalysisResult
                            {
                                VideoTitle = videoInfo.Title,
                                DocumentTitle = docTitle!,
                                Summary = summary!,
                                Category = category!,
                                Transcript = transcript,
                                TutorialSections = sections,
                                PdfData = pdfBytes,
                                RawLlmResponse = rawJson
                            };
                        }
                        catch (Exception ex)
                        {
                            errorMessage = $"Erro ao gerar PDF: {ex.Message}";
                        }
                    }
                }
            }
        }
        finally
        {
            TryDeleteDirectory(tempDir);
        }

        if (errorMessage != null)
        {
            yield return new AnalysisEvent { IsError = true, Message = errorMessage, ProgressPercentage = 100 };
        }
        else if (finalResult != null)
        {
            yield return new AnalysisEvent { ProgressPercentage = 100, Message = "Concluído!", Result = finalResult };
        }
    }

    /// <summary>
    /// Resolves the direct stream URL once and captures one frame per section, running at most
    /// <see cref="MaxParallelScreenshots"/> ffmpeg processes at a time.
    /// </summary>
    private async Task CaptureScreenshotsAsync(string videoUrl, List<TutorialSection> sections, TimeSpan videoDuration, CancellationToken ct)
    {
        var streamUrl = await GetRawVideoStreamUrl(videoUrl, ct);
        if (string.IsNullOrEmpty(streamUrl))
        {
            return;
        }

        using var sem = new SemaphoreSlim(MaxParallelScreenshots);
        await Task.WhenAll(sections.Select(s => CaptureFrameAsync(s, streamUrl, videoDuration, sem, ct)));
    }

    /// <summary>
    /// Captures a single frame at the section's timestamp and stores the JPEG bytes in
    /// <c>section.ImageData</c>. Failures are logged and leave the section without an image.
    /// </summary>
    private async Task CaptureFrameAsync(TutorialSection section, string streamUrl, TimeSpan duration, SemaphoreSlim sem, CancellationToken ct)
    {
        await sem.WaitAsync(ct);
        var outputFile = Path.Combine(Path.GetTempPath(), $"vs_frame_{Guid.NewGuid()}.jpg");
        try
        {
            if (!TimeSpan.TryParse(section.ImageTimestamp, CultureInfo.InvariantCulture, out var ts))
            {
                return;
            }

            // A zero duration means yt-dlp did not report one (see GetVideoInfoAsync); only
            // enforce the upper bound when the duration is actually known.
            if (duration > TimeSpan.Zero && ts > duration)
            {
                return;
            }

            var psi = new ProcessStartInfo { FileName = "ffmpeg" };
            string[] ffmpegArgs =
            {
                "-hide_banner", "-loglevel", "error",
                "-ss", ts.ToString(@"hh\:mm\:ss", CultureInfo.InvariantCulture),
                "-i", streamUrl,
                "-frames:v", "1",
                "-q:v", "2",
                "-y", outputFile
            };
            foreach (var arg in ffmpegArgs)
            {
                psi.ArgumentList.Add(arg);
            }

            var (exitCode, _, stderr) = await RunProcessAsync(psi, ct);
            if (exitCode == 0 && File.Exists(outputFile))
            {
                section.ImageData = await File.ReadAllBytesAsync(outputFile, ct);
            }
            else
            {
                _logger.LogWarning("ffmpeg terminou com código {ExitCode} em {Timestamp}: {StdErr}",
                    exitCode, section.ImageTimestamp, stderr.Trim());
            }
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "Screenshot falhou em {Timestamp}", section.ImageTimestamp);
        }
        finally
        {
            // Deleting must never prevent the semaphore slot from being returned.
            try
            {
                if (File.Exists(outputFile))
                {
                    File.Delete(outputFile);
                }
            }
            catch (Exception ex)
            {
                _logger.LogDebug(ex, "Não foi possível remover o arquivo temporário {File}", outputFile);
            }

            sem.Release();
        }
    }

    /// <summary>
    /// Reads title, channel, duration and thumbnail of a video through yt-dlp.
    /// </summary>
    /// <param name="url">Video URL.</param>
    /// <param name="ct">Cancels the yt-dlp invocation.</param>
    /// <returns>The video metadata; <c>Duration</c> is zero when yt-dlp reports no numeric duration.</returns>
    /// <exception cref="InvalidOperationException">yt-dlp produced no output.</exception>
    public async Task<VideoInfo> GetVideoInfoAsync(string url, CancellationToken ct)
    {
        // "--" ends option parsing, so a URL starting with '-' is never read as an option.
        var psi = CreateYtDlpStartInfo(
            "--print", "title", "--print", "channel", "--print", "duration", "--print", "thumbnail", "--", url);

        var (exitCode, output, stderr) = await RunProcessAsync(psi, ct);

        var lines = output.Split('\n', StringSplitOptions.RemoveEmptyEntries);
        if (lines.Length < 1)
        {
            _logger.LogWarning("yt-dlp não retornou metadados (código {ExitCode}): {StdErr}", exitCode, stderr.Trim());
            throw new InvalidOperationException("Falha ao ler dados do vídeo via yt-dlp.");
        }

        double.TryParse(lines.Length > 2 ? lines[2] : "0", NumberStyles.Any, CultureInfo.InvariantCulture, out var sec);

        return new VideoInfo
        {
            Title = lines[0].Trim(),
            Author = lines.Length > 1 ? lines[1].Trim() : "",
            Duration = TimeSpan.FromSeconds(sec),
            ThumbnailUrl = lines.Length > 3 ? lines[3].Trim() : "",
            Url = url
        };
    }

    /// <summary>
    /// Downloads the automatic subtitles (requested language, with English as fallback) into
    /// <paramref name="dir"/> and returns them as flat text and as timestamped paragraphs.
    /// </summary>
    /// <returns>Two empty strings when yt-dlp wrote no subtitle file.</returns>
    private async Task<(string flat, string readable)> GetTranscriptViaYtDlpAsync(string url, string lang, string dir, CancellationToken ct = default)
    {
        var requested = string.IsNullOrWhiteSpace(lang) ? null : lang.Trim();
        var subLangs = requested is null ? "en" : $"{requested},en";

        var psi = CreateYtDlpStartInfo(
            "--skip-download", "--write-auto-sub", "--sub-lang", subLangs,
            "--sub-format", "vtt", "--output", "%(id)s", "--", url);
        psi.WorkingDirectory = dir;

        var (exitCode, _, stderr) = await RunProcessAsync(psi, ct);

        // Both languages may have been written. Prefer the requested one and fall back to a
        // deterministic choice instead of whatever order the file system enumerates.
        var candidates = Directory.GetFiles(dir, "*.vtt").OrderBy(f => f, StringComparer.Ordinal).ToList();
        var file = requested is null
            ? candidates.FirstOrDefault()
            : candidates.FirstOrDefault(f => f.EndsWith($".{requested}.vtt", StringComparison.OrdinalIgnoreCase))
              ?? candidates.FirstOrDefault();

        if (file == null)
        {
            _logger.LogWarning("yt-dlp não gerou legendas (código {ExitCode}): {StdErr}", exitCode, stderr.Trim());
            return ("", "");
        }

        var vtt = await File.ReadAllTextAsync(file, ct);
        return (ParseVttToFlat(vtt), ParseVttToReadable(vtt));
    }

    /// <summary>
    /// Reduces a WebVTT document to a single line of plain text: headers, notes, cue timings
    /// and inline tags are dropped, consecutive repeated lines are collapsed and runs of
    /// whitespace are squeezed into one space.
    /// </summary>
    private string ParseVttToFlat(string vtt)
    {
        var parts = new List<string>();
        var seenCue = false;
        string? previous = null;

        foreach (var rawLine in vtt.Split('\n'))
        {
            var line = rawLine.Trim();

            if (line.Contains("-->", StringComparison.Ordinal))
            {
                seenCue = true;
                continue;
            }

            if (line.Length == 0
                || line.StartsWith("WEBVTT", StringComparison.Ordinal)
                || line.StartsWith("NOTE", StringComparison.Ordinal))
            {
                continue;
            }

            // Metadata lines of the file header (before the first cue) are not speech.
            if (!seenCue
                && (line.StartsWith("Kind:", StringComparison.Ordinal) || line.StartsWith("Language:", StringComparison.Ordinal)))
            {
                continue;
            }

            var text = MarkupTagRegex.Replace(line, "").Trim();
            if (text.Length == 0 || text == previous)
            {
                // Rolling captions repeat the previous line in the next cue.
                continue;
            }

            parts.Add(text);
            previous = text;
        }

        return WhitespaceRunRegex.Replace(string.Join(" ", parts), " ");
    }

    /// <summary>
    /// Converts a WebVTT document into paragraphs of roughly <see cref="ParagraphSeconds"/>
    /// seconds, each prefixed with <c>[hh:mm:ss]</c> and separated by a blank line.
    /// Falls back to <see cref="ParseVttToFlat"/> when no cue can be parsed.
    /// </summary>
    private string ParseVttToReadable(string vtt)
    {
        // Parse cues: a timing line followed by text lines up to the next blank/timing line.
        var cues = new List<(TimeSpan time, string text)>();
        var lines = vtt.Split('\n').Select(l => l.Trim()).ToArray();

        for (int i = 0; i < lines.Length; i++)
        {
            var line = lines[i];
            if (!line.Contains("-->", StringComparison.Ordinal))
            {
                continue;
            }

            // Start time of "HH:MM:SS.mmm --> HH:MM:SS.mmm"
            var timePart = line.Split("-->")[0].Trim().Replace(',', '.');
            if (!TimeSpan.TryParse(timePart, CultureInfo.InvariantCulture, out var ts))
            {
                continue;
            }

            var textLines = new List<string>();
            i++;
            while (i < lines.Length && !string.IsNullOrEmpty(lines[i]) && !lines[i].Contains("-->", StringComparison.Ordinal))
            {
                var t = MarkupTagRegex.Replace(lines[i], "").Trim();
                if (!string.IsNullOrEmpty(t))
                {
                    textLines.Add(t);
                }

                i++;
            }

            i--; // step back, the outer loop increments again

            if (textLines.Count > 0)
            {
                cues.Add((ts, string.Join(" ", textLines)));
            }
        }

        if (cues.Count == 0)
        {
            return ParseVttToFlat(vtt);
        }

        // Drop cues whose text equals the previous one (VTT often duplicates lines).
        var merged = new List<(TimeSpan time, string text)>();
        foreach (var cue in cues)
        {
            if (merged.Count > 0 && merged[^1].text == cue.text)
            {
                continue;
            }

            merged.Add(cue);
        }

        var sb = new System.Text.StringBuilder();
        TimeSpan? paraStart = null;
        var paraWords = new List<string>();

        void FlushParagraph()
        {
            if (paraWords.Count == 0)
            {
                return;
            }

            var stamp = paraStart!.Value.ToString(@"hh\:mm\:ss", CultureInfo.InvariantCulture);
            sb.AppendLine($"[{stamp}] {string.Join(" ", paraWords)}");
            sb.AppendLine();
            paraWords.Clear();
            paraStart = null;
        }

        foreach (var (time, text) in merged)
        {
            paraStart ??= time;
            paraWords.Add(text);
            if ((time - paraStart.Value).TotalSeconds >= ParagraphSeconds)
            {
                FlushParagraph();
            }
        }

        FlushParagraph();
        return sb.ToString().TrimEnd();
    }

    /// <summary>Reads a string property, returning <paramref name="fallback"/> when it is missing or not a string.</summary>
    private static string ReadString(JsonElement element, string name, string fallback)
    {
        if (element.ValueKind == JsonValueKind.Object
            && element.TryGetProperty(name, out var value)
            && value.ValueKind == JsonValueKind.String)
        {
            return value.GetString() ?? fallback;
        }

        return fallback;
    }

    /// <summary>
    /// Asks the chat model to structure the transcript and parses its JSON answer.
    /// </summary>
    /// <returns>
    /// The sections (with the first screenshot timestamp of each extracted and all markers removed
    /// from the text), the raw JSON, and category / short title / summary with fallbacks
    /// ("OTHER", the video title, empty) when the model omits them.
    /// </returns>
    /// <exception cref="InvalidOperationException">The answer has no JSON object or no "sections" array.</exception>
    private async Task<(List<TutorialSection> sections, string rawJson, string category, string docTitle, string summary)> GenerateTutorialContentAsync(
        string transcript, VideoInfo video, string inLang, string? outLang, string? userContext, CancellationToken ct)
    {
        var langMap = new Dictionary<string, string>
        {
            { "en", "English" },
            { "pt", "Portuguese (Brazilian)" },
            { "es", "Spanish" },
            { "fr", "French" }
        };
        var outName = langMap.GetValueOrDefault(outLang ?? inLang, "Portuguese (Brazilian)");
        var dur = video.Duration.ToString(@"hh\:mm\:ss");
        var contextSection = string.IsNullOrWhiteSpace(userContext)
            ? ""
            : $"\n### CONTEXTO DO USUÁRIO:\n{userContext}\nUse este contexto para ajustar o nível de detalhe, linguagem e foco da análise.\n";

        var prompt = $@"Você é um ANALISTA TÉCNICO DE CONTEÚDO especializado em converter vídeos em documentação estruturada.
{contextSection}
### REGRAS:
1. Transforme cada explicação em um tópico técnico detalhado — NÃO resuma demais.
2. Dedique o mesmo nível de profundidade a todos os tópicos.
3. Garanta que o tema ""{video.Title}"" seja a seção de maior clareza.
4. Insira `[SCREENSHOT: HH:MM:SS]` ao final de parágrafos com algo visualmente importante. (Limite: {dur})

### DADOS:
- Título: {video.Title}
- Transcrição: {transcript[..Math.Min(transcript.Length, MaxTranscriptCharsForPrompt)]}

### FORMATO DE SAÍDA (JSON):
{{
  ""category"": ""TUTORIAL | LECTURE | OTHER"",
  ""shortTitle"": ""Título Curto e Limpo"",
  ""summary"": ""Um parágrafo de até 4 linhas resumindo o valor principal do vídeo."",
  ""sections"": [
    {{ ""title"": ""Título do Tópico"", ""content"": ""Explicação densa... [SCREENSHOT: HH:MM:SS]"" }}
  ]
}}

Escreva tudo em {outName}.";

        var chatService = _kernel.GetRequiredService<IChatCompletionService>();
        var result = await chatService.GetChatMessageContentAsync(prompt, cancellationToken: ct);
        string rawContent = result.Content ?? "{}";
        _logger.LogInformation("Resposta bruta da IA: {RawContent}", rawContent);

        // Models frequently wrap the JSON in prose or code fences; keep the outermost object.
        var jsonMatch = JsonObjectRegex.Match(rawContent);
        if (!jsonMatch.Success)
        {
            throw new InvalidOperationException("A IA não retornou um JSON válido.");
        }

        string json = jsonMatch.Value;
        using var doc = JsonDocument.Parse(json);
        var root = doc.RootElement;

        if (!root.TryGetProperty("sections", out var sectionsElement) || sectionsElement.ValueKind != JsonValueKind.Array)
        {
            throw new InvalidOperationException("A IA não retornou um JSON válido.");
        }

        var sections = sectionsElement.EnumerateArray().Select(el =>
        {
            var content = ReadString(el, "content", "");
            var tsMatch = ScreenshotMarkerRegex.Match(content);
            return new TutorialSection
            {
                Title = ReadString(el, "title", ""),
                Content = ScreenshotMarkerRegex.Replace(content, "").Trim(),
                ImageTimestamp = tsMatch.Success ? tsMatch.Groups[1].Value : null
            };
        }).ToList();

        return (
            sections,
            json,
            ReadString(root, "category", "OTHER"),
            ReadString(root, "shortTitle", video.Title),
            ReadString(root, "summary", ""));
    }

    /// <summary>
    /// Asks yt-dlp for the direct URL of a video stream of at most 720p.
    /// </summary>
    /// <returns>The first URL printed by yt-dlp, or <c>null</c> when it printed nothing.</returns>
    private async Task<string?> GetRawVideoStreamUrl(string url, CancellationToken ct = default)
    {
        var psi = CreateYtDlpStartInfo("-g", "-f", "bv*[height<=720][ext=mp4]/bv*[height<=720]/b", "--", url);
        var (_, output, _) = await RunProcessAsync(psi, ct);

        return output
            .Split('\n', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries)
            .FirstOrDefault();
    }

    /// <summary>
    /// Renders the study document: a content page with summary, source and sections (plus
    /// their screenshots) and, when a readable transcript is supplied, a transcript appendix.
    /// </summary>
    /// <returns>The PDF file bytes.</returns>
    private byte[] GeneratePdf(string title, string summary, string url, List<TutorialSection> sections, string category, string? transcriptReadable = null)
    {
        var color = category switch
        {
            "TUTORIAL" => Colors.Green.Medium,
            "LECTURE" => Colors.Orange.Medium,
            _ => Colors.Blue.Medium
        };

        return Document.Create(container =>
        {
            // Main content page
            container.Page(page =>
            {
                page.Margin(2, Unit.Centimetre);
                page.DefaultTextStyle(x => x.FontSize(11).FontFamily("Segoe UI"));

                page.Header().Column(c =>
                {
                    c.Item().Row(r =>
                    {
                        r.RelativeItem().Text(title).SemiBold().FontSize(22).FontColor(Colors.Blue.Darken3);
                        r.ConstantItem(80).AlignRight().Text(category).Bold().FontSize(10).FontColor(color);
                    });
                    c.Item().PaddingTop(5).LineHorizontal(1).LineColor(Colors.Grey.Lighten2);
                });

                page.Content().PaddingVertical(1, Unit.Centimetre).Column(col =>
                {
                    col.Item().Background(Colors.Grey.Lighten4).Padding(10).Column(rc =>
                    {
                        rc.Item().Text("Resumo").Bold().FontSize(12).FontColor(Colors.Blue.Medium);
                        rc.Item().PaddingTop(2).Text(summary).Italic();
                    });

                    col.Item().PaddingTop(10).Text(t =>
                    {
                        t.Span("Fonte: ").SemiBold();
                        t.Span(url).Italic().FontSize(9);
                    });

                    foreach (var s in sections)
                    {
                        col.Item().PaddingTop(20).Text(s.Title).Bold().FontSize(16).FontColor(color);
                        col.Item().PaddingTop(5).Text(s.Content).LineHeight(1.5f);
                        if (s.ImageData != null)
                        {
                            col.Item().PaddingVertical(10).Image(s.ImageData).FitWidth();
                        }
                    }
                });

                page.Footer().AlignCenter().Text(x =>
                {
                    x.Span("VideoStudy.app — ");
                    x.CurrentPageNumber();
                });
            });

            // Transcript appendix page
            if (!string.IsNullOrWhiteSpace(transcriptReadable))
            {
                container.Page(page =>
                {
                    page.Margin(2, Unit.Centimetre);
                    page.DefaultTextStyle(x => x.FontSize(10).FontFamily("Segoe UI"));

                    page.Header().Column(c =>
                    {
                        c.Item().Row(r =>
                        {
                            r.RelativeItem().Text("Apêndice — Transcrição").SemiBold().FontSize(18).FontColor(Colors.Grey.Darken2);
                            r.ConstantItem(80).AlignRight().Text(title).FontSize(8).FontColor(Colors.Grey.Medium).Italic();
                        });
                        c.Item().PaddingTop(5).LineHorizontal(1).LineColor(Colors.Grey.Lighten2);
                        c.Item().PaddingTop(4)
                            .Text("Cada parágrafo representa aproximadamente 60 segundos. O timestamp indica o início do trecho.")
                            .FontSize(8).Italic().FontColor(Colors.Grey.Medium);
                    });

                    page.Content().PaddingVertical(1, Unit.Centimetre).Column(col =>
                    {
                        foreach (var paragraph in transcriptReadable.Split("\n\n", StringSplitOptions.RemoveEmptyEntries))
                        {
                            var trimmed = paragraph.Trim();
                            if (string.IsNullOrEmpty(trimmed))
                            {
                                continue;
                            }

                            // Split the timestamp from the text: "[HH:MM:SS] rest of text"
                            var bracketEnd = trimmed.IndexOf(']');
                            if (bracketEnd > 0 && trimmed.StartsWith('['))
                            {
                                var timestamp = trimmed[..(bracketEnd + 1)];
                                var text = trimmed[(bracketEnd + 1)..].Trim();
                                col.Item().PaddingBottom(8).Column(p =>
                                {
                                    p.Item().Text(timestamp).Bold().FontSize(9).FontColor(Colors.Blue.Medium);
                                    p.Item().PaddingTop(2).Text(text).LineHeight(1.5f);
                                });
                            }
                            else
                            {
                                col.Item().PaddingBottom(8).Text(trimmed).LineHeight(1.5f);
                            }
                        }
                    });

                    page.Footer().AlignCenter().Text(x =>
                    {
                        x.Span("VideoStudy.app — ");
                        x.CurrentPageNumber();
                    });
                });
            }
        }).GeneratePdf();
    }
}