464 lines
19 KiB
C#
464 lines
19 KiB
C#
using System.Diagnostics;
|
|
using System.Text.RegularExpressions;
|
|
using System.Text.Json;
|
|
using Microsoft.SemanticKernel;
|
|
using Microsoft.SemanticKernel.ChatCompletion;
|
|
using QuestPDF.Fluent;
|
|
using QuestPDF.Helpers;
|
|
using QuestPDF.Infrastructure;
|
|
using PuppeteerSharp;
|
|
using VideoStudy.Shared;
|
|
|
|
namespace VideoStudy.API.Services;
|
|
|
|
public class AnalysisService
|
|
{
|
|
private readonly Kernel _kernel;
|
|
private readonly ILogger<AnalysisService> _logger;
|
|
private readonly List<string> _debugSteps = new();
|
|
|
|
/// <summary>
/// Creates the service: stores the injected kernel and logger, selects the QuestPDF
/// Community license and starts a background Chromium download for PuppeteerSharp.
/// </summary>
/// <param name="kernel">Semantic Kernel instance used to resolve the chat completion service.</param>
/// <param name="logger">Logger used for diagnostics.</param>
/// <remarks>
/// The download is started with <see cref="Task.Run(Func{Task})"/> and is not awaited here;
/// its outcome is only reported through the logger.
/// </remarks>
public AnalysisService(Kernel kernel, ILogger<AnalysisService> logger)
{
    QuestPDF.Settings.License = LicenseType.Community;

    _kernel = kernel;
    _logger = logger;

    // Fire-and-forget: the constructor returns immediately while Chromium is fetched.
    _ = Task.Run(() => DownloadChromiumAsync());

    // Downloads the browser build and logs success or failure; never throws.
    async Task DownloadChromiumAsync()
    {
        try
        {
            var fetcher = new BrowserFetcher();
            await fetcher.DownloadAsync();
            _logger.LogInformation("Chromium ready.");
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Failed to download Chromium.");
        }
    }
}
|
|
|
|
/// <summary>
/// Writes <paramref name="message"/> to the logger at Information level and appends a
/// time-stamped copy (local time, HH:mm:ss) to the in-memory debug step list.
/// </summary>
/// <param name="message">Already formatted, human-readable progress message.</param>
private void AddLog(string message)
{
    // Use a constant template and pass the text as an argument (analyzer rule CA2254):
    // the message may contain video titles or paths, which must not be treated as a template.
    _logger.LogInformation("{Message}", message);
    _debugSteps.Add($"[{DateTime.Now:HH:mm:ss}] {message}");
}
|
|
|
|
/// <summary>
/// Resolves the yt-dlp executable to launch.
/// </summary>
/// <returns>
/// On Windows: the first existing <c>yt-dlp.exe</c> among the current directory, the application
/// base directory and its <c>Binaries</c> sub-folder. On other platforms: the bundled
/// <c>Binaries/yt-dlp_linux</c> (or <c>yt-dlp_macos</c> on macOS) if it exists.
/// In every other case the bare command name <c>yt-dlp</c> is returned.
/// </returns>
private string GetYtDlpPath()
{
    const string fallbackCommand = "yt-dlp";
    var baseDir = AppDomain.CurrentDomain.BaseDirectory;

    if (System.Runtime.InteropServices.RuntimeInformation.IsOSPlatform(System.Runtime.InteropServices.OSPlatform.Windows))
    {
        const string exeName = "yt-dlp.exe";

        // Probe order is significant: working directory first, then the app folder, then Binaries.
        var candidates = new[]
        {
            Path.Combine(Directory.GetCurrentDirectory(), exeName),
            Path.Combine(baseDir, exeName),
            Path.Combine(baseDir, "Binaries", exeName)
        };

        return candidates.FirstOrDefault(path => File.Exists(path)) ?? fallbackCommand;
    }

    var executableName = System.Runtime.InteropServices.RuntimeInformation.IsOSPlatform(System.Runtime.InteropServices.OSPlatform.OSX)
        ? "yt-dlp_macos"
        : "yt-dlp_linux";

    var fullPath = Path.Combine(baseDir, "Binaries", executableName);
    if (!File.Exists(fullPath))
    {
        return fallbackCommand;
    }

    // Best effort: make sure the bundled binary is executable. A failure here is not fatal
    // (the file may already be executable), but it is logged instead of being silently dropped.
    try
    {
        var chmodInfo = new ProcessStartInfo
        {
            FileName = "chmod",
            UseShellExecute = false,
            CreateNoWindow = true,
            ArgumentList = { "+x", fullPath }
        };

        // Dispose the Process handle; Process.Start may return null when nothing was started.
        using var chmod = Process.Start(chmodInfo);
        chmod?.WaitForExit();
    }
    catch (Exception ex)
    {
        _logger.LogWarning(ex, "Could not mark {Path} as executable.", fullPath);
    }

    return fullPath;
}
|
|
|
|
/// <summary>
/// Queries yt-dlp for basic metadata (title, channel, duration, thumbnail) of a video.
/// </summary>
/// <param name="url">Video URL handed to yt-dlp.</param>
/// <returns>A <see cref="VideoInfo"/> filled from the four lines printed by yt-dlp.</returns>
/// <exception cref="ArgumentException"><paramref name="url"/> is null or blank.</exception>
/// <exception cref="Exception">yt-dlp could not be started or exited with a non-zero code.</exception>
public async Task<VideoInfo> GetVideoInfoAsync(string url)
{
    if (string.IsNullOrWhiteSpace(url))
    {
        throw new ArgumentException("A video URL is required.", nameof(url));
    }

    var startInfo = new ProcessStartInfo
    {
        FileName = GetYtDlpPath(),
        RedirectStandardOutput = true,
        RedirectStandardError = true,
        UseShellExecute = false,
        CreateNoWindow = true,
        // ArgumentList passes each value as a separate argument, so quotes or spaces in the
        // URL cannot inject extra options; "--" stops option parsing before the URL.
        ArgumentList =
        {
            "--print", "title",
            "--print", "channel",
            "--print", "duration",
            "--print", "thumbnail",
            "--", url
        }
    };

    using var proc = Process.Start(startInfo) ?? throw new Exception("Failed to start yt-dlp");

    // Drain both pipes concurrently: reading them one after the other can deadlock when the
    // child blocks on a full stderr pipe while we are still waiting for stdout to end.
    var stdoutTask = proc.StandardOutput.ReadToEndAsync();
    var stderrTask = proc.StandardError.ReadToEndAsync();
    await Task.WhenAll(stdoutTask, stderrTask);
    await proc.WaitForExitAsync();

    var output = await stdoutTask;
    var error = await stderrTask;

    if (proc.ExitCode != 0)
    {
        throw new Exception($"yt-dlp error: {error}");
    }

    // One printed field per line, in the order requested above.
    var lines = output.Trim().Split('\n', StringSplitOptions.RemoveEmptyEntries);
    var title = lines.Length > 0 ? lines[0].Trim() : "Unknown";
    var channel = lines.Length > 1 ? lines[1].Trim() : "Unknown";
    var durationStr = lines.Length > 2 ? lines[2].Trim() : "0";
    var thumbnail = lines.Length > 3 ? lines[3].Trim() : "";

    // Unparseable, non-finite or negative durations fall back to zero instead of making
    // TimeSpan.FromSeconds throw.
    var parsed = double.TryParse(
        durationStr,
        System.Globalization.NumberStyles.Any,
        System.Globalization.CultureInfo.InvariantCulture,
        out var durationSeconds);
    if (!parsed || !double.IsFinite(durationSeconds) || durationSeconds < 0)
    {
        durationSeconds = 0;
    }

    return new VideoInfo
    {
        Title = title,
        Author = channel,
        Duration = TimeSpan.FromSeconds(durationSeconds),
        Url = url,
        ThumbnailUrl = thumbnail
    };
}
|
|
|
|
/// <summary>
/// Runs the full pipeline for one video: fetch the transcript, ask the LLM for structured
/// tutorial content, capture screenshots for sections that carry a timestamp, and render a PDF.
/// </summary>
/// <param name="request">Video URL plus transcript language and optional output language.</param>
/// <returns>
/// A response with <c>Status = "success"</c> and the generated artifacts, or
/// <c>Status = "error"</c> with the exception message. Exceptions raised inside the pipeline
/// are caught and reported through the response rather than thrown.
/// </returns>
/// <remarks>
/// NOTE(review): the debug step list is instance state that is cleared at the start of each call;
/// overlapping calls on the same instance would share it. Confirm the registered service lifetime.
/// </remarks>
public async Task<AnalysisResponse> AnalyzeVideoAsync(AnalysisRequest request)
{
    _debugSteps.Clear();
    var tempDir = Path.Combine(Path.GetTempPath(), "VideoStudy", Guid.NewGuid().ToString());
    Directory.CreateDirectory(tempDir);
    AddLog($"📁 Iniciando processamento em: {tempDir}");

    string rawLlmResponse = "";

    try
    {
        // --- Step 1: Transcription ---
        AddLog("🌐 Obtendo transcrição via yt-dlp...");
        var (transcript, originalTitle) = await GetTranscriptViaYtDlpAsync(request.VideoUrl, request.Language, tempDir);

        if (string.IsNullOrWhiteSpace(transcript))
        {
            throw new Exception("Não foi possível obter a transcrição do vídeo.");
        }

        AddLog($"✅ Transcrição obtida: '{originalTitle}' ({transcript.Length} chars).");

        // --- Step 2: Intelligence ---
        AddLog("🧠 Enviando transcrição para o Groq (LLM)...");
        var (tutorialSections, rawJson, category, docTitle) = await GenerateTutorialContentAsync(transcript, originalTitle, request.Language, request.OutputLanguage);
        rawLlmResponse = rawJson;

        // The debug dump is a convenience only: a read-only or missing working directory
        // must not fail the whole analysis.
        var debugFile = Path.Combine(Directory.GetCurrentDirectory(), "DEBUG_LAST_RESPONSE.md");
        try
        {
            var debugContent = $"# {docTitle} ({category})\n\nSource: {originalTitle}\n\n## Raw JSON\n```json\n{rawJson}\n```\n";
            await File.WriteAllTextAsync(debugFile, debugContent);
            AddLog($"📝 Arquivo de debug gerado: {debugFile}");
        }
        catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
        {
            _logger.LogWarning(ex, "Could not write debug file {DebugFile}.", debugFile);
        }

        // --- Step 3: Image Capture ---
        var sectionsWithImages = tutorialSections.Where(s => !string.IsNullOrEmpty(s.ImageTimestamp)).ToList();
        if (sectionsWithImages.Count > 0)
        {
            AddLog($"📸 Capturando {sectionsWithImages.Count} prints usando Puppeteer (Direct Bypass)...");
            await CaptureScreenshotsWithPuppeteerAsync(request.VideoUrl, tutorialSections, tempDir);
        }
        else
        {
            AddLog("⚠️ Nenhuma tag [SCREENSHOT] foi gerada pela IA.");
        }

        // --- Step 4: PDF Generation ---
        AddLog("📄 Gerando PDF final com QuestPDF...");
        var pdfBytes = GeneratePdf(docTitle, request.VideoUrl, tutorialSections, category);

        AddLog("🎉 Processamento concluído com sucesso!");

        return new AnalysisResponse
        {
            Status = "success",
            VideoTitle = originalTitle,
            DocumentTitle = docTitle,
            Category = category,
            Transcript = transcript,
            TutorialSections = tutorialSections,
            PdfData = pdfBytes,
            DebugSteps = new List<string>(_debugSteps),
            RawLlmResponse = rawLlmResponse,
            Analysis = "Tutorial gerado com sucesso!"
        };
    }
    catch (Exception ex)
    {
        // Keep the stack trace in the server log; the response only carries the message.
        _logger.LogError(ex, "Video analysis failed.");
        AddLog($"❌ ERRO: {ex.Message}");
        return new AnalysisResponse
        {
            Status = "error",
            ErrorMessage = ex.Message,
            DebugSteps = new List<string>(_debugSteps),
            RawLlmResponse = rawLlmResponse
        };
    }
    finally
    {
        // Best-effort cleanup of the per-request working directory.
        try
        {
            if (Directory.Exists(tempDir))
            {
                Directory.Delete(tempDir, true);
            }
        }
        catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
        {
            _logger.LogWarning(ex, "Could not delete temporary directory {TempDir}.", tempDir);
        }
    }
}
|
|
|
|
/// <summary>
/// Uses yt-dlp to obtain the video title and its subtitles (manual or automatic) as plain text.
/// </summary>
/// <param name="url">Video URL handed to yt-dlp.</param>
/// <param name="language">Preferred subtitle language code; English is requested as a second choice.</param>
/// <param name="outputDir">Working directory in which yt-dlp writes the <c>.vtt</c> files.</param>
/// <returns>
/// The flattened transcript and the title. The transcript is empty when no subtitle file was
/// produced; the title falls back to "Video Analysis" when yt-dlp printed nothing.
/// </returns>
/// <exception cref="Exception">The subtitle download process could not be started.</exception>
private async Task<(string transcript, string title)> GetTranscriptViaYtDlpAsync(string url, string language, string outputDir)
{
    var ytDlpPath = GetYtDlpPath();

    // 1. Fetch the title with a dedicated "--print title" call.
    var titleStartInfo = new ProcessStartInfo
    {
        FileName = ytDlpPath,
        RedirectStandardOutput = true,
        UseShellExecute = false,
        CreateNoWindow = true,
        // Separate arguments (no string splicing) and "--" so the URL is never parsed as an option.
        ArgumentList = { "--print", "title", "--", url }
    };

    var title = string.Empty;
    using (var titleProc = Process.Start(titleStartInfo))
    {
        if (titleProc is not null)
        {
            title = (await titleProc.StandardOutput.ReadToEndAsync()).Trim();
            await titleProc.WaitForExitAsync();
        }
    }

    if (string.IsNullOrEmpty(title))
    {
        title = "Video Analysis";
    }

    // 2. Fetch subtitles only. The output template is the video id, so the files are
    //    named "<id>.<lang>.vtt" inside outputDir.
    var startInfo = new ProcessStartInfo
    {
        FileName = ytDlpPath,
        WorkingDirectory = outputDir,
        RedirectStandardOutput = true,
        RedirectStandardError = true,
        UseShellExecute = false,
        CreateNoWindow = true,
        ArgumentList =
        {
            "--skip-download",
            "--write-sub",
            "--write-auto-sub",
            "--sub-lang", $"{language},en",
            "--sub-format", "vtt",
            "--output", "%(id)s",
            "--", url
        }
    };

    using var proc = Process.Start(startInfo) ?? throw new Exception("Failed to start yt-dlp");

    // Redirected pipes must be drained, otherwise the child can block once a pipe buffer is full.
    var stdoutTask = proc.StandardOutput.ReadToEndAsync();
    var stderrTask = proc.StandardError.ReadToEndAsync();
    await Task.WhenAll(stdoutTask, stderrTask);
    await proc.WaitForExitAsync();

    // Sort for a deterministic choice, then prefer the requested language over the English
    // fallback (both may have been downloaded).
    var vttFiles = Directory.GetFiles(outputDir, "*.vtt")
        .OrderBy(path => path, StringComparer.Ordinal)
        .ToArray();

    var vttFile =
        vttFiles.FirstOrDefault(f => f.EndsWith($".{language}.vtt", StringComparison.OrdinalIgnoreCase))
        ?? vttFiles.FirstOrDefault(f => Path.GetFileName(f).Contains($".{language}-", StringComparison.OrdinalIgnoreCase))
        ?? vttFiles.FirstOrDefault();

    if (vttFile is null)
    {
        _logger.LogWarning(
            "yt-dlp produced no subtitle file (exit code {ExitCode}): {Error}",
            proc.ExitCode,
            (await stderrTask).Trim());
        return (string.Empty, title);
    }

    return (ParseVttToText(await File.ReadAllTextAsync(vttFile)), title);
}
|
|
|
|
/// <summary>
/// Flattens WebVTT subtitle content into a single line of plain text.
/// </summary>
/// <param name="vttContent">Raw content of a <c>.vtt</c> file.</param>
/// <returns>
/// The caption lines joined by single spaces, with the WebVTT header block, NOTE lines,
/// cue timing lines and inline tags removed. Each distinct line is kept only once (first
/// occurrence wins), which collapses the repeated lines of rolling automatic captions.
/// </returns>
private string ParseVttToText(string vttContent)
{
    if (string.IsNullOrEmpty(vttContent))
    {
        return string.Empty;
    }

    var textLines = new List<string>();
    var seen = new HashSet<string>(StringComparer.Ordinal);
    var inHeader = false;
    var sawCue = false;

    foreach (var rawLine in vttContent.Split('\n'))
    {
        var line = rawLine.Trim();

        if (line.StartsWith("WEBVTT", StringComparison.Ordinal))
        {
            // Before the first cue this opens the header block ("Kind:", "Language:", ...),
            // which runs until the next blank line and is not spoken text.
            inHeader = !sawCue;
            continue;
        }

        if (line.Length == 0)
        {
            inHeader = false;
            continue;
        }

        if (inHeader)
        {
            continue;
        }

        if (line.Contains("-->"))
        {
            sawCue = true;
            continue;
        }

        if (line.StartsWith("NOTE", StringComparison.Ordinal))
        {
            continue;
        }

        // Strip inline tags such as <c> or <00:00:01.000>; a line made only of tags is dropped.
        line = Regex.Replace(line, @"<[^>]*>", "").Trim();
        if (line.Length == 0)
        {
            continue;
        }

        if (seen.Add(line))
        {
            textLines.Add(line);
        }
    }

    return string.Join(" ", textLines);
}
|
|
|
|
/// <summary>
/// Sends the transcript to the chat completion service and parses the JSON answer into
/// tutorial sections, a category and a document title.
/// </summary>
/// <param name="transcript">Plain-text transcript; only the first 20000 characters are sent.</param>
/// <param name="originalTitle">Original video title; also the fallback document title.</param>
/// <param name="inputLanguage">Transcript language code; used as output language when none is given.</param>
/// <param name="outputLanguage">Optional language code for the generated text.</param>
/// <returns>
/// The parsed sections, the JSON text extracted from the model answer, the category
/// ("OTHER" by default) and the document title. If the answer cannot be parsed, the sections
/// collected so far (possibly none) are returned and the failure is logged.
/// </returns>
private async Task<(List<TutorialSection> sections, string rawJson, string category, string docTitle)> GenerateTutorialContentAsync(string transcript, string originalTitle, string inputLanguage, string? outputLanguage)
{
    // Case-insensitive so that "EN" or "Pt" resolve as well.
    var langMap = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase)
    {
        {"en", "English"}, {"pt", "Portuguese (Brazilian)"}, {"es", "Spanish"},
        {"fr", "French"}, {"de", "German"}, {"it", "Italian"},
        {"ja", "Japanese"}, {"ko", "Korean"}, {"zh", "Chinese"}
    };
    var outputLang = string.IsNullOrWhiteSpace(outputLanguage) ? inputLanguage : outputLanguage;
    var outputLangName = langMap.GetValueOrDefault(outputLang, outputLang);

    // Cap the transcript size without cutting a surrogate pair in half.
    var limit = Math.Min(transcript.Length, 20000);
    if (limit > 0 && limit < transcript.Length && char.IsHighSurrogate(transcript[limit - 1]))
    {
        limit--;
    }
    var transcriptExcerpt = transcript[..limit];

    var chatService = _kernel.GetRequiredService<IChatCompletionService>();
    var prompt = $@"
Você é um Editor Chefe e Analista de Conteúdo Sênior.
Receberá:
A) TÍTULO ORIGINAL: {originalTitle}
B) TRANSCRIÇÃO: {transcriptExcerpt}

SUA MISSÃO:
1. **Classificar** o vídeo em: 'TUTORIAL', 'MEETING', 'LECTURE' ou 'OTHER'.
2. **Criar um Título Profissional**:
- Use o TÍTULO ORIGINAL como base.
- Remova clickbaits, emojis e CAPS LOCK excessivo.
- O título deve parecer o de um documento técnico ou ata oficial.
3. **Estruturar o Conteúdo**:
- Converta o conteúdo em um texto educativo e denso.
- Identifique momentos visuais críticos e insira `[SCREENSHOT: HH:MM:SS]` no final do parágrafo correspondente.

**IMPORTANTE: Todo o texto de saída (documentTitle, títulos das seções e conteúdo) DEVE ser escrito em {outputLangName}.**

SAÍDA JSON OBRIGATÓRIA:
{{
""category"": ""TUTORIAL | MEETING | LECTURE | OTHER"",
""documentTitle"": ""Título Profissional Gerado"",
""sections"": [
{{
""title"": ""Título da Seção"",
""content"": ""Texto explicativo detalhado... [SCREENSHOT: 00:05:30]""
}}
]
}}";

    var result = await chatService.GetChatMessageContentAsync(prompt);
    var json = result.Content?.Trim() ?? "{}";

    // Extract JSON from the LLM response; handles text before/after the JSON block
    // by taking everything from the first '{' to the last '}'.
    var jsonMatch = Regex.Match(json, @"\{[\s\S]*\}", RegexOptions.Singleline);
    if (jsonMatch.Success)
    {
        json = jsonMatch.Value;
    }

    var sections = new List<TutorialSection>();
    string category = "OTHER";
    string docTitle = originalTitle;

    try
    {
        using var doc = JsonDocument.Parse(json);
        var root = doc.RootElement;

        if (root.TryGetProperty("category", out var catEl)) category = catEl.GetString() ?? "OTHER";
        if (root.TryGetProperty("documentTitle", out var titleEl)) docTitle = titleEl.GetString() ?? originalTitle;

        if (root.TryGetProperty("sections", out var sectionsEl) && sectionsEl.ValueKind == JsonValueKind.Array)
        {
            foreach (var el in sectionsEl.EnumerateArray())
            {
                if (el.ValueKind != JsonValueKind.Object)
                {
                    continue;
                }

                var content = ReadString(el, "content");
                var ts = ExtractTimestamp(content);

                sections.Add(new TutorialSection
                {
                    Title = ReadString(el, "title"),
                    // Remove every screenshot tag, whatever its inner spacing, so none leaks into the PDF.
                    Content = Regex.Replace(content, @"\[SCREENSHOT:[^\]]*\]", "").Trim(),
                    ImageTimestamp = ts
                });
            }
        }
    }
    catch (Exception ex) when (ex is JsonException or InvalidOperationException)
    {
        // Still best-effort (the caller gets whatever was parsed), but no longer silent.
        _logger.LogWarning(ex, "Failed to parse the LLM response as tutorial JSON.");
    }

    return (sections, json, category, docTitle);

    // Returns the named string property, or "" when it is missing or not a JSON string.
    static string ReadString(JsonElement obj, string name) =>
        obj.TryGetProperty(name, out var value) && value.ValueKind == JsonValueKind.String
            ? value.GetString() ?? ""
            : "";
}
|
|
|
|
/// <summary>
/// Finds the first <c>[SCREENSHOT: HH:MM:SS]</c> tag in <paramref name="text"/>.
/// </summary>
/// <param name="text">Section content that may contain a screenshot tag.</param>
/// <returns>The <c>HH:MM:SS</c> part of the first tag, or <c>null</c> when there is none.</returns>
private string? ExtractTimestamp(string text)
{
    var tag = Regex.Match(text, @"\[SCREENSHOT:\s*(\d{2}:\d{2}:\d{2})\]");
    if (!tag.Success)
    {
        return null;
    }

    return tag.Groups[1].Value;
}
|
|
|
|
/// <summary>
/// Asks yt-dlp (<c>-g -f b</c>) for a direct media URL of the video.
/// </summary>
/// <param name="videoUrl">Video page URL handed to yt-dlp.</param>
/// <returns>The first non-empty line printed by yt-dlp, or <c>null</c> when nothing was printed
/// or the process could not be started.</returns>
private async Task<string?> GetRawVideoStreamUrl(string videoUrl)
{
    var startInfo = new ProcessStartInfo
    {
        FileName = GetYtDlpPath(),
        RedirectStandardOutput = true,
        RedirectStandardError = true,
        UseShellExecute = false,
        CreateNoWindow = true,
        // Separate arguments (no string splicing) and "--" so the URL is never parsed as an option.
        ArgumentList = { "-g", "-f", "b", "--", videoUrl }
    };

    using var proc = Process.Start(startInfo);
    if (proc == null)
    {
        return null;
    }

    // Drain both redirected pipes so the child can never block on a full buffer.
    var stdoutTask = proc.StandardOutput.ReadToEndAsync();
    var stderrTask = proc.StandardError.ReadToEndAsync();
    await Task.WhenAll(stdoutTask, stderrTask);
    await proc.WaitForExitAsync();

    var url = (await stdoutTask)
        .Split('\n', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries)
        .FirstOrDefault();

    if (string.IsNullOrEmpty(url))
    {
        _logger.LogWarning(
            "yt-dlp returned no stream URL (exit code {ExitCode}): {Error}",
            proc.ExitCode,
            (await stderrTask).Trim());
        return null;
    }

    return url;
}
|
|
|
|
/// <summary>
/// Captures one JPEG frame per section that has an <c>ImageTimestamp</c>, by loading the direct
/// media URL into a headless browser page, seeking to the timestamp and taking a page screenshot.
/// </summary>
/// <param name="videoUrl">Video page URL; resolved to a direct media URL first.</param>
/// <param name="sections">All sections; those with a timestamp get <c>ImageData</c> filled on success.</param>
/// <param name="outputDir">Directory in which the temporary <c>snap_*.jpg</c> files are written.</param>
/// <remarks>
/// Failures are reported through the debug log and never thrown; sections whose frame could
/// not be captured simply keep <c>ImageData</c> unset.
/// </remarks>
private async Task CaptureScreenshotsWithPuppeteerAsync(string videoUrl, List<TutorialSection> sections, string outputDir)
{
    // Upper bound for a single seek, so a video that never loads cannot stall the request.
    const int seekTimeoutMs = 15000;

    var sectionsWithImages = sections.Where(s => !string.IsNullOrEmpty(s.ImageTimestamp)).ToList();
    if (sectionsWithImages.Count == 0) return;

    AddLog("🔍 Obtendo link direto do vídeo (Bypass YouTube Player)...");
    var rawVideoUrl = await GetRawVideoStreamUrl(videoUrl);
    if (string.IsNullOrEmpty(rawVideoUrl)) { AddLog("❌ Falha ao obter link direto."); return; }

    try
    {
        using var browser = await Puppeteer.LaunchAsync(new LaunchOptions { Headless = true, Args = new[] { "--no-sandbox", "--window-size=1280,720" } });
        using var page = await browser.NewPageAsync();
        await page.SetViewportAsync(new ViewPortOptions { Width = 1280, Height = 720 });

        // The URL goes into a quoted HTML attribute, so '&' and quotes must be encoded.
        var encodedSrc = System.Net.WebUtility.HtmlEncode(rawVideoUrl);
        var html = $@"<html><body style='margin:0;background:black;overflow:hidden;'><video id='v' width='1280' height='720' muted><source src='{encodedSrc}' type='video/mp4'></video></body></html>";
        await page.SetContentAsync(html);
        await page.WaitForSelectorAsync("video");

        foreach (var section in sectionsWithImages)
        {
            if (!TimeSpan.TryParse(section.ImageTimestamp, System.Globalization.CultureInfo.InvariantCulture, out var ts))
            {
                continue;
            }

            var sec = (int)ts.TotalSeconds;
            AddLog($"🌐 Renderizando frame: {section.ImageTimestamp}...");

            // Resolves true on 'seeked', false on a media error or after the timeout.
            // Listeners are attached before currentTime is assigned.
            var seeked = await page.EvaluateFunctionAsync<bool>(@"(s, timeoutMs) => {
                return new Promise(resolve => {
                    const v = document.getElementById('v');
                    const src = v.querySelector('source');
                    let done = false;
                    let timer = null;
                    const finish = ok => {
                        if (done) return;
                        done = true;
                        clearTimeout(timer);
                        resolve(ok);
                    };
                    timer = setTimeout(() => finish(false), timeoutMs);
                    v.addEventListener('seeked', () => finish(true), {once:true});
                    v.addEventListener('error', () => finish(false), {once:true});
                    if (src) src.addEventListener('error', () => finish(false), {once:true});
                    v.currentTime = s;
                });
            }", sec, seekTimeoutMs);

            if (!seeked)
            {
                AddLog($"⚠️ Não foi possível posicionar o vídeo em {section.ImageTimestamp}.");
                continue;
            }

            // Short pause after the seek before the screenshot is taken.
            await Task.Delay(500);
            var path = Path.Combine(outputDir, $"snap_{sec}.jpg");
            await page.ScreenshotAsync(path, new ScreenshotOptions { Type = ScreenshotType.Jpeg, Quality = 90 });
            if (File.Exists(path)) section.ImageData = await File.ReadAllBytesAsync(path);
        }
    }
    catch (Exception ex) { AddLog($"❌ Erro Puppeteer: {ex.Message}"); }
}
|
|
|
|
/// <summary>
/// Renders the tutorial as a single-page-template PDF with QuestPDF.
/// </summary>
/// <param name="docTitle">Title printed in the page header.</param>
/// <param name="videoUrl">Source URL printed at the top of the content.</param>
/// <param name="sections">Sections to print; each may carry image bytes or only a timestamp.</param>
/// <param name="category">Category label printed in the header; it also selects the accent color.</param>
/// <returns>The generated PDF document as a byte array.</returns>
private byte[] GeneratePdf(string docTitle, string videoUrl, List<TutorialSection> sections, string category)
{
    // Accent color for the category label and the section titles.
    var accent = category switch
    {
        "TUTORIAL" => Colors.Green.Medium,
        "MEETING" => Colors.Orange.Medium,
        "LECTURE" => Colors.Purple.Medium,
        _ => Colors.Blue.Medium
    };

    var pdf = Document.Create(root =>
    {
        root.Page(page =>
        {
            page.Margin(2, Unit.Centimetre);
            page.DefaultTextStyle(style => style.FontSize(11).FontFamily("Segoe UI").Fallback(fallback => fallback.FontFamily("Microsoft YaHei")));

            page.Header().Column(ComposeHeader);
            page.Content().PaddingVertical(1, Unit.Centimetre).Column(ComposeBody);
            page.Footer().AlignCenter().Text(footer =>
            {
                footer.Span("Gerado por VideoStudy.app - ");
                footer.CurrentPageNumber();
            });
        });
    });

    return pdf.GeneratePdf();

    // Header: title on the left, category label on the right, thin rule underneath.
    void ComposeHeader(ColumnDescriptor header)
    {
        header.Item().Row(titleRow =>
        {
            titleRow.RelativeItem().Text(docTitle).SemiBold().FontSize(20).FontColor(Colors.Black);
            titleRow.ConstantItem(100).AlignRight().Text(category).Bold().FontSize(10).FontColor(accent);
        });
        header.Item().PaddingTop(5).LineHorizontal(1).LineColor(Colors.Grey.Lighten2);
    }

    // Body: source line, spacer, then every section in order.
    void ComposeBody(ColumnDescriptor body)
    {
        body.Item().Text($"Fonte: {videoUrl}").Italic().FontSize(9).FontColor(Colors.Grey.Medium);
        body.Item().PaddingBottom(20);

        foreach (var section in sections)
        {
            ComposeSection(body, section);
        }
    }

    // One section: title, text, then the captured image or a placeholder, then a spacer.
    void ComposeSection(ColumnDescriptor body, TutorialSection section)
    {
        body.Item().Text(section.Title).Bold().FontSize(14).FontColor(accent);
        body.Item().Text(paragraph => { paragraph.Span(section.Content); });

        if (section.ImageData != null)
        {
            body.Item().PaddingVertical(10).Image(section.ImageData).FitWidth();
        }
        else if (!string.IsNullOrEmpty(section.ImageTimestamp))
        {
            // A timestamp without image bytes gets a grey placeholder box instead.
            body.Item().PaddingVertical(10)
                .Background(Colors.Grey.Lighten3)
                .Height(100)
                .AlignCenter()
                .AlignMiddle()
                .Text($"[Imagem Indisponível: {section.ImageTimestamp}]")
                .FontSize(10)
                .FontColor(Colors.Grey.Darken2);
        }

        body.Item().PaddingBottom(15);
    }
}
|
|
}
|