335 lines
14 KiB
C#
335 lines
14 KiB
C#
using System.Diagnostics;
using System.Globalization;
using System.Net;
using System.Runtime.InteropServices;
using System.Text.Json;
using System.Text.RegularExpressions;
using Microsoft.SemanticKernel;
using Microsoft.SemanticKernel.ChatCompletion;
using PuppeteerSharp;
using QuestPDF.Fluent;
using QuestPDF.Helpers;
using QuestPDF.Infrastructure;
using VideoStudy.Shared;
|
|
|
|
namespace VideoStudy.API.Services;
|
|
|
|
/// <summary>
/// Turns a video URL into an illustrated step-by-step tutorial:
/// subtitles are fetched with yt-dlp, rewritten into tutorial sections by the
/// configured chat-completion model, illustrated with video frames captured
/// through headless Chromium (PuppeteerSharp) and finally rendered to PDF with QuestPDF.
/// </summary>
/// <remarks>
/// The per-request debug log is stored in an instance field that is cleared at the
/// start of every analysis, so overlapping calls on the same instance would mix their logs.
/// NOTE(review): presumably the service is registered as scoped/transient — confirm.
/// </remarks>
public class AnalysisService
{
    /// <summary>Maximum number of transcript characters embedded in the LLM prompt.</summary>
    private const int MaxTranscriptChars = 15000;

    /// <summary>Matches inline WebVTT markup such as <c>&lt;c&gt;</c> or <c>&lt;00:00:01.000&gt;</c>.</summary>
    private static readonly Regex VttTagRegex = new(@"<[^>]*>", RegexOptions.Compiled);

    /// <summary>
    /// Matches a <c>[SCREENSHOT: …]</c> marker; group 1 is the timestamp in
    /// <c>HH:MM:SS</c>, <c>H:MM:SS</c> or <c>MM:SS</c> form.
    /// </summary>
    private static readonly Regex ScreenshotTagRegex =
        new(@"\[SCREENSHOT:\s*(\d{1,2}(?::\d{2}){1,2})\s*\]", RegexOptions.Compiled);

    private readonly Kernel _kernel;
    private readonly ILogger<AnalysisService> _logger;
    private readonly List<string> _debugSteps = new();

    /// <summary>
    /// Background Chromium download started by the constructor. It never faults
    /// (failures are logged inside) and is awaited before the first browser launch.
    /// </summary>
    private readonly Task _chromiumReady;

    /// <summary>Exit code and captured output of a finished child process.</summary>
    private sealed record ProcessResult(int ExitCode, string StdOut, string StdErr);

    /// <summary>
    /// Creates the service, selects the QuestPDF community license and starts
    /// downloading Chromium in the background so construction is not blocked.
    /// </summary>
    /// <param name="kernel">Semantic Kernel instance that provides the chat-completion service.</param>
    /// <param name="logger">Logger for diagnostics.</param>
    public AnalysisService(Kernel kernel, ILogger<AnalysisService> logger)
    {
        _kernel = kernel;
        _logger = logger;
        QuestPDF.Settings.License = LicenseType.Community;

        // Keep the task instead of firing and forgetting it: the screenshot step
        // must not launch the browser while the download is still in progress.
        _chromiumReady = Task.Run(DownloadChromiumAsync);
    }

    /// <summary>Downloads the browser used by PuppeteerSharp; failures are logged, never thrown.</summary>
    private async Task DownloadChromiumAsync()
    {
        try
        {
            var browserFetcher = new BrowserFetcher();
            await browserFetcher.DownloadAsync();
            _logger.LogInformation("Chromium ready.");
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Failed to download Chromium.");
        }
    }

    /// <summary>Writes <paramref name="message"/> to the logger and to the per-request debug log.</summary>
    private void AddLog(string message)
    {
        // Constant template: the message may contain '{' / '}' (paths, exception
        // text), which must not be interpreted as logging placeholders.
        _logger.LogInformation("{Message}", message);
        _debugSteps.Add($"[{DateTime.Now:HH:mm:ss}] {message}");
    }

    /// <summary>
    /// Locates the yt-dlp executable. On Windows the current directory, the application
    /// base directory and its <c>Binaries</c> folder are probed for <c>yt-dlp.exe</c>;
    /// elsewhere the platform-specific binary in <c>Binaries</c> is used and marked executable.
    /// </summary>
    /// <returns>The full path of a bundled binary, or <c>"yt-dlp"</c> to rely on PATH lookup.</returns>
    private string GetYtDlpPath()
    {
        const string fallback = "yt-dlp";
        var baseDir = AppDomain.CurrentDomain.BaseDirectory;

        if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
        {
            const string exeName = "yt-dlp.exe";
            var candidates = new[]
            {
                Path.Combine(Directory.GetCurrentDirectory(), exeName),
                Path.Combine(baseDir, exeName),
                Path.Combine(baseDir, "Binaries", exeName),
            };

            return candidates.FirstOrDefault(File.Exists) ?? fallback;
        }

        var executableName = RuntimeInformation.IsOSPlatform(OSPlatform.OSX) ? "yt-dlp_macos" : "yt-dlp_linux";
        var fullPath = Path.Combine(baseDir, "Binaries", executableName);
        if (!File.Exists(fullPath))
        {
            return fallback;
        }

        // Best effort: a bundled binary may have lost its executable bit during deployment.
        try
        {
            var chmod = new ProcessStartInfo("chmod") { UseShellExecute = false, CreateNoWindow = true };
            chmod.ArgumentList.Add("+x");
            chmod.ArgumentList.Add(fullPath);
            using var chmodProcess = Process.Start(chmod);
            chmodProcess?.WaitForExit();
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "Could not mark {Path} as executable.", fullPath);
        }

        return fullPath;
    }

    /// <summary>
    /// Runs the full pipeline for one video: transcript, LLM tutorial generation,
    /// frame capture and PDF rendering.
    /// </summary>
    /// <param name="request">Video URL and target language.</param>
    /// <returns>
    /// A response with <c>Status = "success"</c> and the generated artefacts, or
    /// <c>Status = "error"</c> with the failure message. Both carry the debug steps
    /// and the raw LLM answer collected so far.
    /// </returns>
    public async Task<AnalysisResponse> AnalyzeVideoAsync(AnalysisRequest request)
    {
        _debugSteps.Clear();
        var tempDir = Path.Combine(Path.GetTempPath(), "VideoStudy", Guid.NewGuid().ToString());
        Directory.CreateDirectory(tempDir);
        AddLog($"📁 Inciando processamento em: {tempDir}");

        var rawLlmResponse = "";

        try
        {
            // --- Step 1: Transcription ---
            AddLog("🌐 Obtendo transcrição via yt-dlp...");
            var transcript = await GetTranscriptViaYtDlpAsync(request.VideoUrl, request.Language, tempDir);

            if (string.IsNullOrWhiteSpace(transcript))
            {
                throw new InvalidOperationException("Não foi possível obter a transcrição do vídeo.");
            }

            AddLog($"✅ Transcrição obtida ({transcript.Length} caracteres).");

            // --- Step 2: Intelligence ---
            AddLog("🧠 Enviando transcrição para o Groq (LLM)...");
            var (tutorialSections, rawJson) = await GenerateTutorialContentAsync(transcript, request.Language);
            rawLlmResponse = rawJson;

            await WriteDebugFileAsync(request.VideoUrl, rawJson, tutorialSections);

            // --- Step 3: Image Capture ---
            var screenshotCount = tutorialSections.Count(s => !string.IsNullOrEmpty(s.ImageTimestamp));
            if (screenshotCount > 0)
            {
                AddLog($"📸 Capturando {screenshotCount} prints usando Puppeteer (Direct Bypass)...");
                await CaptureScreenshotsWithPuppeteerAsync(request.VideoUrl, tutorialSections, tempDir);
            }
            else
            {
                AddLog("⚠️ Nenhuma tag [SCREENSHOT] foi gerada pela IA.");
            }

            // --- Step 4: PDF Generation ---
            AddLog("📄 Gerando PDF final com QuestPDF...");
            var pdfBytes = GeneratePdf(request.VideoUrl, tutorialSections);

            AddLog("🎉 Processamento concluído com sucesso!");

            return new AnalysisResponse
            {
                Status = "success",
                VideoTitle = request.VideoUrl,
                Transcript = transcript,
                TutorialSections = tutorialSections,
                PdfData = pdfBytes,
                DebugSteps = new List<string>(_debugSteps),
                RawLlmResponse = rawLlmResponse,
                Analysis = "Tutorial gerado com sucesso!"
            };
        }
        catch (Exception ex)
        {
            // Keep the stack trace in the server log; the response only carries the message.
            _logger.LogError(ex, "Video analysis failed.");
            AddLog($"❌ ERRO: {ex.Message}");
            return new AnalysisResponse
            {
                Status = "error",
                ErrorMessage = ex.Message,
                DebugSteps = new List<string>(_debugSteps),
                RawLlmResponse = rawLlmResponse
            };
        }
        finally
        {
            if (Directory.Exists(tempDir))
            {
                try
                {
                    Directory.Delete(tempDir, true);
                }
                catch (Exception ex)
                {
                    _logger.LogWarning(ex, "Could not delete temporary directory {TempDir}.", tempDir);
                }
            }
        }
    }

    /// <summary>
    /// Writes the raw LLM answer and the parsed sections to <c>DEBUG_LAST_RESPONSE.md</c>
    /// in the current directory. Purely diagnostic: a write failure is logged and
    /// does not abort the analysis.
    /// </summary>
    private async Task WriteDebugFileAsync(string videoUrl, string rawJson, List<TutorialSection> sections)
    {
        var debugFile = Path.Combine(Directory.GetCurrentDirectory(), "DEBUG_LAST_RESPONSE.md");
        var debugContent = $"# Debug Groq Response\n\nURL: {videoUrl}\n\n## Raw JSON\n```json\n{rawJson}\n```\n\n## Sections\n" +
            string.Join("\n\n", sections.Select(s => $"### {s.Title}\n{s.Content}\n**Timestamp:** {s.ImageTimestamp}"));

        try
        {
            await File.WriteAllTextAsync(debugFile, debugContent);
            AddLog($"📝 Arquivo de debug gerado: {debugFile}");
        }
        catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
        {
            _logger.LogWarning(ex, "Could not write debug file {DebugFile}.", debugFile);
        }
    }

    /// <summary>
    /// Builds the start info for a yt-dlp invocation. Arguments go through
    /// <see cref="ProcessStartInfo.ArgumentList"/> so each value is passed as exactly
    /// one argument and caller-supplied text cannot add or split options.
    /// </summary>
    /// <param name="workingDirectory">Working directory for the process, or <c>null</c> to inherit.</param>
    /// <param name="args">Arguments in order.</param>
    private ProcessStartInfo CreateYtDlpStartInfo(string? workingDirectory, params string[] args)
    {
        var startInfo = new ProcessStartInfo
        {
            FileName = GetYtDlpPath(),
            RedirectStandardOutput = true,
            RedirectStandardError = true,
            UseShellExecute = false,
            CreateNoWindow = true
        };

        if (!string.IsNullOrEmpty(workingDirectory))
        {
            startInfo.WorkingDirectory = workingDirectory;
        }

        foreach (var arg in args)
        {
            startInfo.ArgumentList.Add(arg);
        }

        return startInfo;
    }

    /// <summary>
    /// Starts a process with redirected output and waits for it to exit. Both pipes
    /// are read concurrently: a child blocks once a redirected pipe buffer is full,
    /// so waiting for exit without reading can deadlock.
    /// </summary>
    /// <returns>The exit code and captured output, or <c>null</c> when no process was started.</returns>
    private static async Task<ProcessResult?> RunProcessAsync(ProcessStartInfo startInfo)
    {
        using var process = Process.Start(startInfo);
        if (process is null)
        {
            return null;
        }

        var stdOutTask = process.StandardOutput.ReadToEndAsync();
        var stdErrTask = process.StandardError.ReadToEndAsync();
        await process.WaitForExitAsync();

        return new ProcessResult(process.ExitCode, await stdOutTask, await stdErrTask);
    }

    /// <summary>
    /// Downloads the subtitles (manual or auto-generated) of a video as WebVTT into
    /// <paramref name="outputDir"/> and returns them as plain text.
    /// </summary>
    /// <param name="url">Video URL.</param>
    /// <param name="language">Preferred subtitle language; English is always requested as fallback.</param>
    /// <param name="outputDir">Directory yt-dlp runs in and writes the <c>.vtt</c> files to.</param>
    /// <returns>The transcript text, or an empty string when no subtitle file was produced.</returns>
    private async Task<string> GetTranscriptViaYtDlpAsync(string url, string language, string outputDir)
    {
        var subLangs = string.IsNullOrWhiteSpace(language) ? "en" : $"{language},en";

        // "--" ends option parsing so a URL starting with '-' is never read as an option.
        var startInfo = CreateYtDlpStartInfo(
            outputDir,
            "--skip-download", "--write-sub", "--write-auto-sub",
            "--sub-lang", subLangs,
            "--sub-format", "vtt",
            "--output", "%(title)s",
            "--", url);

        var result = await RunProcessAsync(startInfo);
        if (result is null)
        {
            return string.Empty;
        }

        if (result.ExitCode != 0)
        {
            _logger.LogWarning("yt-dlp exited with code {ExitCode}: {StdErr}", result.ExitCode, result.StdErr);
            AddLog($"⚠️ yt-dlp terminou com código {result.ExitCode}.");
        }

        // Both the requested language and English may have been written;
        // prefer the requested one (yt-dlp names files "<title>.<lang>.vtt").
        var vttFiles = Directory.GetFiles(outputDir, "*.vtt");
        var vttFile = vttFiles.FirstOrDefault(f => f.EndsWith($".{language}.vtt", StringComparison.OrdinalIgnoreCase))
            ?? vttFiles.FirstOrDefault();

        return vttFile is null
            ? string.Empty
            : ParseVttToText(await File.ReadAllTextAsync(vttFile));
    }

    /// <summary>
    /// Flattens WebVTT content to plain text: header, NOTE/STYLE/REGION blocks and
    /// cue timing lines are dropped, inline tags are stripped, entities are decoded
    /// and lines already emitted are skipped (rolling captions repeat lines across cues).
    /// </summary>
    /// <param name="vttContent">Raw content of a <c>.vtt</c> file.</param>
    /// <returns>The caption text joined with single spaces.</returns>
    private static string ParseVttToText(string vttContent)
    {
        var textLines = new List<string>();
        var seen = new HashSet<string>(StringComparer.Ordinal);
        var atBlockStart = true;
        var skippingBlock = false;

        foreach (var rawLine in vttContent.Split('\n'))
        {
            var line = rawLine.Trim();

            // A blank line terminates the current block (header, note or cue).
            if (line.Length == 0)
            {
                atBlockStart = true;
                skippingBlock = false;
                continue;
            }

            if (atBlockStart)
            {
                atBlockStart = false;
                // Metadata blocks are only recognised at the start of a block, so caption
                // text that merely begins with one of these words is kept. Skipping the
                // whole block also removes header lines such as "Kind:" and "Language:".
                skippingBlock = line.StartsWith("WEBVTT", StringComparison.Ordinal)
                    || line.StartsWith("NOTE", StringComparison.Ordinal)
                    || line.StartsWith("STYLE", StringComparison.Ordinal)
                    || line.StartsWith("REGION", StringComparison.Ordinal);
            }

            if (skippingBlock || line.Contains("-->", StringComparison.Ordinal))
            {
                continue;
            }

            var text = WebUtility.HtmlDecode(VttTagRegex.Replace(line, "")).Trim();
            if (text.Length > 0 && seen.Add(text))
            {
                textLines.Add(text);
            }
        }

        return string.Join(" ", textLines);
    }

    /// <summary>
    /// Asks the chat model to rewrite the transcript as tutorial sections and parses its JSON answer.
    /// </summary>
    /// <param name="transcript">Plain-text transcript; only the first <see cref="MaxTranscriptChars"/> characters are sent.</param>
    /// <param name="language">Language the tutorial should be written in.</param>
    /// <returns>
    /// The parsed sections (empty when the answer could not be parsed) and the JSON
    /// payload extracted from the model's answer.
    /// </returns>
    private async Task<(List<TutorialSection> sections, string rawJson)> GenerateTutorialContentAsync(string transcript, string language)
    {
        var chatService = _kernel.GetRequiredService<IChatCompletionService>();
        var prompt = $@"
Converta a transcrição abaixo em um Tutorial Passo a Passo em {language}.
REGRAS:
1. Divida em passos lógicos.
2. Identifique onde um print da tela é necessário e insira [SCREENSHOT: HH:MM:SS].
3. Retorne APENAS JSON.
JSON:
{{
""sections"": [
{{ ""title"": ""Passo 1"", ""content"": ""Texto... [SCREENSHOT: 00:01:20]"" }}
]
}}
Transcrição: {transcript[..Math.Min(transcript.Length, MaxTranscriptChars)]}";

        var result = await chatService.GetChatMessageContentAsync(prompt);
        var json = ExtractJsonPayload(result.Content);

        var sections = new List<TutorialSection>();
        try
        {
            using var doc = JsonDocument.Parse(json);
            var root = doc.RootElement;

            if (root.ValueKind == JsonValueKind.Object
                && root.TryGetProperty("sections", out var items)
                && items.ValueKind == JsonValueKind.Array)
            {
                foreach (var item in items.EnumerateArray())
                {
                    if (item.ValueKind != JsonValueKind.Object)
                    {
                        continue;
                    }

                    var content = ReadString(item, "content");
                    sections.Add(new TutorialSection
                    {
                        Title = ReadString(item, "title"),
                        // Strip every marker, whatever its spacing, so none leaks into the PDF text.
                        Content = ScreenshotTagRegex.Replace(content, "").Trim(),
                        ImageTimestamp = ExtractTimestamp(content)
                    });
                }
            }
            else
            {
                AddLog("⚠️ A resposta da IA não contém a lista \"sections\".");
            }
        }
        catch (JsonException ex)
        {
            // Surface the failure instead of silently returning an empty tutorial.
            AddLog($"⚠️ A resposta da IA não é um JSON válido: {ex.Message}");
        }

        return (sections, json);
    }

    /// <summary>
    /// Isolates the JSON object in a model answer by taking everything from the first
    /// <c>{</c> to the last <c>}</c>, which drops Markdown code fences and surrounding prose.
    /// </summary>
    /// <returns>The extracted payload, the trimmed input when no object is found, or <c>"{}"</c> for empty input.</returns>
    private static string ExtractJsonPayload(string? raw)
    {
        if (string.IsNullOrWhiteSpace(raw))
        {
            return "{}";
        }

        var text = raw.Trim();
        var start = text.IndexOf('{');
        var end = text.LastIndexOf('}');

        return start >= 0 && end > start ? text[start..(end + 1)] : text;
    }

    /// <summary>Returns the string value of <paramref name="name"/>, or an empty string when missing or not a string.</summary>
    private static string ReadString(JsonElement element, string name) =>
        element.TryGetProperty(name, out var value) && value.ValueKind == JsonValueKind.String
            ? value.GetString() ?? ""
            : "";

    /// <summary>
    /// Extracts the timestamp of the first <c>[SCREENSHOT: …]</c> marker in <paramref name="text"/>.
    /// </summary>
    /// <returns>
    /// The timestamp normalised to <c>HH:MM:SS</c> (an <c>MM:SS</c> value gets a
    /// <c>00</c> hour part), or <c>null</c> when there is no marker.
    /// </returns>
    private string? ExtractTimestamp(string text)
    {
        var match = ScreenshotTagRegex.Match(text);
        if (!match.Success)
        {
            return null;
        }

        var parts = match.Groups[1].Value.Split(':');
        if (parts.Length == 2)
        {
            // Without the hour part TimeSpan parsing would read "01:20" as 1 h 20 min.
            parts = new[] { "00", parts[0], parts[1] };
        }

        parts[0] = parts[0].PadLeft(2, '0');
        return string.Join(":", parts);
    }

    /// <summary>
    /// Asks yt-dlp (<c>-g -f b</c>) for the direct media URL of the video.
    /// </summary>
    /// <param name="videoUrl">Page URL of the video.</param>
    /// <returns>The first non-empty line printed by yt-dlp, or <c>null</c> when there is none.</returns>
    private async Task<string?> GetRawVideoStreamUrl(string videoUrl)
    {
        var result = await RunProcessAsync(CreateYtDlpStartInfo(null, "-g", "-f", "b", "--", videoUrl));
        if (result is null)
        {
            return null;
        }

        if (result.ExitCode != 0)
        {
            _logger.LogWarning("yt-dlp exited with code {ExitCode}: {StdErr}", result.ExitCode, result.StdErr);
        }

        return result.StdOut
            .Split('\n')
            .Select(line => line.Trim())
            .FirstOrDefault(line => line.Length > 0);
    }

    /// <summary>
    /// Captures one video frame for every section that has a timestamp and stores the
    /// JPEG bytes in the section's <c>ImageData</c>. The direct media URL is loaded into a
    /// bare <c>&lt;video&gt;</c> element in headless Chromium, which is then seeked frame by frame.
    /// Failures are logged to the debug steps and never thrown from the browser part.
    /// </summary>
    /// <param name="videoUrl">Page URL of the video.</param>
    /// <param name="sections">Tutorial sections; those with a timestamp receive image data.</param>
    /// <param name="outputDir">Directory where the intermediate JPEG files are written.</param>
    private async Task CaptureScreenshotsWithPuppeteerAsync(string videoUrl, List<TutorialSection> sections, string outputDir)
    {
        var sectionsWithImages = sections.Where(s => !string.IsNullOrEmpty(s.ImageTimestamp)).ToList();
        if (sectionsWithImages.Count == 0)
        {
            return;
        }

        AddLog("🔍 Obtendo link direto do vídeo (Bypass YouTube Player)...");
        var rawVideoUrl = await GetRawVideoStreamUrl(videoUrl);
        if (string.IsNullOrEmpty(rawVideoUrl))
        {
            AddLog("❌ Falha ao obter link direto.");
            return;
        }

        try
        {
            // The background download started in the constructor must be finished before launching.
            await _chromiumReady;

            // Asynchronous disposal waits for the browser to close before the caller
            // removes the temporary directory.
            await using var browser = await Puppeteer.LaunchAsync(new LaunchOptions { Headless = true, Args = new[] { "--no-sandbox", "--window-size=1280,720" } });
            await using var page = await browser.NewPageAsync();
            await page.SetViewportAsync(new ViewPortOptions { Width = 1280, Height = 720 });

            // The URL ends up inside an HTML attribute: encode it so '&' and quotes cannot break the markup.
            var encodedUrl = WebUtility.HtmlEncode(rawVideoUrl);
            var html = $@"<html><body style='margin:0;background:black;overflow:hidden;'><video id='v' width='1280' height='720' muted><source src='{encodedUrl}' type='video/mp4'></video></body></html>";
            await page.SetContentAsync(html);
            await page.WaitForSelectorAsync("video");

            // Fail fast (and only once) when the media cannot be loaded at all.
            await page.EvaluateFunctionAsync(@"() => new Promise((resolve, reject) => {
                const v = document.getElementById('v');
                if (v.readyState >= 1) { resolve(); return; }
                if (v.networkState === HTMLMediaElement.NETWORK_NO_SOURCE) { reject(new Error('video source failed to load')); return; }
                const timer = setTimeout(() => reject(new Error('video metadata timeout')), 30000);
                v.addEventListener('loadedmetadata', () => { clearTimeout(timer); resolve(); }, { once: true });
                const src = v.querySelector('source');
                if (src) src.addEventListener('error', () => { clearTimeout(timer); reject(new Error('video source failed to load')); }, { once: true });
            })");

            foreach (var section in sectionsWithImages)
            {
                if (!TimeSpan.TryParse(section.ImageTimestamp, CultureInfo.InvariantCulture, out var ts))
                {
                    continue;
                }

                var sec = (int)ts.TotalSeconds;
                AddLog($"🌐 Renderizando frame: {section.ImageTimestamp}...");

                try
                {
                    // Listener first, then seek; the timeout keeps a stalled seek from blocking forever.
                    await page.EvaluateFunctionAsync(@"(s) => new Promise((resolve, reject) => {
                        const v = document.getElementById('v');
                        const timer = setTimeout(() => reject(new Error('seek timeout')), 15000);
                        v.addEventListener('seeked', () => { clearTimeout(timer); resolve(); }, { once: true });
                        v.currentTime = s;
                    })", sec);

                    // Give the compositor a moment to paint the new frame.
                    await Task.Delay(500);

                    var path = Path.Combine(outputDir, $"snap_{sec}.jpg");
                    await page.ScreenshotAsync(path, new ScreenshotOptions { Type = ScreenshotType.Jpeg, Quality = 90 });
                    if (File.Exists(path))
                    {
                        section.ImageData = await File.ReadAllBytesAsync(path);
                    }
                }
                catch (Exception ex)
                {
                    // One bad frame must not cost the remaining screenshots.
                    AddLog($"⚠️ Falha ao capturar frame {section.ImageTimestamp}: {ex.Message}");
                }
            }
        }
        catch (Exception ex)
        {
            AddLog($"❌ Erro Puppeteer: {ex.Message}");
        }
    }

    /// <summary>
    /// Renders the tutorial as a PDF: a title header, the source URL, then for each
    /// section its title, text and (when present) its screenshot, with a page-number footer.
    /// </summary>
    /// <param name="videoUrl">URL printed as the source of the tutorial.</param>
    /// <param name="sections">Sections to render, in order.</param>
    /// <returns>The PDF file content.</returns>
    private byte[] GeneratePdf(string videoUrl, List<TutorialSection> sections)
    {
        var document = Document.Create(container =>
        {
            container.Page(page =>
            {
                page.Margin(2, Unit.Centimetre);
                page.DefaultTextStyle(x => x.FontSize(11).FontFamily("Arial"));
                page.Header().Text("Video Tutorial").SemiBold().FontSize(24).FontColor(Colors.Blue.Medium);

                page.Content().PaddingVertical(1, Unit.Centimetre).Column(column =>
                {
                    column.Item().Text($"Fonte: {videoUrl}").Italic().FontSize(10).FontColor(Colors.Grey.Medium);
                    column.Item().PaddingBottom(20);

                    foreach (var section in sections)
                    {
                        column.Item().Text(section.Title).Bold().FontSize(16);
                        column.Item().Text(text => { text.Span(section.Content); });

                        if (section.ImageData != null)
                        {
                            column.Item().PaddingVertical(15).Image(section.ImageData).FitWidth();
                        }

                        // Empty padded item acts as vertical spacing between sections.
                        column.Item().PaddingBottom(20);
                    }
                });

                page.Footer().AlignCenter().Text(x => { x.Span("Gerado por VideoStudy.app - "); x.CurrentPageNumber(); });
            });
        });

        return document.GeneratePdf();
    }
}
|