From 9e85f28bf627c5a7f58dd8736f306c8c4cd6f1db Mon Sep 17 00:00:00 2001 From: Ricardo Carneiro <71648276+ricarneiro@users.noreply.github.com> Date: Mon, 5 May 2025 19:50:24 -0300 Subject: [PATCH] =?UTF-8?q?feat:=20Ultima=20vers=C3=A3o?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- YTExtractor/Program.cs | 23 +- YTExtractor/Services/ChainYoutubeService.cs | 41 +++ .../Services/Handlers/YouExposeHandler.cs | 83 ++++++ .../Handlers/YoutubeExplodeHandler.cs | 83 ++++++ .../Handlers/YoutubeServiceHandler.cs | 25 ++ YTExtractor/Services/Handlers/YtDlpHandler.cs | 119 ++++++++ YTExtractor/Services/IYoutubeService.cs | 29 ++ YTExtractor/Services/VttFixerService.cs | 272 ++++++++++++++++++ YTExtractor/Services/YouExpose/Client.cs | 230 +++++++++++++++ YTExtractor/Services/YouExposeService.cs | 72 +++++ .../YoutubeExplode/YoutubeExplodeClient.cs | 130 +++++++++ YTExtractor/YTExtractor.csproj | 5 + YTExtractor/YoutubeService.cs | 9 +- YTExtractor/appsettings.json | 9 +- 14 files changed, 1119 insertions(+), 11 deletions(-) create mode 100644 YTExtractor/Services/ChainYoutubeService.cs create mode 100644 YTExtractor/Services/Handlers/YouExposeHandler.cs create mode 100644 YTExtractor/Services/Handlers/YoutubeExplodeHandler.cs create mode 100644 YTExtractor/Services/Handlers/YoutubeServiceHandler.cs create mode 100644 YTExtractor/Services/Handlers/YtDlpHandler.cs create mode 100644 YTExtractor/Services/IYoutubeService.cs create mode 100644 YTExtractor/Services/VttFixerService.cs create mode 100644 YTExtractor/Services/YouExpose/Client.cs create mode 100644 YTExtractor/Services/YouExposeService.cs create mode 100644 YTExtractor/Services/YoutubeExplode/YoutubeExplodeClient.cs diff --git a/YTExtractor/Program.cs b/YTExtractor/Program.cs index ae93840..efbd289 100644 --- a/YTExtractor/Program.cs +++ b/YTExtractor/Program.cs @@ -3,6 +3,7 @@ using Serilog; using YTExtractor.Data; using YTExtractor.Logging.Configuration; using YTExtractor.Services; +using Microsoft.Extensions.DependencyInjection; // App configuration and endpoints var builder = WebApplication.CreateBuilder(args); @@ -18,16 +19,30 @@ builder.Services.AddEndpointsApiExplorer(); builder.Services.AddSwaggerGen(); builder.Services.AddSingleton(); +// Register VTT processing service +builder.Services.AddSingleton(); + +// Register YoutubeExplode client +builder.Services.AddSingleton(); + +// Register YouTube service handlers and service +builder.Services.AddSingleton(); +builder.Services.AddSingleton(); +builder.Services.AddSingleton(); + +// Register Chain of Responsibility implementation +builder.Services.AddSingleton(); + var app = builder.Build(); app.UseSwagger(); app.UseSwaggerUI(); -app.MapPost("/api/video-info", async (VideoRequest request, MongoDBConnector mongo) => +app.MapPost("/api/video-info", async (VideoRequest request, MongoDBConnector mongo, IYoutubeService youtubeService) => { try { - if (!YoutubeService.IsValidYouTubeUrl(request.Url)) + if (!youtubeService.IsValidYouTubeUrl(request.Url)) return Results.BadRequest("Invalid YouTube URL"); var tempDir = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString()); @@ -48,8 +63,8 @@ app.MapPost("/api/video-info", async (VideoRequest request, MongoDBConnector mon )); } - var info = await YoutubeService.GetVideoInfo(request.Url, tempDir); - var subtitles = service.ExtractPlainText(await YoutubeService.GetSubtitles(request.Url, request.Language, tempDir)); + var info = await youtubeService.GetVideoInfo(request.Url, tempDir); + var subtitles = service.ExtractPlainText(await youtubeService.GetSubtitles(request.Url, request.Language, tempDir)); await mongo.InsertVideo(new VideoData { diff --git a/YTExtractor/Services/ChainYoutubeService.cs b/YTExtractor/Services/ChainYoutubeService.cs new file mode 100644 index 0000000..7aaedd1 --- /dev/null +++ b/YTExtractor/Services/ChainYoutubeService.cs @@ -0,0 +1,41 @@ +using System.Text.RegularExpressions; +using Microsoft.Extensions.Logging; +using YTExtractor.Services.Handlers; + +namespace YTExtractor.Services +{ + public class ChainYoutubeService : IYoutubeService + { + private readonly ILogger _logger; + private readonly YoutubeServiceHandler _firstHandler; + + public ChainYoutubeService( + ILogger logger, + YoutubeExplodeHandler youtubeExplodeHandler, + YtDlpHandler ytDlpHandler) + { + _logger = logger; + + // Set up the chain of responsibility + _firstHandler = youtubeExplodeHandler; + youtubeExplodeHandler.SetNext(ytDlpHandler); + } + + public bool IsValidYouTubeUrl(string url) + { + return Regex.IsMatch(url, @"^(https?\:\/\/)?(www\.)?(youtube\.com|youtu\.?be)\/.+$"); + } + + public async Task GetVideoInfo(string url, string workingDir) + { + _logger.LogInformation("Starting chain of responsibility for video info: {Url}", url); + return await _firstHandler.HandleVideoInfo(url, workingDir); + } + + public async Task GetSubtitles(string url, string language, string workingDir) + { + _logger.LogInformation("Starting chain of responsibility for subtitles: {Url}, language: {Language}", url, language); + return await _firstHandler.HandleSubtitles(url, language, workingDir); + } + } +} \ No newline at end of file diff --git a/YTExtractor/Services/Handlers/YouExposeHandler.cs b/YTExtractor/Services/Handlers/YouExposeHandler.cs new file mode 100644 index 0000000..c53f020 --- /dev/null +++ b/YTExtractor/Services/Handlers/YouExposeHandler.cs @@ -0,0 +1,83 @@ +using Microsoft.Extensions.Logging; +using YTExtractor.Services.YoutubeExplode; + +namespace YTExtractor.Services.Handlers +{ + public class YoutubeExplodeHandler : YoutubeServiceHandler + { + private readonly YoutubeExplodeClient _youtubeExplodeClient; + private readonly VttFixerService _vttFixerService; + + public YoutubeExplodeHandler( + ILogger logger, + YoutubeExplodeClient youtubeExplodeClient, + VttFixerService vttFixerService) : base(logger) + { + _youtubeExplodeClient = youtubeExplodeClient; + _vttFixerService = vttFixerService; + } + + public override async Task HandleVideoInfo(string url, string workingDir) + { + try + { + _logger.LogInformation("Getting video info using YoutubeExplode for {Url}", url); + + var videoInfo = await _youtubeExplodeClient.GetVideoInfoAsync(url); + + if (videoInfo != null && !string.IsNullOrEmpty(videoInfo.Title)) + { + _logger.LogInformation("Successfully retrieved video info using YoutubeExplode for {Url}", url); + return videoInfo; + } + + _logger.LogInformation("No video info found with YoutubeExplode, passing to next handler for {Url}", url); + } + catch (Exception ex) + { + _logger.LogWarning(ex, "Error getting video info with YoutubeExplode, passing to next handler for {Url}", url); + } + + // Pass to the next handler if YoutubeExplode fails or returns no data + if (_nextHandler != null) + { + return await _nextHandler.HandleVideoInfo(url, workingDir); + } + + throw new Exception("Failed to get video info. No more handlers available."); + } + + public override async Task HandleSubtitles(string url, string language, string workingDir) + { + try + { + _logger.LogInformation("Getting subtitles using YoutubeExplode for {Url} in language {Language}", url, language); + + var subtitles = await _youtubeExplodeClient.GetSubtitlesAsync(url, language); + + if (!string.IsNullOrEmpty(subtitles)) + { + _logger.LogInformation("Successfully retrieved subtitles using YoutubeExplode for {Url}", url); + + // Fix the subtitles with VttFixer + var fixedSubtitles = _vttFixerService.FixYoutubeVtt(subtitles); + return fixedSubtitles; + } + + _logger.LogInformation("No subtitles found with YoutubeExplode, passing to next handler for {Url}", url); + } + catch (Exception ex) + { + _logger.LogWarning(ex, "Error getting subtitles with YoutubeExplode, passing to next handler for {Url}", url); + } + + // Pass to the next handler if YoutubeExplode fails or returns no data + if (_nextHandler != null) + { + return await _nextHandler.HandleSubtitles(url, language, workingDir); + } + + throw new Exception("Failed to get subtitles. No more handlers available."); + } + } +} \ No newline at end of file diff --git a/YTExtractor/Services/Handlers/YoutubeExplodeHandler.cs b/YTExtractor/Services/Handlers/YoutubeExplodeHandler.cs new file mode 100644 index 0000000..c53f020 --- /dev/null +++ b/YTExtractor/Services/Handlers/YoutubeExplodeHandler.cs @@ -0,0 +1,83 @@ +using Microsoft.Extensions.Logging; +using YTExtractor.Services.YoutubeExplode; + +namespace YTExtractor.Services.Handlers +{ + public class YoutubeExplodeHandler : YoutubeServiceHandler + { + private readonly YoutubeExplodeClient _youtubeExplodeClient; + private readonly VttFixerService _vttFixerService; + + public YoutubeExplodeHandler( + ILogger logger, + YoutubeExplodeClient youtubeExplodeClient, + VttFixerService vttFixerService) : base(logger) + { + _youtubeExplodeClient = youtubeExplodeClient; + _vttFixerService = vttFixerService; + } + + public override async Task HandleVideoInfo(string url, string workingDir) + { + try + { + _logger.LogInformation("Getting video info using YoutubeExplode for {Url}", url); + + var videoInfo = await _youtubeExplodeClient.GetVideoInfoAsync(url); + + if (videoInfo != null && !string.IsNullOrEmpty(videoInfo.Title)) + { + _logger.LogInformation("Successfully retrieved video info using YoutubeExplode for {Url}", url); + return videoInfo; + } + + _logger.LogInformation("No video info found with YoutubeExplode, passing to next handler for {Url}", url); + } + catch (Exception ex) + { + _logger.LogWarning(ex, "Error getting video info with YoutubeExplode, passing to next handler for {Url}", url); + } + + // Pass to the next handler if YoutubeExplode fails or returns no data + if (_nextHandler != null) + { + return await _nextHandler.HandleVideoInfo(url, workingDir); + } + + throw new Exception("Failed to get video info. No more handlers available."); + } + + public override async Task HandleSubtitles(string url, string language, string workingDir) + { + try + { + _logger.LogInformation("Getting subtitles using YoutubeExplode for {Url} in language {Language}", url, language); + + var subtitles = await _youtubeExplodeClient.GetSubtitlesAsync(url, language); + + if (!string.IsNullOrEmpty(subtitles)) + { + _logger.LogInformation("Successfully retrieved subtitles using YoutubeExplode for {Url}", url); + + // Fix the subtitles with VttFixer + var fixedSubtitles = _vttFixerService.FixYoutubeVtt(subtitles); + return fixedSubtitles; + } + + _logger.LogInformation("No subtitles found with YoutubeExplode, passing to next handler for {Url}", url); + } + catch (Exception ex) + { + _logger.LogWarning(ex, "Error getting subtitles with YoutubeExplode, passing to next handler for {Url}", url); + } + + // Pass to the next handler if YoutubeExplode fails or returns no data + if (_nextHandler != null) + { + return await _nextHandler.HandleSubtitles(url, language, workingDir); + } + + throw new Exception("Failed to get subtitles. No more handlers available."); + } + } +} \ No newline at end of file diff --git a/YTExtractor/Services/Handlers/YoutubeServiceHandler.cs b/YTExtractor/Services/Handlers/YoutubeServiceHandler.cs new file mode 100644 index 0000000..2c67278 --- /dev/null +++ b/YTExtractor/Services/Handlers/YoutubeServiceHandler.cs @@ -0,0 +1,25 @@ +using Microsoft.Extensions.Logging; + +namespace YTExtractor.Services.Handlers +{ + public abstract class YoutubeServiceHandler + { + protected readonly ILogger _logger; + protected YoutubeServiceHandler? _nextHandler; + + protected YoutubeServiceHandler(ILogger logger) + { + _logger = logger; + } + + public YoutubeServiceHandler SetNext(YoutubeServiceHandler handler) + { + _nextHandler = handler; + return handler; + } + + public abstract Task HandleVideoInfo(string url, string workingDir); + + public abstract Task HandleSubtitles(string url, string language, string workingDir); + } +} \ No newline at end of file diff --git a/YTExtractor/Services/Handlers/YtDlpHandler.cs b/YTExtractor/Services/Handlers/YtDlpHandler.cs new file mode 100644 index 0000000..d53e866 --- /dev/null +++ b/YTExtractor/Services/Handlers/YtDlpHandler.cs @@ -0,0 +1,119 @@ +using Microsoft.Extensions.Logging; +using System.Diagnostics; +using System.Reflection; +using System.Runtime.InteropServices; +using System.Text.Json; + +namespace YTExtractor.Services.Handlers +{ + public class YtDlpHandler : YoutubeServiceHandler + { + private readonly VttFixerService _vttFixerService; + + public YtDlpHandler(ILogger logger, VttFixerService vttFixerService) : base(logger) + { + _vttFixerService = vttFixerService; + } + + public override async Task HandleVideoInfo(string url, string workingDir) + { + try + { + _logger.LogInformation("Getting video info using yt-dlp for {Url}", url); + + var startInfo = new ProcessStartInfo + { + FileName = "yt-dlp", + Arguments = $"--dump-json {url}", + RedirectStandardOutput = true, + UseShellExecute = false, + CreateNoWindow = true, + WorkingDirectory = workingDir + }; + + using var process = Process.Start(startInfo); + var output = await process.StandardOutput.ReadToEndAsync(); + await process.WaitForExitAsync(); + + if (process.ExitCode != 0) + throw new Exception("Failed to get video info using yt-dlp"); + + var jsonDoc = JsonDocument.Parse(output); + var root = jsonDoc.RootElement; + + return new YtDlpInfo( + root.GetProperty("title").GetString() ?? "", + root.GetProperty("thumbnail").GetString() ?? "" + ); + } + catch (Exception ex) + { + _logger.LogError(ex, "Error getting video info with yt-dlp for {Url}", url); + + // Pass to the next handler if yt-dlp fails + if (_nextHandler != null) + { + return await _nextHandler.HandleVideoInfo(url, workingDir); + } + + throw new Exception("Failed to get video info. No more handlers available."); + } + } + + public override async Task HandleSubtitles(string url, string language, string workingDir) + { + try + { + _logger.LogInformation("Getting subtitles using yt-dlp for {Url} in language {Language}", url, language); + + var pathExe = Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location); + var exePath = RuntimeInformation.IsOSPlatform(OSPlatform.Windows) + ? Path.Combine(pathExe, "yt-dlp.exe") + : "yt-dlp"; + + var startInfo = new ProcessStartInfo + { + FileName = exePath, + Arguments = $"--write-sub --write-auto-sub --sub-lang {language} --skip-download {url}", + RedirectStandardOutput = true, + RedirectStandardError = true, + UseShellExecute = false, + CreateNoWindow = true, + WorkingDirectory = workingDir + }; + + using var process = Process.Start(startInfo); + var output = await process.StandardOutput.ReadToEndAsync(); + var error = await process.StandardError.ReadToEndAsync(); + await process.WaitForExitAsync(); + + var subtitleFile = Directory.GetFiles(workingDir, "*.vtt").FirstOrDefault(); + if (subtitleFile == null) + throw new Exception("No subtitles found using yt-dlp"); + + // Read the VTT file + var vttContent = await File.ReadAllTextAsync(subtitleFile); + + // Fix the VTT content + _logger.LogInformation("Fixing VTT subtitles for {Url}", url); + var serviceSrt = new ConvertTranscriptService(); + var srt = serviceSrt.ConvertToSrt(vttContent); + var fixedContent = _vttFixerService.FixYoutubeVtt(vttContent); + + return fixedContent; + } + catch (Exception ex) + { + _logger.LogError(ex, "Error getting subtitles with yt-dlp for {Url}", url); + + // Pass to the next handler if yt-dlp fails + if (_nextHandler != null) + { + return await _nextHandler.HandleSubtitles(url, language, workingDir); + } + + throw new Exception("Failed to get subtitles. No more handlers available."); + } + } + } +} \ No newline at end of file diff --git a/YTExtractor/Services/IYoutubeService.cs b/YTExtractor/Services/IYoutubeService.cs new file mode 100644 index 0000000..504dca3 --- /dev/null +++ b/YTExtractor/Services/IYoutubeService.cs @@ -0,0 +1,29 @@ +namespace YTExtractor.Services +{ + public interface IYoutubeService + { + /// + /// Validates if the provided URL is a valid YouTube URL + /// + /// The URL to validate + /// True if the URL is a valid YouTube URL, otherwise false + bool IsValidYouTubeUrl(string url); + + /// + /// Gets video information from a YouTube URL + /// + /// The YouTube video URL + /// The working directory for temporary files + /// Basic information about the video + Task GetVideoInfo(string url, string workingDir); + + /// + /// Gets subtitles for a YouTube video + /// + /// The YouTube video URL + /// The language code for subtitles + /// The working directory for temporary files + /// The subtitles content as a string + Task GetSubtitles(string url, string language, string workingDir); + } +} \ No newline at end of file diff --git a/YTExtractor/Services/VttFixerService.cs b/YTExtractor/Services/VttFixerService.cs new file mode 100644 index 0000000..3976a71 --- /dev/null +++ b/YTExtractor/Services/VttFixerService.cs @@ -0,0 +1,272 @@ +using System.Text; +using System.Text.RegularExpressions; +using Microsoft.Extensions.Logging; + +namespace YTExtractor.Services +{ + public class VttFixerService + { + private readonly ILogger _logger; + + public VttFixerService(ILogger logger) + { + _logger = logger; + } + + /// + /// Fixes YouTube's autogenerated VTT subtitles with HTML tags and returns an SRT-formatted string + /// + /// The VTT content as a string + /// The fixed subtitles in SRT format + public string FixYoutubeVtt(string vttContent) + { + try + { + _logger.LogInformation("Starting to fix YouTube VTT content with HTML tags"); + + // Define regex patterns to extract timing and text + var timeRegex = new Regex(@"(\d{2}:\d{2}:\d{2}\.\d{3}) --> (\d{2}:\d{2}:\d{2}\.\d{3})"); + var htmlTagRegex = new Regex(@"<[^>]+>"); + + // Split the content by WEBVTT header and timestamps + var lines = vttContent.Split('\n'); + var resultBuilder = new StringBuilder(); + + List captions = new List(); + Caption currentCaption = null; + bool isHeader = true; + StringBuilder textBuilder = new StringBuilder(); + + // First pass: Parse the VTT file and extract captions + foreach (var line in lines) + { + // Skip header lines + if (isHeader) + { + if (line.Trim().StartsWith("00:")) + isHeader = false; + else + continue; + } + + // Check if this is a timing line + var timeMatch = timeRegex.Match(line); + if (timeMatch.Success) + { + // If we already have a caption in progress, save it + if (currentCaption != null && textBuilder.Length > 0) + { + currentCaption.Text = textBuilder.ToString().Trim(); + captions.Add(currentCaption); + textBuilder.Clear(); + } + + // Create a new caption + currentCaption = new Caption + { + Start = timeMatch.Groups[1].Value.Replace(".", ","), + End = timeMatch.Groups[2].Value.Replace(".", ",") + }; + } + // Otherwise, this is caption text + else if (currentCaption != null && !string.IsNullOrWhiteSpace(line)) + { + // Remove HTML tags and add the cleaned text + string cleanText = htmlTagRegex.Replace(line, ""); + if (!string.IsNullOrWhiteSpace(cleanText)) + { + textBuilder.AppendLine(cleanText); + } + } + } + + // Add the last caption if there is one + if (currentCaption != null && textBuilder.Length > 0) + { + currentCaption.Text = textBuilder.ToString().Trim(); + captions.Add(currentCaption); + } + + // Second pass: Process captions to remove duplicates + Caption previousCaption = null; + int counter = 1; + + foreach (var caption in captions) + { + // Clean the text + string cleanText = CleanText(caption.Text); + + if (previousCaption != null) + { + string prevCleanText = CleanText(previousCaption.Text); + + // Check if this caption is a duplicate of the previous one + if (prevCleanText.Equals(cleanText, StringComparison.OrdinalIgnoreCase)) + { + // Update the end time of the previous caption and skip this one + previousCaption.End = caption.End; + continue; + } + + // Check if the previous caption text appears at the start of this caption + if (cleanText.StartsWith(prevCleanText, StringComparison.OrdinalIgnoreCase)) + { + // Extract only the new part + string newText = cleanText.Substring(prevCleanText.Length).Trim(); + if (!string.IsNullOrWhiteSpace(newText)) + { + caption.Text = newText; + } + else + { + // If there's no new text, skip this caption + continue; + } + } + + // Write the previous caption + resultBuilder.AppendLine(counter.ToString()); + resultBuilder.AppendLine($"{previousCaption.Start} --> {previousCaption.End}"); + resultBuilder.AppendLine(prevCleanText); + resultBuilder.AppendLine(); + counter++; + } + + previousCaption = new Caption + { + Start = caption.Start, + End = caption.End, + Text = caption.Text + }; + } + + // Write the last caption + if (previousCaption != null) + { + string prevCleanText = CleanText(previousCaption.Text); + resultBuilder.AppendLine(counter.ToString()); + resultBuilder.AppendLine($"{previousCaption.Start} --> {previousCaption.End}"); + resultBuilder.AppendLine(prevCleanText); + resultBuilder.AppendLine(); + } + + _logger.LogInformation("Successfully fixed YouTube VTT content with HTML tags"); + return resultBuilder.ToString(); + } + catch (Exception ex) + { + _logger.LogError(ex, "Error fixing YouTube VTT content with HTML tags"); + throw; + } + } + + /// + /// Cleans VTT text by removing HTML tags, position markers, and other formatting + /// + private string CleanText(string text) + { + if (string.IsNullOrEmpty(text)) + return string.Empty; + + // Remove HTML tags + text = Regex.Replace(text, @"<[^>]+>", ""); + + // Remove position markers like "align:start position:0%" + text = Regex.Replace(text, @"align:[a-z]+ position:\d+%", ""); + + // Remove timestamps in format <00:00:00.000> + text = Regex.Replace(text, @"<\d{2}:\d{2}:\d{2}\.\d{3}>", ""); + + // Clean up whitespace + text = Regex.Replace(text, @"\s+", " ").Trim(); + + return text; + } + + private string LimpaTexto(string texto) + { + var textoLimpo = Regex.Replace(texto, @"<\d{2}:\d{2}:\d{2}\.\d{3}>", ""); + textoLimpo = Regex.Replace(textoLimpo, @"<\/?c>", ""); + + return textoLimpo; + } + + /// + /// Fixes YouTube's autogenerated VTT subtitles file and returns an SRT-formatted string + /// + /// Path to the VTT file + /// The fixed subtitles in SRT format + public string FixYoutubeVttFile(string vttFilePath) + { + _logger.LogInformation("Reading VTT file: {VttFilePath}", vttFilePath); + var vttContent = File.ReadAllText(vttFilePath); + return FixYoutubeVtt(vttContent); + } + + private List ParseVtt(string vttContent) + { + var captions = new List(); + + // Skip the WEBVTT header + var lines = vttContent.Split('\n') + .Skip(1) // Skip WEBVTT line + .Select(l => l.Trim()) + .Where(l => !string.IsNullOrWhiteSpace(l)) + .ToList(); + + // Pattern to match timestamp lines like "00:00:00.000 --> 00:00:05.000" + var timestampPattern = new Regex(@"(\d{2}:\d{2}:\d{2}\.\d{3})\s-->\s(\d{2}:\d{2}:\d{2}\.\d{3})"); + + Caption? currentCaption = null; + StringBuilder textBuilder = new StringBuilder(); + + foreach (var line in lines) + { + var match = timestampPattern.Match(line); + + if (match.Success) + { + // If we were building a caption, add it to the list + if (currentCaption != null && textBuilder.Length > 0) + { + currentCaption.Text = textBuilder.ToString().Trim(); + captions.Add(currentCaption); + textBuilder.Clear(); + } + + // Start a new caption + currentCaption = new Caption + { + Start = match.Groups[1].Value, + End = match.Groups[2].Value + }; + } + else if (currentCaption != null && !line.Contains("-->") && !string.IsNullOrWhiteSpace(line)) + { + // Add text to the current caption + if (textBuilder.Length > 0) + { + textBuilder.AppendLine(); + } + textBuilder.Append(line); + } + } + + // Add the last caption if there is one + if (currentCaption != null && textBuilder.Length > 0) + { + currentCaption.Text = textBuilder.ToString().Trim(); + captions.Add(currentCaption); + } + + return captions; + } + + private class Caption + { + public string Start { get; set; } = string.Empty; + public string End { get; set; } = string.Empty; + public string Text { get; set; } = string.Empty; + } + } +} \ No newline at end of file diff --git a/YTExtractor/Services/YouExpose/Client.cs b/YTExtractor/Services/YouExpose/Client.cs new file mode 100644 index 0000000..e20ab5f --- /dev/null +++ b/YTExtractor/Services/YouExpose/Client.cs @@ -0,0 +1,230 @@ +using System.Net.Http.Json; +using System.Text.Json; +using Microsoft.Extensions.Logging; + +namespace YTExtractor.Services.YouExpose +{ + /// + /// Cliente para a API YouExpose - biblioteca para acessar dados do YouTube + /// + public class Client + { + private readonly ILogger _logger; + private readonly HttpClient _httpClient; + private readonly string _apiKey; + private readonly JsonSerializerOptions _jsonOptions; + + /// + /// Construtor do cliente YouExpose + /// + /// Logger para registro de eventos + /// Cliente HTTP opcional (útil para testes) + /// Chave de API do YouExpose (opcional) + public Client(ILogger logger, HttpClient? httpClient = null, string? apiKey = null) + { + _logger = logger; + _httpClient = httpClient ?? new HttpClient(); + _apiKey = apiKey ?? Environment.GetEnvironmentVariable("YOUEXPOSE_API_KEY") ?? ""; + + // Configurar base URL se necessário + if (!string.IsNullOrEmpty(Environment.GetEnvironmentVariable("YOUEXPOSE_API_URL"))) + { + _httpClient.BaseAddress = new Uri(Environment.GetEnvironmentVariable("YOUEXPOSE_API_URL")); + } + else + { + _httpClient.BaseAddress = new Uri("https://api.youexpose.com/"); + } + + _jsonOptions = new JsonSerializerOptions + { + PropertyNamingPolicy = JsonNamingPolicy.CamelCase, + PropertyNameCaseInsensitive = true + }; + } + + /// + /// Obtém informações de um vídeo do YouTube + /// + /// URL do vídeo do YouTube + /// Informações básicas do vídeo + public async Task GetVideoInfoAsync(string url) + { + try + { + _logger.LogInformation("Obtendo informações do vídeo via YouExpose: {Url}", url); + + // Extrair ID do vídeo da URL + var videoId = ExtractVideoId(url); + if (string.IsNullOrEmpty(videoId)) + { + _logger.LogWarning("ID do vídeo não pôde ser extraído da URL: {Url}", url); + return null; + } + + // Fazer requisição à API + var response = await _httpClient.GetAsync($"api/videos/{videoId}?key={_apiKey}"); + + if (!response.IsSuccessStatusCode) + { + _logger.LogWarning("Falha ao obter informações do vídeo: {StatusCode}", response.StatusCode); + return null; + } + + // Deserializar resposta + var result = await response.Content.ReadFromJsonAsync>(_jsonOptions); + return result?.Data; + } + catch (Exception ex) + { + _logger.LogError(ex, "Erro ao obter informações do vídeo: {Url}", url); + return null; + } + } + + /// + /// Obtém legendas de um vídeo do YouTube + /// + /// URL do vídeo do YouTube + /// Código do idioma (ex: "pt", "en") + /// Conteúdo das legendas + public async Task GetSubtitlesAsync(string url, string language) + { + try + { + _logger.LogInformation("Obtendo legendas via YouExpose: {Url}, Idioma: {Language}", url, language); + + // Extrair ID do vídeo da URL + var videoId = ExtractVideoId(url); + if (string.IsNullOrEmpty(videoId)) + { + _logger.LogWarning("ID do vídeo não pôde ser extraído da URL: {Url}", url); + return null; + } + + // Fazer requisição à API + var response = await _httpClient.GetAsync($"api/videos/{videoId}/subtitles?lang={language}&key={_apiKey}"); + + if (!response.IsSuccessStatusCode) + { + _logger.LogWarning("Falha ao obter legendas: {StatusCode}", response.StatusCode); + return null; + } + + // Obter conteúdo das legendas + var result = await response.Content.ReadFromJsonAsync>(_jsonOptions); + return result?.Data?.Content; + } + catch (Exception ex) + { + _logger.LogError(ex, "Erro ao obter legendas: {Url}, Idioma: {Language}", url, language); + return null; + } + } + + /// + /// Extrai o ID do vídeo de uma URL do YouTube + /// + /// URL do YouTube + /// ID do vídeo ou null se não encontrado + private string? ExtractVideoId(string url) + { + try + { + // Verificar se é uma URL válida + if (!Uri.TryCreate(url, UriKind.Absolute, out var uri)) + { + return null; + } + + // Para URLs como: https://www.youtube.com/watch?v=VIDEO_ID + if (uri.Host.Contains("youtube.com") && uri.AbsolutePath.Contains("watch")) + { + var query = System.Web.HttpUtility.ParseQueryString(uri.Query); + return query["v"]; + } + + // Para URLs como: https://youtu.be/VIDEO_ID + if (uri.Host.Contains("youtu.be")) + { + return uri.AbsolutePath.TrimStart('/'); + } + + return null; + } + catch + { + return null; + } + } + } + + /// + /// Informações básicas de um vídeo + /// + public class VideoInfo + { + /// + /// Título do vídeo + /// + public string Title { get; set; } = string.Empty; + + /// + /// URL da miniatura do vídeo + /// + public string ThumbnailUrl { get; set; } = string.Empty; + + /// + /// Duração do vídeo em segundos + /// + public int Duration { get; set; } + + /// + /// Nome do canal + /// + public string ChannelName { get; set; } = string.Empty; + } + + /// + /// Resposta de legendas + /// + public class SubtitleResponse + { + /// + /// Idioma das legendas + /// + public string Language { get; set; } = string.Empty; + + /// + /// Conteúdo das legendas + /// + public string Content { get; set; } = string.Empty; + + /// + /// Indica se são legendas geradas automaticamente + /// + public bool IsAutoGenerated { get; set; } + } + + /// + /// Estrutura genérica para resposta da API + /// + /// Tipo de dados na resposta + public class ApiResponse + { + /// + /// Status da requisição + /// + public bool Success { get; set; } + + /// + /// Mensagem (em caso de erro) + /// + public string Message { get; set; } = string.Empty; + + /// + /// Dados retornados + /// + public T? Data { get; set; } + } +} \ No newline at end of file diff --git a/YTExtractor/Services/YouExposeService.cs b/YTExtractor/Services/YouExposeService.cs new file mode 100644 index 0000000..a0f38a8 --- /dev/null +++ b/YTExtractor/Services/YouExposeService.cs @@ -0,0 +1,72 @@ +using System.Text.RegularExpressions; +using Microsoft.Extensions.Logging; +using YTExtractor.Services; + +namespace YTExtractor.Services +{ + public class YouExposeService : IYoutubeService + { + private readonly ILogger _logger; + private readonly YoutubeService _fallbackService; + + public YouExposeService(ILogger logger, YoutubeService fallbackService) + { + _logger = logger; + _fallbackService = fallbackService; + } + + public bool IsValidYouTubeUrl(string url) + { + return Regex.IsMatch(url, @"^(https?\:\/\/)?(www\.)?(youtube\.com|youtu\.?be)\/.+$"); + } + + public async Task GetVideoInfo(string url, string workingDir) + { + try + { + _logger.LogInformation("Attempting to get video info using YouExpose for {Url}", url); + + // TODO: Implement YouExpose video info retrieval + // This is where you would use the YouExpose library to get video information + // Example code (to be replaced with actual implementation): + // var youExposeClient = new YouExposeClient(); + // var videoInfo = await youExposeClient.GetVideoInfoAsync(url); + // return new YtDlpInfo(videoInfo.Title, videoInfo.ThumbnailUrl); + + // For now, fall back to yt-dlp + _logger.LogInformation("YouExpose implementation not available, falling back to yt-dlp for {Url}", url); + return await _fallbackService.GetVideoInfo(url, workingDir); + } + catch (Exception ex) + { + _logger.LogWarning(ex, "Error getting video info with YouExpose, falling back to yt-dlp for {Url}", url); + return await _fallbackService.GetVideoInfo(url, workingDir); + } + } + + public async Task GetSubtitles(string url, string language, string workingDir) + { + try + { + _logger.LogInformation("Attempting to get subtitles using YouExpose for {Url} in language {Language}", url, language); + + // TODO: Implement YouExpose subtitles retrieval + // This is where you would use the YouExpose library to get subtitles + // Example code (to be replaced with actual implementation): + // var youExposeClient = new YouExposeClient(); + // var subtitles = await youExposeClient.GetSubtitlesAsync(url, language); + // if (!string.IsNullOrEmpty(subtitles)) + // return subtitles; + + // For now, fall back to yt-dlp + _logger.LogInformation("YouExpose implementation not available, falling back to yt-dlp for {Url}", url); + return await _fallbackService.GetSubtitles(url, language, workingDir); + } + catch (Exception ex) + { + _logger.LogWarning(ex, "Error getting subtitles with YouExpose, falling back to yt-dlp for {Url}", url); + return await _fallbackService.GetSubtitles(url, language, workingDir); + } + } + } +} \ No newline at end of file diff --git a/YTExtractor/Services/YoutubeExplode/YoutubeExplodeClient.cs b/YTExtractor/Services/YoutubeExplode/YoutubeExplodeClient.cs new file mode 100644 index 0000000..6d24f66 --- /dev/null +++ b/YTExtractor/Services/YoutubeExplode/YoutubeExplodeClient.cs @@ -0,0 +1,130 @@ +using Microsoft.Extensions.Logging; +using System.Text; +using YoutubeExplode; +using YoutubeExplode.Videos; +using YoutubeExplode.Videos.ClosedCaptions; + +namespace YTExtractor.Services.YoutubeExplode +{ + /// + /// Client using the YoutubeExplode library to extract data from YouTube + /// + public class YoutubeExplodeClient + { + private readonly ILogger _logger; + private readonly YoutubeClient _youtube; + + public YoutubeExplodeClient(ILogger logger) + { + _logger = logger; + _youtube = new YoutubeClient(); + } + + /// + /// Gets video information from a YouTube URL + /// + /// The YouTube video URL + /// Video information or null if it fails + public async Task GetVideoInfoAsync(string url) + { + try + { + _logger.LogInformation("Getting video info using YoutubeExplode for {Url}", url); + + // Get the video ID from the URL + var videoId = VideoId.TryParse(url); + if (videoId == null) + { + _logger.LogWarning("Invalid YouTube URL: {Url}", url); + return null; + } + + // Get the video metadata + var video = await _youtube.Videos.GetAsync(videoId.Value); + + return new YtDlpInfo( + video.Title, + video.Thumbnails.LastOrDefault()?.Url ?? "" + ); + } + catch (Exception ex) + { + _logger.LogError(ex, "Error getting video info with YoutubeExplode for {Url}", url); + return null; + } + } + + /// + /// Gets subtitles for a YouTube video + /// + /// The YouTube video URL + /// The language code (e.g., "en", "pt") + /// The subtitle content or null if it fails + public async Task GetSubtitlesAsync(string url, string language) + { + try + { + _logger.LogInformation("Getting subtitles using YoutubeExplode for {Url} in language {Language}", url, language); + + // Get the video ID from the URL + var videoId = VideoId.TryParse(url); + if (videoId == null) + { + _logger.LogWarning("Invalid YouTube URL: {Url}", url); + return null; + } + + // Get the closed caption tracks available for the video + var trackManifest = await _youtube.Videos.ClosedCaptions.GetManifestAsync(videoId.Value); + + // Find the track for the requested language + var trackInfo = trackManifest.GetByLanguage(language); + if (trackInfo == null) + { + // If not found, try to find an auto-generated track for the language + trackInfo = trackManifest.Tracks + .FirstOrDefault(t => t.Language.Code == language && t.IsAutoGenerated); + + if (trackInfo == null) + { + _logger.LogWarning("No subtitles found for language {Language} for video {Url}", language, url); + return null; + } + } + + // Get the actual captions + var track = await _youtube.Videos.ClosedCaptions.GetAsync(trackInfo); + + // Convert the captions to WebVTT format + var vttBuilder = new StringBuilder(); + vttBuilder.AppendLine("WEBVTT"); + vttBuilder.AppendLine(); + + foreach (var caption in track.Captions) + { + var startTime = FormatTime(caption.Offset); + var endTime = FormatTime(caption.Offset + caption.Duration); + + vttBuilder.AppendLine($"{startTime} --> {endTime}"); + vttBuilder.AppendLine(caption.Text); + vttBuilder.AppendLine(); + } + + return vttBuilder.ToString(); + } + catch (Exception ex) + { + _logger.LogError(ex, "Error getting subtitles with YoutubeExplode for {Url} in language {Language}", url, language); + return null; + } + } + + /// + /// Format TimeSpan as WebVTT time format (HH:MM:SS.mmm) + /// + private string FormatTime(TimeSpan time) + { + return $"{time.Hours:D2}:{time.Minutes:D2}:{time.Seconds:D2}.{time.Milliseconds:D3}"; + } + } +} \ No newline at end of file diff --git a/YTExtractor/YTExtractor.csproj b/YTExtractor/YTExtractor.csproj index 465fb33..81e1ae6 100644 --- a/YTExtractor/YTExtractor.csproj +++ b/YTExtractor/YTExtractor.csproj @@ -6,6 +6,10 @@ enable + + + + @@ -20,6 +24,7 @@ + diff --git a/YTExtractor/YoutubeService.cs b/YTExtractor/YoutubeService.cs index e099e45..5a9806d 100644 --- a/YTExtractor/YoutubeService.cs +++ b/YTExtractor/YoutubeService.cs @@ -4,17 +4,18 @@ using System.Reflection; using System.Runtime.InteropServices; using System.Text.Json; using System.Text.RegularExpressions; +using YTExtractor.Services; namespace YTExtractor { - public class YoutubeService + public class YoutubeService : IYoutubeService { - public static bool IsValidYouTubeUrl(string urlx) + public bool IsValidYouTubeUrl(string urlx) { return Regex.IsMatch(urlx, @"^(https?\:\/\/)?(www\.)?(youtube\.com|youtu\.?be)\/.+$"); } - public static async Task GetVideoInfo(string url, string workingDir) + public async Task GetVideoInfo(string url, string workingDir) { var startInfo = new ProcessStartInfo { @@ -42,7 +43,7 @@ namespace YTExtractor ); } - public static async Task GetSubtitles(string url, string language, string workingDir) + public async Task GetSubtitles(string url, string language, string workingDir) { var pathExe = Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location); var exePath = RuntimeInformation.IsOSPlatform(OSPlatform.Windows) diff --git a/YTExtractor/appsettings.json b/YTExtractor/appsettings.json index f61bb6a..3c1378d 100644 --- a/YTExtractor/appsettings.json +++ b/YTExtractor/appsettings.json @@ -19,6 +19,9 @@ } }, "AllowedHosts": "*", - //"MongoDbConnection": "mongodb://192.168.0.99:27017" - "MongoDbConnection": "mongodb://admin:c4rn31r0@192.168.0.82:27017,192.168.0.81:27017/?replicaSet=rs0" -} + "MongoDbConnection": "mongodb://admin:c4rn31r0@192.168.0.82:27017,192.168.0.81:27017/?replicaSet=rs0", + "YouExpose": { + "ApiKey": "sua-chave-api-aqui", + "ApiUrl": "https://api.youexpose.com/" + } +} \ No newline at end of file