From f8f052428f305e7b538759cf87e8a28466c250cc Mon Sep 17 00:00:00 2001 From: Ricardo Carneiro Date: Mon, 26 Jan 2026 09:37:47 -0300 Subject: [PATCH] fix: features de audio --- .claude/settings.local.json | 4 +- .../Views/SpeechToText/Index.cshtml | 1 + .../Views/TextToSpeech/Index.cshtml | 230 +++++++++++++++--- Controllers/TtsApiController.cs | 67 +++++ Dockerfile | 3 +- Program.cs | 1 + Services/AudioTranscriptionService.cs | 122 +++++++++- Services/ITextToSpeechService.cs | 9 + Services/TextToSpeechService.cs | 209 ++++++++++++++++ 9 files changed, 615 insertions(+), 31 deletions(-) create mode 100644 Controllers/TtsApiController.cs create mode 100644 Services/ITextToSpeechService.cs create mode 100644 Services/TextToSpeechService.cs diff --git a/.claude/settings.local.json b/.claude/settings.local.json index f45e8ff..7bf0e88 100644 --- a/.claude/settings.local.json +++ b/.claude/settings.local.json @@ -20,7 +20,9 @@ "Bash(dotnet build:*)", "Bash(rm:*)", "Bash(dotnet add package:*)", - "Bash(dir:*)" + "Bash(dir:*)", + "Bash(where:*)", + "Bash(winget install:*)" ], "deny": [], "ask": [] diff --git a/Areas/AudioTools/Views/SpeechToText/Index.cshtml b/Areas/AudioTools/Views/SpeechToText/Index.cshtml index 91a90cb..8e8c2f9 100644 --- a/Areas/AudioTools/Views/SpeechToText/Index.cshtml +++ b/Areas/AudioTools/Views/SpeechToText/Index.cshtml @@ -1,6 +1,7 @@ @{ ViewData["Title"] = "Áudio para Texto (Transcrição)"; var culture = ViewContext.RouteData.Values["culture"] as string ?? "pt-BR"; + Layout = "_Layout"; }
diff --git a/Areas/AudioTools/Views/TextToSpeech/Index.cshtml b/Areas/AudioTools/Views/TextToSpeech/Index.cshtml index 5b3513a..c5fc946 100644 --- a/Areas/AudioTools/Views/TextToSpeech/Index.cshtml +++ b/Areas/AudioTools/Views/TextToSpeech/Index.cshtml @@ -1,5 +1,6 @@ @{ ViewData["Title"] = "Texto para Áudio (Voz)"; + Layout = "_Layout"; }
@@ -31,12 +32,22 @@
- - + +
+ +
@@ -49,15 +60,15 @@ @section Scripts { } diff --git a/Controllers/TtsApiController.cs b/Controllers/TtsApiController.cs new file mode 100644 index 0000000..c906503 --- /dev/null +++ b/Controllers/TtsApiController.cs @@ -0,0 +1,67 @@ +using Microsoft.AspNetCore.Mvc; +using Convert_It_Online.Services; + +namespace Convert_It_Online.Controllers +{ + [ApiController] + [Route("api/tts")] + public class TtsApiController : ControllerBase + { + private readonly ITextToSpeechService _ttsService; + private readonly ILogger _logger; + + public TtsApiController(ITextToSpeechService ttsService, ILogger logger) + { + _ttsService = ttsService; + _logger = logger; + } + + [HttpPost("generate")] + public async Task Generate([FromBody] TtsRequest request) + { + if (string.IsNullOrWhiteSpace(request.Text)) + { + return BadRequest("Texto é obrigatório."); + } + + // Limitar tamanho do texto para evitar abusos + if (request.Text.Length > 5000) + { + return BadRequest("Texto muito longo. Máximo de 5000 caracteres."); + } + + try + { + _logger.LogInformation("Gerando áudio TTS: {Length} caracteres, idioma: {Lang}", + request.Text.Length, request.Language); + + var audioBytes = await _ttsService.GenerateAudioAsync( + request.Text, + request.Language ?? "pt-BR", + request.Rate ?? 1.0f, + request.Pitch ?? 1.0f + ); + + return File(audioBytes, "audio/ogg", "audio.ogg"); + } + catch (InvalidOperationException ex) + { + _logger.LogWarning(ex, "TTS não disponível"); + return StatusCode(503, "Serviço de síntese de voz não disponível no momento."); + } + catch (Exception ex) + { + _logger.LogError(ex, "Erro ao gerar áudio TTS"); + return StatusCode(500, "Erro ao gerar áudio."); + } + } + } + + public class TtsRequest + { + public string Text { get; set; } = string.Empty; + public string? Language { get; set; } + public float? Rate { get; set; } + public float? 
Pitch { get; set; } + } +} diff --git a/Dockerfile b/Dockerfile index b252db6..a01205a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -44,10 +44,11 @@ FROM base AS final WORKDIR /app COPY --from=publish /app/publish . -# Instalar ffmpeg e bibliotecas nativas (rodar como root) +# Instalar ffmpeg, espeak-ng e bibliotecas nativas (rodar como root) USER root RUN apt-get update && apt-get install -y \ ffmpeg \ + espeak-ng \ libc6-dev \ && rm -rf /var/lib/apt/lists/* USER app diff --git a/Program.cs b/Program.cs index 6fd6f22..de316bc 100644 --- a/Program.cs +++ b/Program.cs @@ -164,6 +164,7 @@ builder.Services.AddLocalization(); builder.Services.AddSingleton(); builder.Services.AddSingleton(); +builder.Services.AddSingleton(); var supportedCultures = new[] { "pt-BR", "es-MX", "es-CL", "es-PY" }; builder.Services.Configure(options => diff --git a/Services/AudioTranscriptionService.cs b/Services/AudioTranscriptionService.cs index 39d987d..0c2d565 100644 --- a/Services/AudioTranscriptionService.cs +++ b/Services/AudioTranscriptionService.cs @@ -1,6 +1,8 @@ using System; +using System.Diagnostics; using System.IO; using System.Net.Http; +using System.Runtime.InteropServices; using System.Threading.Tasks; using Whisper.net; using Whisper.net.Ggml; @@ -14,19 +16,137 @@ namespace Convert_It_Online.Services private readonly string _modelPath; private readonly ILogger _logger; private readonly HttpClient _httpClient; + private static bool _ffmpegConfigured = false; public AudioTranscriptionService(ILogger logger) { _logger = logger; _httpClient = new HttpClient(); _modelPath = Path.Combine(AppContext.BaseDirectory, "Models", "ggml-base.bin"); - + // Garantir que a pasta Models existe var modelsDir = Path.GetDirectoryName(_modelPath); if (!Directory.Exists(modelsDir)) { Directory.CreateDirectory(modelsDir!); } + + // Configurar FFmpeg uma única vez + ConfigureFFmpeg(); + } + + private void ConfigureFFmpeg() + { + if (_ffmpegConfigured) return; + + try + { + string? 
ffmpegPath = null; + + // 1. Primeiro, verificar variável de ambiente (maior prioridade) + var envPath = Environment.GetEnvironmentVariable("FFMPEG_PATH"); + if (!string.IsNullOrEmpty(envPath) && Directory.Exists(envPath)) + { + ffmpegPath = envPath; + _logger.LogInformation("FFmpeg configurado via FFMPEG_PATH: {Path}", ffmpegPath); + } + + // 2. Se não encontrou, procurar em locais comuns + if (ffmpegPath == null) + { + if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) + { + // Windows: procurar em locais comuns + var possiblePaths = new[] + { + Path.Combine(AppContext.BaseDirectory, "ffmpeg"), + @"C:\Apps\ffmpeg\bin", + @"C:\Apps\ffmpeg", + Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.ProgramFiles), "ffmpeg", "bin"), + @"C:\ffmpeg\bin", + @"C:\ffmpeg", + @"C:\Program Files\ffmpeg\bin", + @"C:\tools\ffmpeg\bin", + @"C:\tools\ffmpeg" + }; + + foreach (var path in possiblePaths) + { + var ffmpegExe = Path.Combine(path, "ffmpeg.exe"); + if (File.Exists(ffmpegExe)) + { + ffmpegPath = path; + _logger.LogInformation("FFmpeg encontrado em: {Path}", ffmpegPath); + break; + } + } + + // Tentar encontrar via PATH + if (ffmpegPath == null) + { + ffmpegPath = FindFFmpegInPath("ffmpeg.exe"); + } + } + else + { + // Linux: verificar locais padrão + var possiblePaths = new[] + { + "/usr/bin", + "/usr/local/bin", + "/opt/ffmpeg/bin" + }; + + foreach (var path in possiblePaths) + { + var ffmpegExe = Path.Combine(path, "ffmpeg"); + if (File.Exists(ffmpegExe)) + { + ffmpegPath = path; + _logger.LogInformation("FFmpeg encontrado em: {Path}", ffmpegPath); + break; + } + } + } + } + + if (!string.IsNullOrEmpty(ffmpegPath)) + { + FFmpeg.SetExecutablesPath(ffmpegPath); + _logger.LogInformation("FFmpeg.SetExecutablesPath configurado: {Path}", ffmpegPath); + } + else + { + _logger.LogWarning("FFmpeg não encontrado em caminhos conhecidos. 
A transcrição de áudio pode falhar."); + } + + _ffmpegConfigured = true; + } + catch (Exception ex) + { + _logger.LogError(ex, "Erro ao configurar FFmpeg"); + } + } + + private string? FindFFmpegInPath(string executable) + { + var pathEnv = Environment.GetEnvironmentVariable("PATH"); + if (string.IsNullOrEmpty(pathEnv)) return null; + + var separator = RuntimeInformation.IsOSPlatform(OSPlatform.Windows) ? ';' : ':'; + var paths = pathEnv.Split(separator); + + foreach (var path in paths) + { + var fullPath = Path.Combine(path.Trim(), executable); + if (File.Exists(fullPath)) + { + _logger.LogInformation("FFmpeg encontrado no PATH: {Path}", path); + return path; + } + } + + return null; } private async Task EnsureModelExistsAsync() diff --git a/Services/ITextToSpeechService.cs b/Services/ITextToSpeechService.cs new file mode 100644 index 0000000..e46ae1e --- /dev/null +++ b/Services/ITextToSpeechService.cs @@ -0,0 +1,9 @@ +using System.Threading.Tasks; + +namespace Convert_It_Online.Services +{ + public interface ITextToSpeechService + { + Task GenerateAudioAsync(string text, string language = "pt-BR", float rate = 1.0f, float pitch = 1.0f); + } +} diff --git a/Services/TextToSpeechService.cs b/Services/TextToSpeechService.cs new file mode 100644 index 0000000..a0c6dab --- /dev/null +++ b/Services/TextToSpeechService.cs @@ -0,0 +1,209 @@ +using System; +using System.Diagnostics; +using System.IO; +using System.Runtime.InteropServices; +using System.Threading.Tasks; +using Microsoft.Extensions.Logging; + +namespace Convert_It_Online.Services +{ + public class TextToSpeechService : ITextToSpeechService + { + private readonly ILogger _logger; + private readonly string? _espeakPath; + private readonly string? 
_ffmpegPath;
+
+        /// <summary>
+        /// Resolves the espeak-ng and FFmpeg executables once and logs whether each was found.
+        /// </summary>
+        /// <param name="logger">Logger used for discovery results and process diagnostics.</param>
+        public TextToSpeechService(ILogger<TextToSpeechService> logger)
+        {
+            _logger = logger;
+            _espeakPath = FindExecutable("espeak-ng");
+            _ffmpegPath = FindExecutable("ffmpeg");
+
+            if (_espeakPath != null)
+                _logger.LogInformation("espeak-ng encontrado em: {Path}", _espeakPath);
+            else
+                _logger.LogWarning("espeak-ng não encontrado. TTS pode não funcionar.");
+
+            if (_ffmpegPath != null)
+                _logger.LogInformation("FFmpeg encontrado em: {Path}", _ffmpegPath);
+            else
+                _logger.LogWarning("FFmpeg não encontrado. Conversão para OGG pode não funcionar.");
+        }
+
+        /// <summary>
+        /// Searches a fixed list of well-known install directories, then every PATH entry,
+        /// for the given tool.
+        /// </summary>
+        /// <param name="name">Tool name without extension; ".exe" is appended on Windows.</param>
+        /// <returns>Full path of the first match, or <c>null</c> when the tool is not found.</returns>
+        private string? FindExecutable(string name)
+        {
+            var isWindows = RuntimeInformation.IsOSPlatform(OSPlatform.Windows);
+            var executable = isWindows ? $"{name}.exe" : name;
+
+            // Well-known locations are probed before PATH.
+            string[] searchPaths = isWindows
+                ? new[]
+                {
+                    Path.Combine(AppContext.BaseDirectory, name),
+                    @"C:\Program Files\eSpeak NG",
+                    @"C:\Program Files (x86)\eSpeak NG",
+                    @"C:\Apps\espeak-ng",
+                    @"C:\Apps\ffmpeg\bin",
+                    @"C:\ffmpeg\bin",
+                    @"C:\Program Files\ffmpeg\bin"
+                }
+                : new[]
+                {
+                    "/usr/bin",
+                    "/usr/local/bin",
+                    "/opt/homebrew/bin"
+                };
+
+            foreach (var directory in searchPaths)
+            {
+                var candidate = Path.Combine(directory, executable);
+                if (File.Exists(candidate))
+                    return candidate;
+            }
+
+            var pathEnv = Environment.GetEnvironmentVariable("PATH");
+            if (string.IsNullOrEmpty(pathEnv))
+                return null;
+
+            // Path.PathSeparator is ';' on Windows and ':' elsewhere.
+            foreach (var entry in pathEnv.Split(Path.PathSeparator, StringSplitOptions.RemoveEmptyEntries))
+            {
+                // Windows PATH entries may be wrapped in quotes. An empty entry must be skipped,
+                // otherwise Path.Combine yields a relative path resolved against the working directory.
+                var directory = entry.Trim().Trim('"');
+                if (directory.Length == 0)
+                    continue;
+
+                var candidate = Path.Combine(directory, executable);
+                if (File.Exists(candidate))
+                    return candidate;
+            }
+
+            return null;
+        }
+
+        /// <summary>
+        /// Synthesizes <paramref name="text"/> to a WAV file with espeak-ng, transcodes it to
+        /// OGG/Vorbis with FFmpeg and returns the OGG bytes. Temporary files are removed afterwards.
+        /// </summary>
+        /// <param name="text">Text to speak. Sent to espeak-ng on stdin, never on the command line.</param>
+        /// <param name="language">Culture code; unknown codes fall back to "pt-br".</param>
+        /// <param name="rate">Speed multiplier applied to 175 words per minute, clamped to 80-450.</param>
+        /// <param name="pitch">Pitch multiplier applied to 50, clamped to 0-99.</param>
+        /// <returns>The encoded OGG audio.</returns>
+        /// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
+        /// <exception cref="InvalidOperationException">espeak-ng or FFmpeg was not found at startup.</exception>
+        /// <exception cref="Exception">A tool exited with a non-zero code or produced no output file.</exception>
+        public async Task<byte[]> GenerateAudioAsync(string text, string language = "pt-BR", float rate = 1.0f, float pitch = 1.0f)
+        {
+            if (text is null)
+                throw new ArgumentNullException(nameof(text));
+
+            if (_espeakPath == null)
+                throw new InvalidOperationException("espeak-ng não está instalado no servidor.");
+
+            if (_ffmpegPath == null)
+                throw new InvalidOperationException("FFmpeg não está instalado no servidor.");
+
+            var tempWavPath = Path.Combine(Path.GetTempPath(), $"{Guid.NewGuid()}.wav");
+            var tempOggPath = Path.Combine(Path.GetTempPath(), $"{Guid.NewGuid()}.ogg");
+
+            try
+            {
+                var invariant = System.Globalization.CultureInfo.InvariantCulture;
+                var wordsPerMinute = ScaleAndClamp(rate, 175, 80, 450);
+                var espeakPitch = ScaleAndClamp(pitch, 50, 0, 99);
+
+                // The text goes through stdin (--stdin): user input starting with '-' can no longer
+                // be parsed as an espeak-ng option, and no shell-style quoting is needed.
+                var espeakArgs = new[]
+                {
+                    "-v", MapLanguageCode(language),
+                    "-s", wordsPerMinute.ToString(invariant),
+                    "-p", espeakPitch.ToString(invariant),
+                    "-w", tempWavPath,
+                    "--stdin"
+                };
+
+                // Only the options are logged; the user's text is deliberately kept out of the logs.
+                _logger.LogInformation("Executando espeak-ng: {Args}", string.Join(' ', espeakArgs));
+
+                var (espeakExit, espeakError) = await RunProcessAsync(_espeakPath, espeakArgs, NormalizeText(text));
+
+                // Plain Exception is kept on purpose: callers treat InvalidOperationException
+                // as "tool not installed", which is a different failure.
+                if (espeakExit != 0)
+                    throw new Exception($"espeak-ng falhou: {espeakError}");
+
+                if (!File.Exists(tempWavPath))
+                    throw new Exception("espeak-ng não gerou o arquivo WAV.");
+
+                var ffmpegArgs = new[] { "-i", tempWavPath, "-c:a", "libvorbis", "-q:a", "4", "-y", tempOggPath };
+
+                _logger.LogInformation("Executando FFmpeg: {Args}", string.Join(' ', ffmpegArgs));
+
+                var (ffmpegExit, ffmpegError) = await RunProcessAsync(_ffmpegPath, ffmpegArgs, null);
+
+                if (ffmpegExit != 0)
+                    throw new Exception($"FFmpeg falhou: {ffmpegError}");
+
+                if (!File.Exists(tempOggPath))
+                    throw new Exception("FFmpeg não gerou o arquivo OGG.");
+
+                return await File.ReadAllBytesAsync(tempOggPath);
+            }
+            finally
+            {
+                TryDeleteTempFile(tempWavPath);
+                TryDeleteTempFile(tempOggPath);
+            }
+        }
+
+        /// <summary>
+        /// Runs a child process to completion and returns its exit code and captured stderr.
+        /// </summary>
+        /// <param name="fileName">Executable to start.</param>
+        /// <param name="arguments">Arguments, passed one by one so the runtime does the quoting.</param>
+        /// <param name="stdinText">Text written to the child's stdin as UTF-8, or <c>null</c> for none.</param>
+        private async Task<(int ExitCode, string StdErr)> RunProcessAsync(string fileName, string[] arguments, string? stdinText)
+        {
+            var startInfo = new ProcessStartInfo
+            {
+                FileName = fileName,
+                RedirectStandardInput = stdinText is not null,
+                RedirectStandardOutput = true,
+                RedirectStandardError = true,
+                UseShellExecute = false,
+                CreateNoWindow = true
+            };
+
+            if (stdinText is not null)
+            {
+                // UTF-8 without a byte-order mark, so the child does not receive a BOM as text.
+                startInfo.StandardInputEncoding = new System.Text.UTF8Encoding(false);
+            }
+
+            foreach (var argument in arguments)
+                startInfo.ArgumentList.Add(argument);
+
+            using var process = new Process { StartInfo = startInfo };
+            process.Start();
+
+            try
+            {
+                // Start draining both pipes BEFORE waiting for exit: a child that fills a
+                // redirected pipe blocks until it is read, which would hang WaitForExitAsync.
+                var stdoutTask = process.StandardOutput.ReadToEndAsync();
+                var stderrTask = process.StandardError.ReadToEndAsync();
+
+                if (stdinText is not null)
+                {
+                    await process.StandardInput.WriteAsync(stdinText);
+                    process.StandardInput.Close(); // signals end of input to the child
+                }
+
+                await process.WaitForExitAsync();
+                await stdoutTask;
+
+                return (process.ExitCode, await stderrTask);
+            }
+            catch
+            {
+                // Best effort: do not leave an orphaned child behind when this call fails.
+                try
+                {
+                    if (!process.HasExited)
+                        process.Kill(true);
+                }
+                catch (Exception killError)
+                {
+                    _logger.LogDebug(killError, "Falha ao encerrar o processo {File}", fileName);
+                }
+
+                throw;
+            }
+        }
+
+        /// <summary>
+        /// Deletes a temporary file if it exists; failures are logged and never propagated,
+        /// so cleanup cannot mask the exception that is already in flight.
+        /// </summary>
+        private void TryDeleteTempFile(string path)
+        {
+            try
+            {
+                if (File.Exists(path))
+                    File.Delete(path);
+            }
+            catch (Exception ex) when (ex is IOException || ex is UnauthorizedAccessException)
+            {
+                _logger.LogWarning(ex, "Não foi possível remover o arquivo temporário: {Path}", path);
+            }
+        }
+
+        /// <summary>
+        /// Multiplies <paramref name="baseline"/> by <paramref name="factor"/> and clamps the result
+        /// to [<paramref name="min"/>, <paramref name="max"/>] before truncating to an integer.
+        /// NaN yields the baseline; infinities clamp to the nearest bound.
+        /// </summary>
+        private static int ScaleAndClamp(float factor, int baseline, int min, int max)
+        {
+            if (float.IsNaN(factor))
+                return baseline;
+
+            // Clamp while still a float: casting an out-of-range float to int is unspecified.
+            float scaled = baseline * factor;
+            return (int)Math.Clamp(scaled, (float)min, (float)max);
+        }
+
+        /// <summary>
+        /// Maps a culture code to an espeak-ng voice name; unknown codes fall back to "pt-br".
+        /// </summary>
+        private string MapLanguageCode(string language)
+        {
+            // Invariant lower-casing: this is a key lookup, not user-facing text.
+            return (language ?? string.Empty).ToLowerInvariant() switch
+            {
+                "pt-br" => "pt-br",
+                "pt" => "pt",
+                "pt-pt" => "pt-pt",
+                "es" or "es-mx" or "es-cl" or "es-py" or "es-es" => "es",
+                "en" or "en-us" => "en-us",
+                "en-gb" => "en-gb",
+                _ => "pt-br" // default voice
+            };
+        }
+
+        /// <summary>
+        /// Flattens line breaks (LF becomes a space, CR is dropped). No quote or backslash
+        /// escaping is applied because the text is delivered on stdin, not on a command line.
+        /// </summary>
+        private static string NormalizeText(string text)
+        {
+            return text
+                .Replace("\n", " ")
+                .Replace("\r", "");
+        }
+    }
+}