Convert-it/Services/TextToSpeechService.cs
Ricardo Carneiro f8f052428f
All checks were successful
Deploy ASP.NET MVC to OCI / build-and-deploy (push) Successful in 22m1s
fix: features de audio
2026-01-26 09:37:47 -03:00

210 lines
7.5 KiB
C#

using System;
using System.Diagnostics;
using System.IO;
using System.Runtime.InteropServices;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
namespace Convert_It_Online.Services
{
/// <summary>
/// Text-to-speech service that synthesizes a WAV file with the <c>espeak-ng</c>
/// command-line tool and then transcodes it to OGG/Vorbis with FFmpeg.
/// </summary>
/// <remarks>
/// Both tools are located once, when the service is constructed. When either one is
/// missing, <see cref="GenerateAudioAsync"/> throws <see cref="InvalidOperationException"/>.
/// </remarks>
public class TextToSpeechService : ITextToSpeechService
{
    // espeak-ng speed is expressed in words per minute; 175 is the baseline used for rate = 1.0.
    private const int DefaultWordsPerMinute = 175;
    private const int MinWordsPerMinute = 80;
    private const int MaxWordsPerMinute = 450;

    // espeak-ng pitch is expressed on a 0-99 scale; 50 is the baseline used for pitch = 1.0.
    private const int DefaultPitch = 50;
    private const int MinPitch = 0;
    private const int MaxPitch = 99;

    private readonly ILogger<TextToSpeechService> _logger;
    private readonly string? _espeakPath;
    private readonly string? _ffmpegPath;

    /// <summary>
    /// Creates the service, resolves the espeak-ng and FFmpeg executables and logs
    /// whether each one was found.
    /// </summary>
    /// <param name="logger">Logger used for diagnostics.</param>
    public TextToSpeechService(ILogger<TextToSpeechService> logger)
    {
        _logger = logger;
        _espeakPath = FindExecutable("espeak-ng");
        _ffmpegPath = FindExecutable("ffmpeg");

        if (_espeakPath is not null)
        {
            _logger.LogInformation("espeak-ng encontrado em: {Path}", _espeakPath);
        }
        else
        {
            _logger.LogWarning("espeak-ng não encontrado. TTS pode não funcionar.");
        }

        if (_ffmpegPath is not null)
        {
            _logger.LogInformation("FFmpeg encontrado em: {Path}", _ffmpegPath);
        }
        else
        {
            _logger.LogWarning("FFmpeg não encontrado. Conversão para OGG pode não funcionar.");
        }
    }

    /// <summary>
    /// Looks for an executable first in a list of well-known install directories and
    /// then in every directory listed in the <c>PATH</c> environment variable.
    /// </summary>
    /// <param name="name">Executable name without extension (".exe" is appended on Windows).</param>
    /// <returns>The full path of the first match, or <c>null</c> when nothing was found.</returns>
    private static string? FindExecutable(string name)
    {
        var isWindows = RuntimeInformation.IsOSPlatform(OSPlatform.Windows);
        var executable = isWindows ? $"{name}.exe" : name;

        // Common install locations, checked before PATH.
        string[] wellKnownDirectories = isWindows
            ? new[]
            {
                Path.Combine(AppContext.BaseDirectory, name),
                @"C:\Program Files\eSpeak NG",
                @"C:\Program Files (x86)\eSpeak NG",
                @"C:\Apps\espeak-ng",
                @"C:\Apps\ffmpeg\bin",
                @"C:\ffmpeg\bin",
                @"C:\Program Files\ffmpeg\bin"
            }
            : new[]
            {
                "/usr/bin",
                "/usr/local/bin",
                "/opt/homebrew/bin"
            };

        foreach (var directory in wellKnownDirectories)
        {
            var candidate = Path.Combine(directory, executable);
            if (File.Exists(candidate))
            {
                return candidate;
            }
        }

        // Fall back to the directories listed in PATH.
        var pathEnv = Environment.GetEnvironmentVariable("PATH");
        if (string.IsNullOrEmpty(pathEnv))
        {
            return null;
        }

        foreach (var entry in pathEnv.Split(Path.PathSeparator, StringSplitOptions.RemoveEmptyEntries))
        {
            // Skip blank entries (they would resolve against the current working directory)
            // and strip the quotes that Windows PATH entries sometimes carry.
            var directory = entry.Trim().Trim('"');
            if (directory.Length == 0)
            {
                continue;
            }

            var candidate = Path.Combine(directory, executable);
            if (File.Exists(candidate))
            {
                return candidate;
            }
        }

        return null;
    }

    /// <summary>
    /// Synthesizes <paramref name="text"/> to speech and returns the audio as OGG/Vorbis bytes.
    /// </summary>
    /// <param name="text">Text to speak. Line breaks are flattened to spaces.</param>
    /// <param name="language">Language code; unknown codes fall back to the "pt-br" voice.</param>
    /// <param name="rate">Speed multiplier applied to 175 words per minute, clamped to 80-450.</param>
    /// <param name="pitch">Pitch multiplier applied to 50, clamped to 0-99.</param>
    /// <returns>The contents of the generated OGG file.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="text"/> is <c>null</c>.</exception>
    /// <exception cref="InvalidOperationException">
    /// espeak-ng or FFmpeg is not installed, exited with a non-zero code, or produced no output file.
    /// </exception>
    public async Task<byte[]> GenerateAudioAsync(string text, string language = "pt-BR", float rate = 1.0f, float pitch = 1.0f)
    {
        if (text is null)
        {
            throw new ArgumentNullException(nameof(text));
        }

        if (_espeakPath is null)
        {
            throw new InvalidOperationException("espeak-ng não está instalado no servidor.");
        }

        if (_ffmpegPath is null)
        {
            throw new InvalidOperationException("FFmpeg não está instalado no servidor.");
        }

        var tempWavPath = Path.Combine(Path.GetTempPath(), $"{Guid.NewGuid()}.wav");
        var tempOggPath = Path.Combine(Path.GetTempPath(), $"{Guid.NewGuid()}.ogg");

        try
        {
            // Map the requested language to an espeak-ng voice (whitelist, so it is safe as an argument).
            var espeakLang = MapLanguageCode(language);

            // Clamp in the floating-point domain before converting, so extreme multipliers
            // saturate at the bounds instead of overflowing the int conversion.
            var wordsPerMinute = ScaleAndClamp(rate, DefaultWordsPerMinute, MinWordsPerMinute, MaxWordsPerMinute);
            var espeakPitch = ScaleAndClamp(pitch, DefaultPitch, MinPitch, MaxPitch);

            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            // Generate the WAV with espeak-ng. The text is sent through stdin rather than as a
            // command-line argument: user text starting with '-' would otherwise be parsed as
            // an espeak-ng option, and long texts could exceed command-line length limits.
            string[] espeakArgs =
            {
                "-v", espeakLang,
                "-s", wordsPerMinute.ToString(invariant),
                "-p", espeakPitch.ToString(invariant),
                "-w", tempWavPath,
                "--stdin"
            };
            _logger.LogInformation("Executando espeak-ng: {Args}", string.Join(' ', espeakArgs));

            var espeakResult = await RunProcessAsync(_espeakPath, espeakArgs, FlattenLineBreaks(text));
            if (espeakResult.ExitCode != 0)
            {
                throw new InvalidOperationException($"espeak-ng falhou: {espeakResult.StandardError}");
            }

            if (!File.Exists(tempWavPath))
            {
                throw new InvalidOperationException("espeak-ng não gerou o arquivo WAV.");
            }

            // Convert the WAV to OGG with FFmpeg.
            string[] ffmpegArgs =
            {
                "-i", tempWavPath,
                "-c:a", "libvorbis",
                "-q:a", "4",
                "-y", tempOggPath
            };
            _logger.LogInformation("Executando FFmpeg: {Args}", string.Join(' ', ffmpegArgs));

            var ffmpegResult = await RunProcessAsync(_ffmpegPath, ffmpegArgs);
            if (ffmpegResult.ExitCode != 0)
            {
                throw new InvalidOperationException($"FFmpeg falhou: {ffmpegResult.StandardError}");
            }

            if (!File.Exists(tempOggPath))
            {
                throw new InvalidOperationException("FFmpeg não gerou o arquivo OGG.");
            }

            return await File.ReadAllBytesAsync(tempOggPath);
        }
        finally
        {
            // Best-effort cleanup: a failed delete must not hide the real outcome of the call.
            TryDeleteTempFile(tempWavPath);
            TryDeleteTempFile(tempOggPath);
        }
    }

    /// <summary>
    /// Runs an external program to completion, optionally feeding it UTF-8 text on stdin.
    /// </summary>
    /// <param name="fileName">Full path of the executable.</param>
    /// <param name="arguments">Arguments, passed individually so the runtime handles quoting.</param>
    /// <param name="standardInput">Text written to the child's stdin, or <c>null</c> to leave stdin alone.</param>
    /// <returns>The process exit code and everything it wrote to stderr.</returns>
    private static async Task<(int ExitCode, string StandardError)> RunProcessAsync(
        string fileName,
        string[] arguments,
        string? standardInput = null)
    {
        var startInfo = new ProcessStartInfo
        {
            FileName = fileName,
            RedirectStandardInput = standardInput is not null,
            RedirectStandardOutput = true,
            RedirectStandardError = true,
            UseShellExecute = false,
            CreateNoWindow = true
        };

        if (standardInput is not null)
        {
            // UTF-8 without a byte-order mark, so no BOM ends up in the spoken text.
            startInfo.StandardInputEncoding = new System.Text.UTF8Encoding(false);
        }

        foreach (var argument in arguments)
        {
            startInfo.ArgumentList.Add(argument);
        }

        using var process = new Process { StartInfo = startInfo };
        process.Start();

        // Drain both output pipes while the process runs. Waiting for exit first can
        // deadlock once the child fills a pipe buffer nobody is reading.
        var stdoutTask = process.StandardOutput.ReadToEndAsync();
        var stderrTask = process.StandardError.ReadToEndAsync();

        if (standardInput is not null)
        {
            try
            {
                await process.StandardInput.WriteAsync(standardInput);
                process.StandardInput.Close();
            }
            catch (IOException)
            {
                // The child exited before consuming its input; the exit code and
                // stderr collected below describe the actual failure.
            }
        }

        await Task.WhenAll(stdoutTask, stderrTask);
        await process.WaitForExitAsync();

        return (process.ExitCode, await stderrTask);
    }

    /// <summary>
    /// Multiplies <paramref name="baseline"/> by <paramref name="factor"/> and clamps the
    /// result to [<paramref name="min"/>, <paramref name="max"/>], truncating toward zero.
    /// A NaN factor yields the baseline.
    /// </summary>
    private static int ScaleAndClamp(float factor, int baseline, int min, int max)
    {
        if (float.IsNaN(factor))
        {
            return baseline;
        }

        return (int)Math.Clamp(baseline * factor, min, max);
    }

    /// <summary>
    /// Deletes a temporary file if it exists, logging (not throwing) when the delete fails.
    /// </summary>
    private void TryDeleteTempFile(string path)
    {
        try
        {
            if (File.Exists(path))
            {
                File.Delete(path);
            }
        }
        catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
        {
            _logger.LogWarning(ex, "Não foi possível remover o arquivo temporário: {Path}", path);
        }
    }

    /// <summary>
    /// Maps a language code (case-insensitive) to an espeak-ng voice name.
    /// </summary>
    /// <param name="language">Language code such as "pt-BR" or "en-US".</param>
    /// <returns>The voice name; "pt-br" for <c>null</c> or unrecognized codes.</returns>
    private static string MapLanguageCode(string language)
    {
        // Invariant lowering keeps the lookup independent of the server's current culture.
        return language?.Trim().ToLowerInvariant() switch
        {
            "pt-br" => "pt-br",
            "pt" => "pt",
            "pt-pt" => "pt-pt",
            "es" or "es-mx" or "es-cl" or "es-py" or "es-es" => "es",
            "en" or "en-us" => "en-us",
            "en-gb" => "en-gb",
            _ => "pt-br" // Default voice
        };
    }

    /// <summary>
    /// Flattens line breaks so the text is spoken as one continuous passage:
    /// "\n" becomes a space and "\r" is removed.
    /// </summary>
    private static string FlattenLineBreaks(string text)
    {
        return text
            .Replace("\n", " ")
            .Replace("\r", "");
    }
}
}