diff --git a/Areas/AudioTools/Views/TextToSpeech/Index.cshtml b/Areas/AudioTools/Views/TextToSpeech/Index.cshtml index c5fc946..3c3322f 100644 --- a/Areas/AudioTools/Views/TextToSpeech/Index.cshtml +++ b/Areas/AudioTools/Views/TextToSpeech/Index.cshtml @@ -5,7 +5,7 @@

@ViewData["Title"]

-

Converta qualquer texto em fala usando vozes neurais de alta qualidade.

+

Converta qualquer texto em áudio para compartilhar no WhatsApp ou baixar.

@@ -13,47 +13,71 @@
- + +
0/5000 caracteres
-
- - +
+ +
-
- - +
+ +
-
- - +
+ +
-
- - -
-
@@ -61,159 +85,34 @@ @section Scripts { } diff --git a/Controllers/TtsApiController.cs b/Controllers/TtsApiController.cs index c906503..a3cc141 100644 --- a/Controllers/TtsApiController.cs +++ b/Controllers/TtsApiController.cs @@ -32,17 +32,24 @@ namespace Convert_It_Online.Controllers try { - _logger.LogInformation("Gerando áudio TTS: {Length} caracteres, idioma: {Lang}", - request.Text.Length, request.Language); + _logger.LogInformation("Gerando áudio TTS: {Length} caracteres, idioma: {Lang}, gênero: {Gender}", + request.Text.Length, request.Language, request.Gender); var audioBytes = await _ttsService.GenerateAudioAsync( request.Text, - request.Language ?? "pt-BR", + request.Language ?? "pt-br", request.Rate ?? 1.0f, - request.Pitch ?? 1.0f + request.Gender ?? "f" ); - return File(audioBytes, "audio/ogg", "audio.ogg"); + // Retornar como JSON com base64 + var base64Audio = Convert.ToBase64String(audioBytes); + return Ok(new TtsResponse + { + Audio = base64Audio, + Format = "ogg", + Size = audioBytes.Length + }); } catch (InvalidOperationException ex) { @@ -62,6 +69,13 @@ namespace Convert_It_Online.Controllers public string Text { get; set; } = string.Empty; public string? Language { get; set; } public float? Rate { get; set; } - public float? Pitch { get; set; } + public string? Gender { get; set; } + } + + public class TtsResponse + { + public string Audio { get; set; } = string.Empty; + public string Format { get; set; } = "ogg"; + public int Size { get; set; } } } diff --git a/Services/ITextToSpeechService.cs b/Services/ITextToSpeechService.cs index e46ae1e..131204a 100644 --- a/Services/ITextToSpeechService.cs +++ b/Services/ITextToSpeechService.cs @@ -4,6 +4,6 @@ namespace Convert_It_Online.Services { public interface ITextToSpeechService { - Task GenerateAudioAsync(string text, string language = "pt-BR", float rate = 1.0f, float pitch = 1.0f); + Task GenerateAudioAsync(string text, string language = "pt-br", float rate = 1.0f, string gender = "f"); } } diff --git a/Services/TextToSpeechService.cs b/Services/TextToSpeechService.cs index a0c6dab..9262523 100644 --- a/Services/TextToSpeechService.cs +++ b/Services/TextToSpeechService.cs @@ -83,7 +83,7 @@ namespace Convert_It_Online.Services return null; } - public async Task GenerateAudioAsync(string text, string language = "pt-BR", float rate = 1.0f, float pitch = 1.0f) + public async Task GenerateAudioAsync(string text, string language = "pt-br", float rate = 1.0f, string gender = "f") { if (_espeakPath == null) throw new InvalidOperationException("espeak-ng não está instalado no servidor."); @@ -96,21 +96,20 @@ namespace Convert_It_Online.Services try { - // Mapear código de idioma para espeak - var espeakLang = MapLanguageCode(language); + // Mapear código de idioma para espeak (com gênero) + var espeakVoice = MapVoice(language, gender); // Calcular velocidade (espeak usa words per minute, padrão ~175) - var wordsPerMinute = (int)(175 * rate); - if (wordsPerMinute < 80) wordsPerMinute = 80; - if (wordsPerMinute > 450) wordsPerMinute = 450; + // Usar uma faixa mais conservadora para melhor inteligibilidade + var wordsPerMinute = (int)(150 * rate); + if (wordsPerMinute < 100) wordsPerMinute = 100; + if (wordsPerMinute > 200) wordsPerMinute = 200; - // Calcular pitch (espeak usa 0-99, padrão 50) - var espeakPitch = (int)(50 * pitch); - if (espeakPitch < 0) espeakPitch = 0; - if (espeakPitch > 99) espeakPitch = 99; + // Pitch fixo para melhor qualidade + var espeakPitch = 50; // Gerar WAV com espeak-ng - var espeakArgs = $"-v {espeakLang} -s {wordsPerMinute} -p {espeakPitch} -w \"{tempWavPath}\" \"{EscapeText(text)}\""; + var espeakArgs = $"-v {espeakVoice} -s {wordsPerMinute} -p {espeakPitch} -w \"{tempWavPath}\" \"{EscapeText(text)}\""; _logger.LogInformation("Executando espeak-ng: {Args}", espeakArgs); @@ -181,19 +180,24 @@ namespace Convert_It_Online.Services } } - private string MapLanguageCode(string language) + private string MapVoice(string language, string gender) { + // espeak-ng usa sufixo +f1, +f2, etc para vozes femininas + // e +m1, +m2, etc para vozes masculinas + var genderSuffix = gender?.ToLower() == "m" ? "+m3" : "+f2"; + // Mapear códigos de idioma para vozes espeak-ng - return language.ToLower() switch + var baseLang = language.ToLower() switch { "pt-br" => "pt-br", - "pt" => "pt", - "pt-pt" => "pt-pt", + "pt" or "pt-pt" => "pt-pt", "es" or "es-mx" or "es-cl" or "es-py" or "es-es" => "es", "en" or "en-us" => "en-us", "en-gb" => "en-gb", - _ => "pt-br" // Padrão + _ => "pt-br" }; + + return baseLang + genderSuffix; } private string EscapeText(string text)