feat: conversor de audio

2026-01-25 14:47:46 -03:00 · 2026-01-25 14:47:46 -03:00 · 724e03176e
commit 724e03176e
parent 3b0c93a35e
15 changed files with 503 additions and 1 deletions
--- a/Areas/AudioTools/Controllers/SpeechToTextController.cs
+++ b/Areas/AudioTools/Controllers/SpeechToTextController.cs
@ -0,0 +1,69 @@
 using Microsoft.AspNetCore.Mvc;
 using Convert_It_Online.Services;
 using Microsoft.AspNetCore.Localization;
 namespace Convert_It_Online.Areas.AudioTools.Controllers
 {
    /// <summary>
    /// MVC controller for the speech-to-text tool: renders the upload page and
    /// passes uploaded audio to <see cref="IAudioTranscriptionService"/>.
    /// </summary>
    [Area("AudioTools")]
    [Route("{culture}/[area]/[controller]")]
    [Route("[area]/[controller]")] // Added for the Share Target, which has no fixed culture segment
    public class SpeechToTextController : Controller
    {
        private readonly IAudioTranscriptionService _transcriptionService;
        private readonly ILogger<SpeechToTextController> _logger;

        /// <summary>Stores the injected transcription service and logger.</summary>
        public SpeechToTextController(IAudioTranscriptionService transcriptionService, ILogger<SpeechToTextController> logger)
        {
            _transcriptionService = transcriptionService;
            _logger = logger;
        }

        /// <summary>Renders the upload form.</summary>
        [HttpGet]
        public IActionResult Index() => View();

        /// <summary>
        /// Saves the uploaded audio to a temporary file, asks the transcription
        /// service for its text and re-renders the Index view with either
        /// <c>ViewBag.Result</c> or <c>ViewBag.Error</c> set.
        /// </summary>
        /// <param name="audioFile">The uploaded audio file (form field <c>audioFile</c>).</param>
        /// <returns>The Index view.</returns>
        [HttpPost]
        public async Task<IActionResult> Transcribe(IFormFile audioFile)
        {
            // Reject a missing or empty upload before touching the disk.
            if (audioFile is null or { Length: 0 })
            {
                ViewBag.Error = "Por favor, selecione um arquivo de áudio.";
                return View("Index");
            }

            // Use the request's UI culture as the language hint; fall back to pt-BR
            // when no request culture feature is available.
            var cultureFeature = HttpContext.Features.Get<IRequestCultureFeature>();
            var culture = cultureFeature?.RequestCulture.UICulture.Name ?? "pt-BR";

            var tempPath = Path.GetTempFileName();
            try
            {
                await SaveUploadAsync(audioFile, tempPath);

                var transcribedText = await _transcriptionService.TranscribeAsync(tempPath, culture);
                ViewBag.Result = transcribedText;
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Erro no controller ao transcrever.");
                ViewBag.Error = "Erro ao processar o áudio. Verifique se o formato é suportado.";
            }
            finally
            {
                // The uploaded copy is removed whether or not transcription succeeded.
                if (System.IO.File.Exists(tempPath))
                {
                    System.IO.File.Delete(tempPath);
                }
            }

            return View("Index");
        }

        /// <summary>
        /// Entry point for shared audio; delegates to <see cref="Transcribe"/>.
        /// </summary>
        /// <param name="audio">The shared audio file (form field <c>audio</c>).</param>
        /// <returns>The Index view produced by <see cref="Transcribe"/>.</returns>
        // Android's Share Target usually sends the file as 'audio' or 'file'.
        [HttpPost("HandleShare")]
        public async Task<IActionResult> HandleShare(IFormFile audio) => await Transcribe(audio);

        /// <summary>
        /// Copies the upload into <paramref name="destinationPath"/>; the file
        /// stream is closed by the time the returned task completes.
        /// </summary>
        private static async Task SaveUploadAsync(IFormFile upload, string destinationPath)
        {
            using var target = new FileStream(destinationPath, FileMode.Create);
            await upload.CopyToAsync(target);
        }
    }
 }
--- a/Areas/AudioTools/Controllers/TextToSpeechController.cs
+++ b/Areas/AudioTools/Controllers/TextToSpeechController.cs
@ -0,0 +1,15 @@
 using Microsoft.AspNetCore.Mvc;
 namespace Convert_It_Online.Areas.AudioTools.Controllers
 {
    /// <summary>
    /// MVC controller for the text-to-speech tool. It only serves the page;
    /// no server-side processing is done in this controller.
    /// </summary>
    [Area("AudioTools")]
    [Route("{culture}/[area]/[controller]")]
    public class TextToSpeechController : Controller
    {
        /// <summary>Renders the text-to-speech page.</summary>
        [HttpGet]
        public IActionResult Index() => View();
    }
 }
--- a/Areas/AudioTools/Views/SpeechToText/Index.cshtml
+++ b/Areas/AudioTools/Views/SpeechToText/Index.cshtml
@ -0,0 +1,69 @@
@{
    ViewData["Title"] = "Áudio para Texto (Transcrição)";
    var culture = ViewContext.RouteData.Values["culture"] as string ?? "pt-BR";
 }
 <div class="text-center mb-5">
    <h1 class="display-4">@ViewData["Title"]</h1>
    <p class="lead">Converta áudios do WhatsApp, reuniões ou gravações em texto automaticamente usando IA.</p>
 </div>
 <div class="row justify-content-center">
    <div class="col-md-8">
        <div class="card shadow-custom p-4">
            <form asp-action="Transcribe" method="post" enctype="multipart/form-data">
                <div class="mb-4">
                    <label for="audioFile" class="form-label h5">Selecione o arquivo de áudio</label>
                    <input type="file" class="form-control form-control-lg" id="audioFile" name="audioFile" accept="audio/*" required>
                    <div class="form-text mt-2">Formatos suportados: MP3, WAV, OGG, OPUS, M4A, etc.</div>
                </div>
                <div class="d-grid gap-2">
                    <button type="submit" class="btn btn-primary btn-lg">
                        <i class="bi bi-mic-fill me-2"></i>Transcrever Áudio
                    </button>
                </div>
            </form>
            @if (ViewBag.Error != null)
            {
                <div class="alert alert-danger mt-4" role="alert">
                    <i class="bi bi-exclamation-triangle-fill me-2"></i>@ViewBag.Error
                </div>
            }
            @if (ViewBag.Result != null)
            {
                <div class="mt-5">
                    <h4 class="mb-3">Transcrição:</h4>
                    <div class="p-3 bg-light border rounded" style="min-height: 150px; white-space: pre-wrap;">@ViewBag.Result</div>
                    <div class="mt-3 d-flex gap-2">
                        <button class="btn btn-outline-secondary btn-sm" onclick="copyTranscription()">
                            <i class="bi bi-clipboard me-1"></i>Copiar Texto
                        </button>
                    </div>
                </div>
            }
        </div>
        <div class="mt-5">
            <h3 class="h5 mb-3"><i class="bi bi-shield-check me-2"></i>Privacidade e Tecnologia</h3>
            <p class="text-muted small">
                Seu áudio é processado usando a tecnologia <strong>OpenAI Whisper</strong> rodando diretamente em nosso servidor. 
                Não enviamos seus dados para APIs externas e os arquivos temporários são deletados imediatamente após a conversão.
            </p>
        </div>
    </div>
 </div>
@section Scripts {
    <script>
        /**
         * Copies the transcription text shown on the page to the clipboard and
         * tells the user whether the copy worked.
         */
        function copyTranscription() {
            const failureMessage = 'Não foi possível copiar a transcrição.';
            const resultBox = document.querySelector('.bg-light.border.rounded');

            // navigator.clipboard is only exposed in secure contexts (HTTPS or
            // localhost); without this guard the call below would throw.
            if (!resultBox || !navigator.clipboard) {
                alert(failureMessage);
                return;
            }

            navigator.clipboard.writeText(resultBox.innerText)
                .then(() => {
                    alert('Transcrição copiada!');
                })
                .catch(() => {
                    // Rejected when the browser denies clipboard access.
                    alert(failureMessage);
                });
        }
    </script>
 }
--- a/Areas/AudioTools/Views/TextToSpeech/Index.cshtml
+++ b/Areas/AudioTools/Views/TextToSpeech/Index.cshtml
@ -0,0 +1,122 @@
@{
    ViewData["Title"] = "Texto para Áudio (Voz)";
 }
 <div class="text-center mb-5">
    <h1 class="display-4">@ViewData["Title"]</h1>
    <p class="lead">Converta qualquer texto em fala usando vozes neurais de alta qualidade.</p>
 </div>
 <div class="row justify-content-center">
    <div class="col-md-8">
        <div class="card shadow-custom p-4">
            <div class="mb-4">
                <label for="textInput" class="form-label h5">Digite ou cole seu texto</label>
                <textarea class="form-control" id="textInput" rows="6" placeholder="Escreva aqui o que você deseja que seja lido..."></textarea>
            </div>
            <div class="row mb-4">
                <div class="col-md-6">
                    <label for="voiceSelect" class="form-label">Escolher Voz</label>
                    <select id="voiceSelect" class="form-select"></select>
                </div>
                <div class="col-md-3">
                    <label for="rate" class="form-label">Velocidade</label>
                    <input type="range" class="form-range" min="0.5" max="2" step="0.1" id="rate" value="1">
                </div>
                <div class="col-md-3">
                    <label for="pitch" class="form-label">Tom</label>
                    <input type="range" class="form-range" min="0" max="2" step="0.1" id="pitch" value="1">
                </div>
            </div>
            <div class="d-grid gap-2 d-md-flex justify-content-md-center">
                <button type="button" class="btn btn-primary btn-lg px-5" onclick="speak()">
                    <i class="bi bi-play-fill me-2"></i>Ouvir
                </button>
                <button type="button" class="btn btn-outline-danger btn-lg" onclick="stop()">
                    <i class="bi bi-stop-fill me-2"></i>Parar
                </button>
            </div>
        </div>
        <div class="mt-4 alert alert-info">
            <i class="bi bi-info-circle me-2"></i> 
            Esta ferramenta usa as vozes instaladas no seu dispositivo. No Android e Windows, você encontrará opções de vozes neurais muito naturais.
        </div>
    </div>
 </div>
@section Scripts {
    <script>
        // Browser speech synthesis entry point (Web Speech API).
        const synth = window.speechSynthesis;
        // Page controls read by the functions in this script.
        const voiceSelect = document.querySelector('#voiceSelect');
        const textInput = document.querySelector('#textInput');
        const rate = document.querySelector('#rate');
        const pitch = document.querySelector('#pitch');
        // Sorted list of available voices; filled by populateVoiceList().
        let voices = [];
        /**
         * Rebuilds the voice dropdown from synth.getVoices(), sorted by
         * upper-cased name, and restores the previously selected index
         * (index 0 when nothing was selected).
         */
        function populateVoiceList() {
            const byUpperCaseName = (left, right) => {
                const leftName = left.name.toUpperCase();
                const rightName = right.name.toUpperCase();
                if (leftName < rightName) return -1;
                return leftName > rightName ? 1 : 0;
            };
            voices = synth.getVoices().sort(byUpperCaseName);

            // Remember the current selection before the options are discarded.
            const previousIndex = Math.max(voiceSelect.selectedIndex, 0);
            voiceSelect.innerHTML = '';

            for (const voice of voices) {
                let label = voice.name + ' (' + voice.lang + ')';
                if (voice.default) {
                    label += ' -- PADRÃO';
                }

                const entry = document.createElement('option');
                entry.textContent = label;
                entry.setAttribute('data-lang', voice.lang);
                entry.setAttribute('data-name', voice.name);
                voiceSelect.appendChild(entry);
            }

            voiceSelect.selectedIndex = previousIndex;
        }
        // Fill the dropdown once now, then again whenever the browser reports
        // that its voice list changed (only if it exposes onvoiceschanged).
        populateVoiceList();
        if (speechSynthesis.onvoiceschanged !== undefined) {
            speechSynthesis.onvoiceschanged = populateVoiceList;
        }
        /**
         * Speaks the text in the textarea using the voice, pitch and rate chosen
         * on the page. Does nothing when the text is empty, and only logs an
         * error when speech is already in progress.
         */
        function speak() {
            if (synth.speaking) {
                console.error('speechSynthesis.speaking');
                return;
            }
            if (textInput.value === '') {
                return;
            }

            const utterThis = new SpeechSynthesisUtterance(textInput.value);
            utterThis.onend = function (event) {
                console.log('SpeechSynthesisUtterance.onend');
            };
            utterThis.onerror = function (event) {
                console.error('SpeechSynthesisUtterance.onerror');
            };

            // The dropdown has no options while the voice list is empty, so
            // selectedOptions[0] may be undefined; in that case no voice is set
            // on the utterance instead of throwing.
            const selectedOption = voiceSelect.selectedOptions[0];
            const selectedName = selectedOption ? selectedOption.getAttribute('data-name') : null;
            const chosenVoice = voices.find(function (voice) {
                return voice.name === selectedName;
            });
            if (chosenVoice) {
                utterThis.voice = chosenVoice;
            }

            // Range inputs expose their value as a string; convert explicitly.
            utterThis.pitch = Number(pitch.value);
            utterThis.rate = Number(rate.value);
            synth.speak(utterThis);
        }
        /** Handler for the stop button: calls cancel() on the speech synthesizer. */
        function stop() {
            synth.cancel();
        }
    </script>
 }
--- a/Areas/AudioTools/Views/_ViewImports.cshtml
+++ b/Areas/AudioTools/Views/_ViewImports.cshtml
@ -0,0 +1,5 @@
@using Convert_It_Online
@using Microsoft.AspNetCore.Mvc.Localization
@addTagHelper *, Microsoft.AspNetCore.Mvc.TagHelpers
@inject IViewLocalizer Localizer
--- a/Areas/AudioTools/Views/_ViewStart.cshtml
+++ b/Areas/AudioTools/Views/_ViewStart.cshtml
@ -0,0 +1,3 @@
@{
    Layout = "_Layout";
 }
--- a/Convert-It.csproj
+++ b/Convert-It.csproj
@ -23,6 +23,9 @@
    <PackageReference Include="Serilog.Enrichers.Environment" Version="3.0.1" />
    <PackageReference Include="Serilog.Enrichers.Process" Version="3.0.0" />
    <PackageReference Include="Serilog.Enrichers.Thread" Version="4.0.0" />
    <PackageReference Include="Whisper.net" Version="1.9.0" />
    <PackageReference Include="Whisper.net.Runtime" Version="1.9.0" />
    <PackageReference Include="Xabe.FFmpeg" Version="6.0.2" />
  </ItemGroup>
  </Project>
--- a/8
+++ b/8
@ -44,6 +44,14 @@ FROM base AS final
 WORKDIR /app
 COPY --from=publish /app/publish .
 # Instalar ffmpeg e bibliotecas nativas (rodar como root)
 USER root
 RUN apt-get update && apt-get install -y \
    ffmpeg \
    libc6-dev \
    && rm -rf /var/lib/apt/lists/*
 USER app
 # Variáveis de ambiente otimizadas para produção
 ENV DOTNET_SYSTEM_GLOBALIZATION_INVARIANT=false
 ENV DOTNET_USE_POLLING_FILE_WATCHER=true
--- a/Program.cs
+++ b/Program.cs
@ -163,6 +163,7 @@ builder.Host.UseSerilog();
 builder.Services.AddLocalization();
 builder.Services.AddSingleton<IUrlTranslationService, UrlTranslationService>();
 builder.Services.AddSingleton<IAudioTranscriptionService, AudioTranscriptionService>();
 var supportedCultures = new[] { "pt-BR", "es-MX", "es-CL", "es-PY" };
 builder.Services.Configure<RequestLocalizationOptions>(options =>
--- a/Readme.md
+++ b/Readme.md
@ -1 +1,47 @@
-
+# Convert-It Online
 Ferramenta multiuso de conversão de arquivos (Imagens, Documentos, Texto e Áudio) desenvolvida em ASP.NET Core 8 MVC.
 ## 🛠️ Funcionalidades
 - **Imagens:** HEIC para JPG, JPG para WebP.
 - **Documentos:** PDF para Texto, Extração de Linha Digitável de Boletos (Barcode).
 - **Texto:** Conversor de Case (Maiúsculo/Minúsculo).
 - **Áudio:** Transcrição de Áudio para Texto (Whisper AI) e Texto para Voz (Web Speech API).
 - **PWA:** Suporte a instalação e integração com menu de compartilhamento do Android (Share Target).
 ## 🚀 Dependências Externas (Obrigatório)
 Para as funcionalidades de áudio (transcrição), o projeto depende do **FFmpeg**.
 ### 🐧 Linux (Ubuntu/Debian)
 ```bash
 sudo apt update
 sudo apt install ffmpeg
 ```
 ### 🪟 Windows
 1. Baixe os binários em [ffmpeg.org](https://ffmpeg.org/download.html).
 2. Extraia para uma pasta (ex: `C:\ffmpeg`).
 3. Adicione a pasta `bin` (ex: `C:\ffmpeg\bin`) às **Variáveis de Ambiente do Sistema (PATH)**.
 4. Reinicie o terminal ou o Visual Studio.
 ### 🐳 Docker
 A imagem Docker já está configurada para instalar o `ffmpeg` automaticamente durante o build.
 ## 💻 Desenvolvimento Local
 1. Certifique-se de ter o .NET 8 SDK instalado.
 2. Clone o repositório.
 3. Configure o FFmpeg conforme instruções acima.
 4. Execute o comando:
   ```bash
   dotnet run
   ```
 ## 📱 PWA & Android Share Target
 O projeto está configurado como um Progressive Web App. Ao "Instalar" o site no Android:
 1. Ele aparecerá como um aplicativo nativo.
 2. Você poderá compartilhar arquivos de áudio diretamente do WhatsApp para o Convert-It para transcrição automática.
 ---
 Desenvolvido por Ricardo.
--- a/Services/AudioTranscriptionService.cs
+++ b/Services/AudioTranscriptionService.cs
@ -0,0 +1,97 @@
 using System;
 using System.IO;
 using System.Net.Http;
 using System.Threading.Tasks;
 using Whisper.net;
 using Whisper.net.Ggml;
 using Xabe.FFmpeg;
 using Microsoft.Extensions.Logging;
 namespace Convert_It_Online.Services
 {
    /// <summary>
    /// Transcribes audio files to text: the input is converted to 16 kHz mono
    /// PCM WAV with FFmpeg and then processed with a Whisper "base" GGML model,
    /// which is downloaded on first use into the <c>Models</c> folder next to
    /// the application binaries.
    /// </summary>
    public class AudioTranscriptionService : IAudioTranscriptionService
    {
        private readonly string _modelPath;
        private readonly ILogger<AudioTranscriptionService> _logger;
        private readonly HttpClient _httpClient;

        // Serializes the one-time model download so that concurrent callers of
        // the same service instance never write the model file at the same time.
        private readonly System.Threading.SemaphoreSlim _modelDownloadGate = new System.Threading.SemaphoreSlim(1, 1);

        /// <summary>
        /// Computes the model path and makes sure its directory exists.
        /// </summary>
        /// <param name="logger">Logger used for progress and error messages.</param>
        public AudioTranscriptionService(ILogger<AudioTranscriptionService> logger)
        {
            _logger = logger;
            _httpClient = new HttpClient();
            _modelPath = Path.Combine(AppContext.BaseDirectory, "Models", "ggml-base.bin");

            // Make sure the Models folder exists (no-op when it already does).
            var modelsDir = Path.GetDirectoryName(_modelPath);
            Directory.CreateDirectory(modelsDir!);
        }

        /// <summary>
        /// Downloads the Whisper base model when it is not on disk yet.
        /// The download is written to a staging file and moved into place only
        /// after it completed, so an interrupted download never leaves a
        /// truncated file at the path later calls treat as a valid model.
        /// </summary>
        private async Task EnsureModelExistsAsync()
        {
            if (System.IO.File.Exists(_modelPath))
            {
                return;
            }

            await _modelDownloadGate.WaitAsync();
            try
            {
                // Another caller may have finished the download while this one
                // was waiting for the gate.
                if (System.IO.File.Exists(_modelPath))
                {
                    return;
                }

                _logger.LogInformation("Baixando modelo Whisper Base...");

                // A leftover staging file from a failed attempt is simply
                // overwritten by File.Create.
                var stagingPath = _modelPath + ".download";
                var downloader = new WhisperGgmlDownloader(_httpClient);
                using (var modelStream = await downloader.GetGgmlModelAsync(GgmlType.Base))
                using (var fileStream = System.IO.File.Create(stagingPath))
                {
                    await modelStream.CopyToAsync(fileStream);
                }

                System.IO.File.Move(stagingPath, _modelPath, true);
                _logger.LogInformation("Modelo Whisper baixado com sucesso.");
            }
            finally
            {
                _modelDownloadGate.Release();
            }
        }

        /// <summary>
        /// Transcribes the audio file at <paramref name="inputPath"/>.
        /// </summary>
        /// <param name="inputPath">Path of the audio file to transcribe.</param>
        /// <param name="culture">
        /// Culture name such as <c>pt-BR</c>; only the part before the first
        /// <c>-</c> is passed to Whisper as the language.
        /// </param>
        /// <returns>The segment texts joined by single spaces, trimmed.</returns>
        /// <remarks>
        /// Any exception is logged and rethrown. The intermediate WAV file is
        /// deleted in all cases.
        /// </remarks>
        public async Task<string> TranscribeAsync(string inputPath, string culture = "pt-BR")
        {
            await EnsureModelExistsAsync();
            string tempWavPath = Path.Combine(Path.GetTempPath(), $"{Guid.NewGuid()}.wav");
            try
            {
                _logger.LogInformation("Convertendo áudio para WAV 16kHz Mono...");
                // FFmpeg is assumed to be on the PATH (Linux); on Windows
                // FFmpeg.SetExecutablesPath may be required.
                await FFmpeg.Conversions.New()
                    .AddParameter($"-i \"{inputPath}\"")
                    .AddParameter("-ar 16000")
                    .AddParameter("-ac 1")
                    .AddParameter("-c:a pcm_s16le")
                    .SetOutput(tempWavPath)
                    .Start();

                _logger.LogInformation("Iniciando transcrição com Whisper...");
                using var factory = WhisperFactory.FromPath(_modelPath);
                using var processor = factory.CreateBuilder()
                    .WithLanguage(culture.Split('-')[0]) // "pt", "es", etc.
                    .Build();
                using var wavStream = System.IO.File.OpenRead(tempWavPath);

                // StringBuilder avoids re-allocating the whole transcript for
                // every segment.
                var transcript = new System.Text.StringBuilder();
                await foreach (var segment in processor.ProcessAsync(wavStream))
                {
                    transcript.Append(segment.Text).Append(' ');
                }
                return transcript.ToString().Trim();
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Erro durante a transcrição de áudio.");
                throw;
            }
            finally
            {
                if (System.IO.File.Exists(tempWavPath))
                {
                    System.IO.File.Delete(tempWavPath);
                }
            }
        }
    }
 }
--- a/Services/IAudioTranscriptionService.cs
+++ b/Services/IAudioTranscriptionService.cs
@ -0,0 +1,9 @@
 using System.Threading.Tasks;
 namespace Convert_It_Online.Services
 {
    /// <summary>
    /// Contract for converting an audio file into text.
    /// </summary>
    public interface IAudioTranscriptionService
    {
        /// <summary>
        /// Transcribes the audio file at <paramref name="inputPath"/>.
        /// </summary>
        /// <param name="inputPath">Path of the audio file to transcribe.</param>
        /// <param name="culture">Culture name used as the language hint; defaults to <c>pt-BR</c>.</param>
        /// <returns>A task that yields the transcribed text.</returns>
        Task<string> TranscribeAsync(string inputPath, string culture = "pt-BR");
    }
 }
--- a/Views/Shared/_Layout.cshtml
+++ b/Views/Shared/_Layout.cshtml
@ -29,6 +29,7 @@
    <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.2/dist/css/bootstrap.min.css" />
    <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bootstrap-icons@1.11.3/font/bootstrap-icons.min.css">
    <link rel="stylesheet" href="~/css/site.css" />
    <link rel="manifest" href="~/manifest.json" />
    @if (adEnabled && adProvider == "Google" && !string.IsNullOrEmpty(googlePublisherId))
    {
@ -85,6 +86,19 @@
                                </a></li>
                            </ul>
                        </li>
                        <li class="nav-item dropdown mx-2">
                            <a class="nav-link dropdown-toggle" href="#" id="audioToolsDropdown" role="button" data-bs-toggle="dropdown" aria-expanded="false">
                                <i class="bi bi-mic me-1"></i>Áudio
                            </a>
                            <ul class="dropdown-menu" aria-labelledby="audioToolsDropdown">
                                <li><a class="dropdown-item" href="@Html.LocalizedUrl("AudioTools", "SpeechToText")">
                                    <i class="bi bi-chat-left-text me-2"></i>Áudio para Texto
                                </a></li>
                                <li><a class="dropdown-item" href="@Html.LocalizedUrl("AudioTools", "TextToSpeech")">
                                    <i class="bi bi-megaphone me-2"></i>Texto para Áudio
                                </a></li>
                            </ul>
                        </li>
                    </ul>
                    <div class="dropdown">
                        <button class="btn btn-secondary dropdown-toggle" type="button" id="languageDropdown" data-bs-toggle="dropdown" aria-expanded="false">
@ -226,6 +240,11 @@
    </footer>
    <script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.2/dist/js/bootstrap.bundle.min.js"></script>
    <script>
        if ('serviceWorker' in navigator) {
            navigator.serviceWorker.register('/sw.js');
        }
    </script>
    @await RenderSectionAsync("Scripts", required: false)
 </body>
 </html>
--- a/wwwroot/manifest.json
+++ b/wwwroot/manifest.json
@ -0,0 +1,29 @@
 {
  "name": "Convert-It Online",
  "short_name": "Convert-It",
  "start_url": "/",
  "display": "standalone",
  "background_color": "#0d6efd",
  "theme_color": "#0d6efd",
  "description": "Conversores rápidos de imagem, documento e áudio.",
  "icons": [
    {
      "src": "/favicon.ico",
      "sizes": "64x64",
      "type": "image/x-icon"
    }
  ],
  "share_target": {
    "action": "/AudioTools/SpeechToText/HandleShare",
    "method": "POST",
    "enctype": "multipart/form-data",
    "params": {
      "files": [
        {
          "name": "audio",
          "accept": ["audio/*"]
        }
      ]
    }
  }
 }
--- a/wwwroot/sw.js
+++ b/wwwroot/sw.js
@ -0,0 +1,7 @@
 // Minimal service worker: both handlers are intentionally empty.

 /** Runs when the service worker is installed; nothing is done here. */
 function onInstall() {
  // Installed
 }

 /** Empty fetch handler; required for the app to be installable. */
 function onFetch() {
  // Required to be installable
 }

 self.addEventListener('install', onInstall);
 self.addEventListener('fetch', onFetch);