feat: conversor de audio
All checks were successful
Deploy ASP.NET MVC to OCI / build-and-deploy (push) Successful in 21m10s

This commit is contained in:
Ricardo Carneiro 2026-01-25 14:47:46 -03:00
parent 3b0c93a35e
commit 724e03176e
15 changed files with 503 additions and 1 deletions

View File

@ -0,0 +1,69 @@
using Microsoft.AspNetCore.Mvc;
using Convert_It_Online.Services;
using Microsoft.AspNetCore.Localization;
namespace Convert_It_Online.Areas.AudioTools.Controllers
{
/// <summary>
/// MVC controller for the "audio to text" tool: renders the upload form and
/// hands uploaded audio to <see cref="IAudioTranscriptionService"/>.
/// </summary>
[Area("AudioTools")]
[Route("{culture}/[area]/[controller]")]
[Route("[area]/[controller]")] // Added for the Share Target, which posts without a fixed culture segment
public class SpeechToTextController : Controller
{
    private readonly IAudioTranscriptionService _transcriptionService;
    private readonly ILogger<SpeechToTextController> _logger;

    public SpeechToTextController(IAudioTranscriptionService transcriptionService, ILogger<SpeechToTextController> logger)
    {
        _transcriptionService = transcriptionService;
        _logger = logger;
    }

    /// <summary>Shows the upload form.</summary>
    [HttpGet]
    public IActionResult Index()
    {
        return View();
    }

    /// <summary>
    /// Saves the uploaded audio to a temporary file, transcribes it and re-renders
    /// the Index view with either <c>ViewBag.Result</c> or <c>ViewBag.Error</c> set.
    /// </summary>
    /// <param name="audioFile">Uploaded audio file (form field <c>audioFile</c>).</param>
    /// <returns>The Index view.</returns>
    [HttpPost]
    public async Task<IActionResult> Transcribe(IFormFile audioFile)
    {
        if (audioFile == null || audioFile.Length == 0)
        {
            ViewBag.Error = "Por favor, selecione um arquivo de áudio.";
            return View("Index");
        }

        // Use the request's UI culture as the transcription language hint; default to pt-BR.
        var culture = HttpContext.Features.Get<IRequestCultureFeature>()?.RequestCulture.UICulture.Name ?? "pt-BR";
        var tempPath = Path.GetTempFileName();
        try
        {
            // The stream must be closed before the service reads the file,
            // hence the explicit scope instead of a using declaration.
            await using (var stream = new FileStream(tempPath, FileMode.Create))
            {
                await audioFile.CopyToAsync(stream);
            }

            ViewBag.Result = await _transcriptionService.TranscribeAsync(tempPath, culture);
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Erro no controller ao transcrever.");
            ViewBag.Error = "Erro ao processar o áudio. Verifique se o formato é suportado.";
        }
        finally
        {
            // Always remove the uploaded copy, whether transcription succeeded or not.
            if (System.IO.File.Exists(tempPath))
            {
                System.IO.File.Delete(tempPath);
            }
        }

        return View("Index");
    }

    /// <summary>
    /// Entry point for the PWA Share Target: receives a shared audio file and
    /// runs the same flow as <see cref="Transcribe"/>.
    /// </summary>
    /// <param name="audio">Shared file bound from the form field <c>audio</c>.</param>
    /// <returns>The Index view.</returns>
    [HttpPost("HandleShare")]
    public async Task<IActionResult> HandleShare(IFormFile audio)
    {
        // Android's Share Target usually sends the file as 'audio' or 'file'.
        // Model binding only covers 'audio', so fall back to the first uploaded
        // file when the field arrives under a different name.
        var shared = audio;
        if (shared == null && Request.HasFormContentType && Request.Form.Files.Count > 0)
        {
            shared = Request.Form.Files[0];
        }

        return await Transcribe(shared);
    }
}
}

View File

@ -0,0 +1,15 @@
using Microsoft.AspNetCore.Mvc;
namespace Convert_It_Online.Areas.AudioTools.Controllers
{
/// <summary>
/// MVC controller for the "text to speech" tool. It only serves the page;
/// no other actions are defined here.
/// </summary>
[Area("AudioTools")]
[Route("{culture}/[area]/[controller]")]
public class TextToSpeechController : Controller
{
    /// <summary>Renders the text-to-speech page.</summary>
    [HttpGet]
    public IActionResult Index() => View();
}
}

View File

@ -0,0 +1,69 @@
@* Page setup for the audio-to-text view: only the title is needed by the layout and the heading below. *@
@{
    ViewData["Title"] = "Áudio para Texto (Transcrição)";
}
<div class="text-center mb-5">
<h1 class="display-4">@ViewData["Title"]</h1>
<p class="lead">Converta áudios do WhatsApp, reuniões ou gravações em texto automaticamente usando IA.</p>
</div>
<div class="row justify-content-center">
<div class="col-md-8">
<div class="card shadow-custom p-4">
<form asp-action="Transcribe" method="post" enctype="multipart/form-data">
<div class="mb-4">
<label for="audioFile" class="form-label h5">Selecione o arquivo de áudio</label>
<input type="file" class="form-control form-control-lg" id="audioFile" name="audioFile" accept="audio/*" required>
<div class="form-text mt-2">Formatos suportados: MP3, WAV, OGG, OPUS, M4A, etc.</div>
</div>
<div class="d-grid gap-2">
<button type="submit" class="btn btn-primary btn-lg">
<i class="bi bi-mic-fill me-2"></i>Transcrever Áudio
</button>
</div>
</form>
@if (ViewBag.Error != null)
{
<div class="alert alert-danger mt-4" role="alert">
<i class="bi bi-exclamation-triangle-fill me-2"></i>@ViewBag.Error
</div>
}
@if (ViewBag.Result != null)
{
<div class="mt-5">
<h4 class="mb-3">Transcrição:</h4>
<div class="p-3 bg-light border rounded" style="min-height: 150px; white-space: pre-wrap;">@ViewBag.Result</div>
<div class="mt-3 d-flex gap-2">
<button class="btn btn-outline-secondary btn-sm" onclick="copyTranscription()">
<i class="bi bi-clipboard me-1"></i>Copiar Texto
</button>
</div>
</div>
}
</div>
<div class="mt-5">
<h3 class="h5 mb-3"><i class="bi bi-shield-check me-2"></i>Privacidade e Tecnologia</h3>
<p class="text-muted small">
Seu áudio é processado usando a tecnologia <strong>OpenAI Whisper</strong> rodando diretamente em nosso servidor.
Não enviamos seus dados para APIs externas e os arquivos temporários são deletados imediatamente após a conversão.
</p>
</div>
</div>
</div>
@section Scripts {
<script>
// Copies the rendered transcription text to the clipboard and tells the user
// whether it worked.
function copyTranscription() {
    const failureMessage = 'Não foi possível copiar a transcrição.';
    const box = document.querySelector('.bg-light.border.rounded');

    // navigator.clipboard is undefined outside secure contexts and in older
    // browsers; the result box is only rendered when a transcription exists.
    if (!box || !navigator.clipboard) {
        alert(failureMessage);
        return;
    }

    navigator.clipboard.writeText(box.innerText)
        .then(() => {
            alert('Transcrição copiada!');
        })
        .catch(() => {
            // Permission denied or document not focused: report instead of failing silently.
            alert(failureMessage);
        });
}
</script>
}

View File

@ -0,0 +1,122 @@
@{
ViewData["Title"] = "Texto para Áudio (Voz)";
}
<div class="text-center mb-5">
<h1 class="display-4">@ViewData["Title"]</h1>
<p class="lead">Converta qualquer texto em fala usando vozes neurais de alta qualidade.</p>
</div>
<div class="row justify-content-center">
<div class="col-md-8">
<div class="card shadow-custom p-4">
<div class="mb-4">
<label for="textInput" class="form-label h5">Digite ou cole seu texto</label>
<textarea class="form-control" id="textInput" rows="6" placeholder="Escreva aqui o que você deseja que seja lido..."></textarea>
</div>
<div class="row mb-4">
<div class="col-md-6">
<label for="voiceSelect" class="form-label">Escolher Voz</label>
<select id="voiceSelect" class="form-select"></select>
</div>
<div class="col-md-3">
<label for="rate" class="form-label">Velocidade</label>
<input type="range" class="form-range" min="0.5" max="2" step="0.1" id="rate" value="1">
</div>
<div class="col-md-3">
<label for="pitch" class="form-label">Tom</label>
<input type="range" class="form-range" min="0" max="2" step="0.1" id="pitch" value="1">
</div>
</div>
<div class="d-grid gap-2 d-md-flex justify-content-md-center">
<button type="button" class="btn btn-primary btn-lg px-5" onclick="speak()">
<i class="bi bi-play-fill me-2"></i>Ouvir
</button>
<button type="button" class="btn btn-outline-danger btn-lg" onclick="stop()">
<i class="bi bi-stop-fill me-2"></i>Parar
</button>
</div>
</div>
<div class="mt-4 alert alert-info">
<i class="bi bi-info-circle me-2"></i>
Esta ferramenta usa as vozes instaladas no seu dispositivo. No Android e Windows, você encontrará opções de vozes neurais muito naturais.
</div>
</div>
</div>
@section Scripts {
<script>
// Shared state for the text-to-speech page: the browser speech synthesizer,
// the form controls read by speak(), and the current (sorted) voice list
// filled in by populateVoiceList().
const synth = window.speechSynthesis;
const voiceSelect = document.querySelector('#voiceSelect');
const textInput = document.querySelector('#textInput');
const rate = document.querySelector('#rate');
const pitch = document.querySelector('#pitch');
let voices = [];
// Reloads the available voices (sorted by upper-cased name), rebuilds the
// <select> options from them and restores the previously selected index.
function populateVoiceList() {
    const byUpperCaseName = (left, right) => {
        const leftName = left.name.toUpperCase();
        const rightName = right.name.toUpperCase();
        if (leftName < rightName) return -1;
        return leftName > rightName ? 1 : 0;
    };
    voices = synth.getVoices().sort(byUpperCaseName);

    // Remember the selection before wiping the options; -1 (nothing selected) becomes 0.
    const previousIndex = Math.max(voiceSelect.selectedIndex, 0);
    voiceSelect.innerHTML = '';

    for (const voice of voices) {
        const option = document.createElement('option');
        let label = `${voice.name} (${voice.lang})`;
        if (voice.default) {
            label += ' -- PADRÃO';
        }
        option.textContent = label;
        // speak() looks the voice up again through data-name.
        option.setAttribute('data-lang', voice.lang);
        option.setAttribute('data-name', voice.name);
        voiceSelect.appendChild(option);
    }

    voiceSelect.selectedIndex = previousIndex;
}
// Fill the voice list once on load, then, if the browser exposes the
// onvoiceschanged event, rebuild it every time that event fires.
populateVoiceList();
if (speechSynthesis.onvoiceschanged !== undefined) {
speechSynthesis.onvoiceschanged = populateVoiceList;
}
// Speaks the text in the textarea using the selected voice, rate and pitch.
// Does nothing when speech is already in progress or the textarea is empty.
function speak() {
    if (synth.speaking) {
        console.error('speechSynthesis.speaking');
        return;
    }
    if (textInput.value === '') {
        return;
    }

    const utterThis = new SpeechSynthesisUtterance(textInput.value);
    utterThis.onend = function (event) {
        console.log('SpeechSynthesisUtterance.onend');
    };
    utterThis.onerror = function (event) {
        console.error('SpeechSynthesisUtterance.onerror');
    };

    // The <select> is empty until the browser reports its voices, so there may
    // be no selected option; in that case leave utterThis.voice unset so the
    // browser's default voice is used instead of throwing a TypeError.
    const chosenOption = voiceSelect.selectedOptions[0];
    if (chosenOption) {
        const chosenName = chosenOption.getAttribute('data-name');
        const match = voices.find((voice) => voice.name === chosenName);
        if (match) {
            utterThis.voice = match;
        }
    }

    // Range inputs expose their value as a string; pitch and rate are numeric.
    utterThis.pitch = Number(pitch.value);
    utterThis.rate = Number(rate.value);
    synth.speak(utterThis);
}
// Click handler for the "Parar" button: cancels speech on the shared synthesizer.
function stop() {
synth.cancel();
}
</script>
}

View File

@ -0,0 +1,5 @@
@using Convert_It_Online
@using Microsoft.AspNetCore.Mvc.Localization
@addTagHelper *, Microsoft.AspNetCore.Mvc.TagHelpers
@inject IViewLocalizer Localizer

View File

@ -0,0 +1,3 @@
@{
Layout = "_Layout";
}

View File

@ -23,6 +23,9 @@
<PackageReference Include="Serilog.Enrichers.Environment" Version="3.0.1" /> <PackageReference Include="Serilog.Enrichers.Environment" Version="3.0.1" />
<PackageReference Include="Serilog.Enrichers.Process" Version="3.0.0" /> <PackageReference Include="Serilog.Enrichers.Process" Version="3.0.0" />
<PackageReference Include="Serilog.Enrichers.Thread" Version="4.0.0" /> <PackageReference Include="Serilog.Enrichers.Thread" Version="4.0.0" />
<PackageReference Include="Whisper.net" Version="1.9.0" />
<PackageReference Include="Whisper.net.Runtime" Version="1.9.0" />
<PackageReference Include="Xabe.FFmpeg" Version="6.0.2" />
</ItemGroup> </ItemGroup>
</Project> </Project>

View File

@ -44,6 +44,14 @@ FROM base AS final
WORKDIR /app WORKDIR /app
COPY --from=publish /app/publish . COPY --from=publish /app/publish .
# Instalar ffmpeg e bibliotecas nativas (rodar como root)
USER root
RUN apt-get update && apt-get install -y \
ffmpeg \
libc6-dev \
&& rm -rf /var/lib/apt/lists/*
USER app
# Variáveis de ambiente otimizadas para produção # Variáveis de ambiente otimizadas para produção
ENV DOTNET_SYSTEM_GLOBALIZATION_INVARIANT=false ENV DOTNET_SYSTEM_GLOBALIZATION_INVARIANT=false
ENV DOTNET_USE_POLLING_FILE_WATCHER=true ENV DOTNET_USE_POLLING_FILE_WATCHER=true

View File

@ -163,6 +163,7 @@ builder.Host.UseSerilog();
builder.Services.AddLocalization(); builder.Services.AddLocalization();
builder.Services.AddSingleton<IUrlTranslationService, UrlTranslationService>(); builder.Services.AddSingleton<IUrlTranslationService, UrlTranslationService>();
builder.Services.AddSingleton<IAudioTranscriptionService, AudioTranscriptionService>();
var supportedCultures = new[] { "pt-BR", "es-MX", "es-CL", "es-PY" }; var supportedCultures = new[] { "pt-BR", "es-MX", "es-CL", "es-PY" };
builder.Services.Configure<RequestLocalizationOptions>(options => builder.Services.Configure<RequestLocalizationOptions>(options =>

View File

@ -1 +1,47 @@
 # Convert-It Online
Ferramenta multiuso de conversão de arquivos (Imagens, Documentos, Texto e Áudio) desenvolvida em ASP.NET Core 8 MVC.
## 🛠️ Funcionalidades
- **Imagens:** HEIC para JPG, JPG para WebP.
- **Documentos:** PDF para Texto, Extração de Linha Digitável de Boletos (Barcode).
- **Texto:** Conversor de Case (Maiúsculo/Minúsculo).
- **Áudio:** Transcrição de Áudio para Texto (Whisper AI) e Texto para Voz (Web Speech API).
- **PWA:** Suporte a instalação e integração com menu de compartilhamento do Android (Share Target).
## 🚀 Dependências Externas (Obrigatório)
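Para as funcionalidades de áudio (transcrição), o projeto depende do **FFmpeg**, que precisa estar disponível no `PATH`. O modelo Whisper Base (`ggml-base.bin`) é baixado automaticamente para a pasta `Models` na primeira transcrição, portanto o servidor precisa de acesso à internet nesse momento.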
### 🐧 Linux (Ubuntu/Debian)
```bash
sudo apt update
sudo apt install ffmpeg
```
### 🪟 Windows
1. Baixe os binários em [ffmpeg.org](https://ffmpeg.org/download.html).
2. Extraia para uma pasta (ex: `C:\ffmpeg`).
3. Adicione a pasta `bin` (ex: `C:\ffmpeg\bin`) às **Variáveis de Ambiente do Sistema (PATH)**.
4. Reinicie o terminal ou o Visual Studio.
### 🐳 Docker
A imagem Docker já está configurada para instalar o `ffmpeg` automaticamente durante o build.
## 💻 Desenvolvimento Local
1. Certifique-se de ter o .NET 8 SDK instalado.
2. Clone o repositório.
3. Configure o FFmpeg conforme instruções acima.
4. Execute o comando:
```bash
dotnet run
```
## 📱 PWA & Android Share Target
O projeto está configurado como um Progressive Web App. Ao "Instalar" o site no Android:
1. Ele aparecerá como um aplicativo nativo.
2. Você poderá compartilhar arquivos de áudio diretamente do WhatsApp para o Convert-It para transcrição automática.
---
Desenvolvido por Ricardo.

View File

@ -0,0 +1,97 @@
using System;
using System.IO;
using System.Net.Http;
using System.Threading.Tasks;
using Whisper.net;
using Whisper.net.Ggml;
using Xabe.FFmpeg;
using Microsoft.Extensions.Logging;
namespace Convert_It_Online.Services
{
/// <summary>
/// Transcribes audio files to text with a local Whisper "base" ggml model.
/// The input is first converted by FFmpeg to 16 kHz mono 16-bit PCM WAV, and
/// the model file is downloaded on first use if it is not present.
/// </summary>
public class AudioTranscriptionService : IAudioTranscriptionService
{
    private readonly string _modelPath;
    private readonly ILogger<AudioTranscriptionService> _logger;
    private readonly HttpClient _httpClient;

    // Serializes the first-use model download so concurrent requests do not
    // write to the same file at the same time.
    private readonly System.Threading.SemaphoreSlim _modelGate = new System.Threading.SemaphoreSlim(1, 1);

    public AudioTranscriptionService(ILogger<AudioTranscriptionService> logger)
    {
        _logger = logger;
        _httpClient = new HttpClient();
        _modelPath = Path.Combine(AppContext.BaseDirectory, "Models", "ggml-base.bin");

        // Make sure the Models folder exists (no-op when it already does).
        var modelsDir = Path.GetDirectoryName(_modelPath);
        Directory.CreateDirectory(modelsDir!);
    }

    /// <summary>
    /// Downloads the Whisper base model to <c>_modelPath</c> when it is missing.
    /// The download goes to a side file and is moved into place only once complete,
    /// so an interrupted download never leaves a truncated model behind.
    /// </summary>
    private async Task EnsureModelExistsAsync()
    {
        if (System.IO.File.Exists(_modelPath))
        {
            return;
        }

        await _modelGate.WaitAsync();
        try
        {
            // Another request may have finished the download while we waited.
            if (System.IO.File.Exists(_modelPath))
            {
                return;
            }

            _logger.LogInformation("Baixando modelo Whisper Base...");
            var partialPath = _modelPath + ".download";
            try
            {
                var downloader = new WhisperGgmlDownloader(_httpClient);
                using (var modelStream = await downloader.GetGgmlModelAsync(GgmlType.Base))
                using (var fileStream = System.IO.File.Create(partialPath))
                {
                    await modelStream.CopyToAsync(fileStream);
                }

                System.IO.File.Move(partialPath, _modelPath, true);
            }
            catch
            {
                // Do not keep a half-written file around; the next call retries from scratch.
                if (System.IO.File.Exists(partialPath))
                {
                    System.IO.File.Delete(partialPath);
                }

                throw;
            }

            _logger.LogInformation("Modelo Whisper baixado com sucesso.");
        }
        finally
        {
            _modelGate.Release();
        }
    }

    /// <summary>
    /// Converts the audio at <paramref name="inputPath"/> to WAV and transcribes it.
    /// </summary>
    /// <param name="inputPath">Path of the audio file to transcribe.</param>
    /// <param name="culture">
    /// Culture name such as "pt-BR"; only the part before the first '-' is passed
    /// to Whisper as the language. A null or blank value falls back to "pt".
    /// </param>
    /// <returns>The segment texts joined by single spaces, trimmed.</returns>
    /// <exception cref="Exception">Any conversion or transcription failure is logged and rethrown.</exception>
    public async Task<string> TranscribeAsync(string inputPath, string culture = "pt-BR")
    {
        await EnsureModelExistsAsync();

        // Whisper expects a bare language code ("pt", "es", ...), not a full culture name.
        var language = string.IsNullOrWhiteSpace(culture) ? "pt" : culture.Split('-')[0];
        var tempWavPath = Path.Combine(Path.GetTempPath(), $"{Guid.NewGuid()}.wav");
        try
        {
            _logger.LogInformation("Convertendo áudio para WAV 16kHz Mono...");

            // FFmpeg is assumed to be on the PATH (Linux). On Windows,
            // FFmpeg.SetExecutablesPath may be required.
            await FFmpeg.Conversions.New()
                .AddParameter($"-i \"{inputPath}\"")
                .AddParameter("-ar 16000")
                .AddParameter("-ac 1")
                .AddParameter("-c:a pcm_s16le")
                .SetOutput(tempWavPath)
                .Start();

            _logger.LogInformation("Iniciando transcrição com Whisper...");
            using var factory = WhisperFactory.FromPath(_modelPath);
            using var processor = factory.CreateBuilder()
                .WithLanguage(language)
                .Build();
            using var wavStream = System.IO.File.OpenRead(tempWavPath);

            var text = new System.Text.StringBuilder();
            await foreach (var segment in processor.ProcessAsync(wavStream))
            {
                text.Append(segment.Text).Append(' ');
            }

            return text.ToString().Trim();
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Erro durante a transcrição de áudio.");
            throw;
        }
        finally
        {
            // The intermediate WAV is only needed for this call.
            if (System.IO.File.Exists(tempWavPath))
            {
                System.IO.File.Delete(tempWavPath);
            }
        }
    }
}
}

View File

@ -0,0 +1,9 @@
using System.Threading.Tasks;
namespace Convert_It_Online.Services
{
/// <summary>
/// Contract for services that turn an audio file into text.
/// </summary>
public interface IAudioTranscriptionService
{
/// <summary>
/// Transcribes the audio file at <paramref name="inputPath"/>.
/// </summary>
/// <param name="inputPath">Path of the audio file to transcribe.</param>
/// <param name="culture">Culture name used as the language hint; defaults to "pt-BR".</param>
/// <returns>A task that yields the transcribed text.</returns>
Task<string> TranscribeAsync(string inputPath, string culture = "pt-BR");
}
}

View File

@ -29,6 +29,7 @@
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.2/dist/css/bootstrap.min.css" /> <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.2/dist/css/bootstrap.min.css" />
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bootstrap-icons@1.11.3/font/bootstrap-icons.min.css"> <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bootstrap-icons@1.11.3/font/bootstrap-icons.min.css">
<link rel="stylesheet" href="~/css/site.css" /> <link rel="stylesheet" href="~/css/site.css" />
<link rel="manifest" href="~/manifest.json" />
@if (adEnabled && adProvider == "Google" && !string.IsNullOrEmpty(googlePublisherId)) @if (adEnabled && adProvider == "Google" && !string.IsNullOrEmpty(googlePublisherId))
{ {
@ -85,6 +86,19 @@
</a></li> </a></li>
</ul> </ul>
</li> </li>
<li class="nav-item dropdown mx-2">
<a class="nav-link dropdown-toggle" href="#" id="audioToolsDropdown" role="button" data-bs-toggle="dropdown" aria-expanded="false">
<i class="bi bi-mic me-1"></i>Áudio
</a>
<ul class="dropdown-menu" aria-labelledby="audioToolsDropdown">
<li><a class="dropdown-item" href="@Html.LocalizedUrl("AudioTools", "SpeechToText")">
<i class="bi bi-chat-left-text me-2"></i>Áudio para Texto
</a></li>
<li><a class="dropdown-item" href="@Html.LocalizedUrl("AudioTools", "TextToSpeech")">
<i class="bi bi-megaphone me-2"></i>Texto para Áudio
</a></li>
</ul>
</li>
</ul> </ul>
<div class="dropdown"> <div class="dropdown">
<button class="btn btn-secondary dropdown-toggle" type="button" id="languageDropdown" data-bs-toggle="dropdown" aria-expanded="false"> <button class="btn btn-secondary dropdown-toggle" type="button" id="languageDropdown" data-bs-toggle="dropdown" aria-expanded="false">
@ -226,6 +240,11 @@
</footer> </footer>
<script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.2/dist/js/bootstrap.bundle.min.js"></script> <script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.2/dist/js/bootstrap.bundle.min.js"></script>
<script>
if ('serviceWorker' in navigator) {
navigator.serviceWorker.register('/sw.js');
}
</script>
@await RenderSectionAsync("Scripts", required: false) @await RenderSectionAsync("Scripts", required: false)
</body> </body>
</html> </html>

29
wwwroot/manifest.json Normal file
View File

@ -0,0 +1,29 @@
{
"name": "Convert-It Online",
"short_name": "Convert-It",
"start_url": "/",
"display": "standalone",
"background_color": "#0d6efd",
"theme_color": "#0d6efd",
"description": "Conversores rápidos de imagem, documento e áudio.",
"icons": [
{
"src": "/favicon.ico",
"sizes": "64x64",
"type": "image/x-icon"
}
],
"share_target": {
"action": "/AudioTools/SpeechToText/HandleShare",
"method": "POST",
"enctype": "multipart/form-data",
"params": {
"files": [
{
"name": "audio",
"accept": ["audio/*"]
}
]
}
}
}

7
wwwroot/sw.js Normal file
View File

@ -0,0 +1,7 @@
// Minimal service worker: both handlers are intentionally empty, so nothing is
// cached and fetches are not intercepted.
self.addEventListener('install', (e) => {
// Installed
});
self.addEventListener('fetch', (e) => {
// Required for the app to be installable (per the original author's note)
});