Convert-it/Services/AudioTranscriptionService.cs
Ricardo Carneiro f8f052428f
All checks were successful
Deploy ASP.NET MVC to OCI / build-and-deploy (push) Successful in 22m1s
fix: features de audio
2026-01-26 09:37:47 -03:00

218 lines
8.0 KiB
C#

using System;
using System.Diagnostics;
using System.IO;
using System.Net.Http;
using System.Runtime.InteropServices;
using System.Threading.Tasks;
using Whisper.net;
using Whisper.net.Ggml;
using Xabe.FFmpeg;
using Microsoft.Extensions.Logging;
namespace Convert_It_Online.Services
{
public class AudioTranscriptionService : IAudioTranscriptionService
{
// Absolute path of the Whisper GGML model file ("Models/ggml-base.bin" under the app base directory).
private readonly string _modelPath;
private readonly ILogger<AudioTranscriptionService> _logger;

// A single HttpClient is shared by every instance of the service. Creating one per
// instance (and never disposing it) can exhaust sockets when the service is not a singleton.
private static readonly HttpClient _sharedHttpClient = new HttpClient();
private readonly HttpClient _httpClient;

// Set once FFmpeg discovery has completed so that later instances skip it.
private static bool _ffmpegConfigured = false;

/// <summary>
/// Creates the service, makes sure the folder that holds the Whisper model exists
/// and runs FFmpeg discovery.
/// </summary>
/// <param name="logger">Logger used for diagnostics.</param>
/// <exception cref="ArgumentNullException"><paramref name="logger"/> is null.</exception>
public AudioTranscriptionService(ILogger<AudioTranscriptionService> logger)
{
    _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    _httpClient = _sharedHttpClient;
    _modelPath = Path.Combine(AppContext.BaseDirectory, "Models", "ggml-base.bin");

    // Make sure the Models folder exists. CreateDirectory does nothing when the
    // directory is already there, so no prior existence check is required.
    var modelsDir = Path.GetDirectoryName(_modelPath);
    if (!string.IsNullOrEmpty(modelsDir))
    {
        Directory.CreateDirectory(modelsDir);
    }

    // Configure FFmpeg (guarded by a static flag inside the method).
    ConfigureFFmpeg();
}
/// <summary>
/// Locates the folder containing the FFmpeg executable and registers it through
/// <c>FFmpeg.SetExecutablesPath</c>.
/// </summary>
/// <remarks>
/// Lookup order: the <c>FFMPEG_PATH</c> environment variable (must name an existing directory),
/// then a list of well-known install folders for the current OS, then every folder in <c>PATH</c>.
/// The static flag is set once the search finishes without throwing, whether or not FFmpeg was
/// found; an exception is logged and leaves the flag unset, so the next instance tries again.
/// </remarks>
private void ConfigureFFmpeg()
{
    if (_ffmpegConfigured)
    {
        return;
    }

    try
    {
        string? ffmpegPath = null;

        // 1. The environment variable has the highest priority.
        var envPath = Environment.GetEnvironmentVariable("FFMPEG_PATH");
        if (!string.IsNullOrEmpty(envPath) && Directory.Exists(envPath))
        {
            ffmpegPath = envPath;
            _logger.LogInformation("FFmpeg configurado via FFMPEG_PATH: {Path}", ffmpegPath);
        }

        // 2. Otherwise probe common install locations, then fall back to PATH.
        if (ffmpegPath == null)
        {
            if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
            {
                var windowsCandidates = new[]
                {
                    Path.Combine(AppContext.BaseDirectory, "ffmpeg"),
                    @"C:\Apps\ffmpeg\bin",
                    @"C:\Apps\ffmpeg",
                    Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.ProgramFiles), "ffmpeg", "bin"),
                    @"C:\ffmpeg\bin",
                    @"C:\ffmpeg",
                    @"C:\Program Files\ffmpeg\bin",
                    @"C:\tools\ffmpeg\bin",
                    @"C:\tools\ffmpeg"
                };

                ffmpegPath = FindFFmpegInDirectories(windowsCandidates, "ffmpeg.exe")
                    ?? FindFFmpegInPath("ffmpeg.exe");
            }
            else
            {
                // Every non-Windows platform: standard binary folders first.
                var unixCandidates = new[]
                {
                    "/usr/bin",
                    "/usr/local/bin",
                    "/opt/ffmpeg/bin"
                };

                // PATH is searched here too, mirroring the Windows branch, so installs
                // outside the three standard folders are still discovered.
                ffmpegPath = FindFFmpegInDirectories(unixCandidates, "ffmpeg")
                    ?? FindFFmpegInPath("ffmpeg");
            }
        }

        if (!string.IsNullOrEmpty(ffmpegPath))
        {
            FFmpeg.SetExecutablesPath(ffmpegPath);
            _logger.LogInformation("FFmpeg.SetExecutablesPath configurado: {Path}", ffmpegPath);
        }
        else
        {
            _logger.LogWarning("FFmpeg não encontrado em caminhos conhecidos. A transcrição de áudio pode falhar.");
        }

        _ffmpegConfigured = true;
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "Erro ao configurar FFmpeg");
    }
}

/// <summary>
/// Returns the first directory in <paramref name="directories"/> that contains
/// <paramref name="executable"/>, or null when none does.
/// </summary>
private string? FindFFmpegInDirectories(string[] directories, string executable)
{
    foreach (var directory in directories)
    {
        if (File.Exists(Path.Combine(directory, executable)))
        {
            _logger.LogInformation("FFmpeg encontrado em: {Path}", directory);
            return directory;
        }
    }

    return null;
}
/// <summary>
/// Searches every folder listed in the <c>PATH</c> environment variable for
/// <paramref name="executable"/>.
/// </summary>
/// <param name="executable">File name to look for, e.g. "ffmpeg" or "ffmpeg.exe".</param>
/// <returns>The folder that contains the executable, or null when it is not found.</returns>
private string? FindFFmpegInPath(string executable)
{
    var pathEnv = Environment.GetEnvironmentVariable("PATH");
    if (string.IsNullOrEmpty(pathEnv))
    {
        return null;
    }

    // Path.PathSeparator is ';' on Windows and ':' elsewhere.
    var entries = pathEnv.Split(new[] { Path.PathSeparator }, StringSplitOptions.RemoveEmptyEntries);
    foreach (var entry in entries)
    {
        // Entries may carry stray whitespace or surrounding quotes.
        var directory = entry.Trim().Trim('"').Trim();

        // Skip blank entries: combining "" with the file name would probe the current
        // working directory and report an empty string as the FFmpeg folder.
        if (directory.Length == 0)
        {
            continue;
        }

        if (File.Exists(Path.Combine(directory, executable)))
        {
            _logger.LogInformation("FFmpeg encontrado no PATH: {Path}", directory);

            // Return the cleaned folder, i.e. exactly the one that was probed.
            return directory;
        }
    }

    return null;
}
// Serializes the first-use model download so concurrent callers do not write the same file.
private static readonly System.Threading.SemaphoreSlim _modelDownloadLock = new System.Threading.SemaphoreSlim(1, 1);

/// <summary>
/// Downloads the Whisper "Base" GGML model to <c>_modelPath</c> when it is not there yet.
/// </summary>
/// <remarks>
/// The model is written to a temporary file in the same folder and moved into place only
/// after the download completed, so an interrupted download never leaves a truncated file
/// that later calls would mistake for a valid model.
/// </remarks>
private async Task EnsureModelExistsAsync()
{
    if (System.IO.File.Exists(_modelPath))
    {
        return;
    }

    await _modelDownloadLock.WaitAsync();
    try
    {
        // Another caller may have completed the download while this one was waiting.
        if (System.IO.File.Exists(_modelPath))
        {
            return;
        }

        _logger.LogInformation("Baixando modelo Whisper Base...");

        var tempPath = _modelPath + "." + Guid.NewGuid().ToString("N") + ".tmp";
        try
        {
            var downloader = new WhisperGgmlDownloader(_httpClient);
            using (var modelStream = await downloader.GetGgmlModelAsync(GgmlType.Base))
            using (var fileStream = System.IO.File.Create(tempPath))
            {
                await modelStream.CopyToAsync(fileStream);
            }

            // Both streams are closed here; publish the complete file under its final name.
            System.IO.File.Move(tempPath, _modelPath);
        }
        catch
        {
            // Best-effort removal of the partial download; the original failure is what matters.
            try
            {
                if (System.IO.File.Exists(tempPath))
                {
                    System.IO.File.Delete(tempPath);
                }
            }
            catch (Exception cleanupEx) when (cleanupEx is IOException || cleanupEx is UnauthorizedAccessException)
            {
                // Ignored on purpose so the download error below is not masked.
            }

            throw;
        }

        _logger.LogInformation("Modelo Whisper baixado com sucesso.");
    }
    finally
    {
        _modelDownloadLock.Release();
    }
}
/// <summary>
/// Transcribes an audio file to text. The input is first converted with FFmpeg to a
/// temporary 16 kHz, mono, 16-bit PCM WAV file, which is then processed by Whisper.
/// </summary>
/// <param name="inputPath">Path of the audio file to transcribe.</param>
/// <param name="culture">
/// Culture name such as "pt-BR". Only the part before the first '-' or '_' is passed to
/// Whisper, lowercased. Null or blank falls back to "pt-BR".
/// </param>
/// <returns>The text of all recognized segments joined by single spaces, trimmed.</returns>
/// <exception cref="ArgumentException"><paramref name="inputPath"/> is null or blank.</exception>
public async Task<string> TranscribeAsync(string inputPath, string culture = "pt-BR")
{
    if (string.IsNullOrWhiteSpace(inputPath))
    {
        throw new ArgumentException("Input path must not be null or empty.", nameof(inputPath));
    }

    // "pt-BR" -> "pt", "es_ES" -> "es"; a null or blank culture uses the method default.
    var cultureName = string.IsNullOrWhiteSpace(culture) ? "pt-BR" : culture.Trim();
    var language = cultureName.Split('-', '_')[0].ToLowerInvariant();

    await EnsureModelExistsAsync();

    string tempWavPath = Path.Combine(Path.GetTempPath(), $"{Guid.NewGuid()}.wav");
    try
    {
        _logger.LogInformation("Convertendo áudio para WAV 16kHz Mono...");

        // NOTE(review): inputPath is interpolated between double quotes; a path that itself
        // contains '"' would break the argument list. Confirm callers only pass
        // server-generated paths.
        await FFmpeg.Conversions.New()
            .AddParameter($"-i \"{inputPath}\"")
            .AddParameter("-ar 16000")
            .AddParameter("-ac 1")
            .AddParameter("-c:a pcm_s16le")
            .SetOutput(tempWavPath)
            .Start();

        _logger.LogInformation("Iniciando transcrição com Whisper...");
        using var factory = WhisperFactory.FromPath(_modelPath);
        using var processor = factory.CreateBuilder()
            .WithLanguage(language)
            .Build();
        using var wavStream = System.IO.File.OpenRead(tempWavPath);

        // StringBuilder avoids re-allocating the whole transcript for every segment.
        var transcript = new System.Text.StringBuilder();
        await foreach (var segment in processor.ProcessAsync(wavStream))
        {
            transcript.Append(segment.Text).Append(' ');
        }

        return transcript.ToString().Trim();
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "Erro durante a transcrição de áudio.");
        throw;
    }
    finally
    {
        // The using declarations above are scoped to the try block, so the WAV stream is
        // already closed here. A failed delete must not replace the transcription result
        // or hide the exception that is propagating.
        try
        {
            if (System.IO.File.Exists(tempWavPath))
            {
                System.IO.File.Delete(tempWavPath);
            }
        }
        catch (Exception cleanupEx) when (cleanupEx is IOException || cleanupEx is UnauthorizedAccessException)
        {
            _logger.LogWarning(cleanupEx, "Não foi possível remover o arquivo temporário {Path}", tempWavPath);
        }
    }
}
}
}