diff --git a/internal/adapter/metaviewer/ffmpeg/ffmpeg.go b/internal/adapter/metaviewer/ffmpeg/ffmpeg.go new file mode 100644 index 0000000..1671074 --- /dev/null +++ b/internal/adapter/metaviewer/ffmpeg/ffmpeg.go @@ -0,0 +1,72 @@ +package ffmpeg + +import ( + "encoding/json" + "fmt" + "os" + "os/exec" + "strconv" + + "git.vakhrushev.me/av/transcriber/internal/contract" +) + +const ffprobeExecutable = "ffprobe" + +type FfmpegMetaViewer struct { +} + +// ffprobeOutput mirrors the part of the ffprobe JSON output that is decoded here +type ffprobeOutput struct { + Format struct { + Duration string `json:"duration"` + } `json:"format"` +} + +func NewFfmpegMetaViewer() *FfmpegMetaViewer { + return &FfmpegMetaViewer{} +} + +func (m *FfmpegMetaViewer) GetInfo(src string) (*contract.AudioInfo, error) { + // Check that the source file exists; Stat errors other than not-exist are not handled here + if _, err := os.Stat(src); os.IsNotExist(err) { + return nil, fmt.Errorf("input file does not exist: %s", src) + } + + // Check that ffprobe can be found in PATH + if _, err := exec.LookPath(ffprobeExecutable); err != nil { + return nil, fmt.Errorf("ffprobe not found in PATH: %w", err) + } + + // Build the ffprobe command that reports the metadata of the file + cmd := exec.Command(ffprobeExecutable, + "-v", "quiet", // quiet mode (no extra output) + "-print_format", "json", // print output as JSON + "-show_format", // show format information + src, // input file + ) + + // Run the command and capture its standard output + output, err := cmd.Output() + if err != nil { + return nil, fmt.Errorf("ffprobe execution failed: %w", err) + } + + // Parse the JSON response + var probeResult ffprobeOutput + if err := json.Unmarshal(output, &probeResult); err != nil { + return nil, fmt.Errorf("failed to parse ffprobe output: %w", err) + } + + // Convert the duration from a string to seconds + durationFloat, err := strconv.ParseFloat(probeResult.Format.Duration, 64) + if err != nil { + return nil, fmt.Errorf("failed to parse duration: %w", err) + } + + // Round to whole seconds 
+ durationSeconds := int(durationFloat + 0.5) // adding 0.5 before the truncating int conversion rounds non-negative values to nearest + + return &contract.AudioInfo{ + Seconds: durationSeconds, + }, nil +} diff --git a/internal/contract/contract.go b/internal/contract/contract.go index 5a9b6c7..eabf0ba 100644 --- a/internal/contract/contract.go +++ b/internal/contract/contract.go @@ -6,6 +6,14 @@ import ( "git.vakhrushev.me/av/transcriber/internal/entity" ) +type AudioInfo struct { + Seconds int // Audio file length in seconds +} + +type AudioMetaViewer interface { + GetInfo(src string) (*AudioInfo, error) +} + type AudioFileConverter interface { Convert(src, dest string) error } diff --git a/internal/controller/http/transcribe_test.go b/internal/controller/http/transcribe_test.go index 1f2b0c5..3fba6bb 100644 --- a/internal/controller/http/transcribe_test.go +++ b/internal/controller/http/transcribe_test.go @@ -15,7 +15,8 @@ import ( "testing" "time" - "git.vakhrushev.me/av/transcriber/internal/adapter/converter/ffmpeg" + ffmpegconv "git.vakhrushev.me/av/transcriber/internal/adapter/converter/ffmpeg" + ffmpegmv "git.vakhrushev.me/av/transcriber/internal/adapter/metaviewer/ffmpeg" "git.vakhrushev.me/av/transcriber/internal/adapter/recognizer" "git.vakhrushev.me/av/transcriber/internal/adapter/repo/sqlite" "git.vakhrushev.me/av/transcriber/internal/entity" @@ -58,10 +59,11 @@ func setupTestRouter(t *testing.T) (*gin.Engine, *TranscribeHandler) { fileRepo := sqlite.NewFileRepository(db, gq) jobRepo := sqlite.NewTranscriptJobRepository(db, gq) - converter := ffmpeg.NewFfmpegConverter() + metaviewer := ffmpegmv.NewFfmpegMetaViewer() + converter := ffmpegconv.NewFfmpegConverter() recognizer := &recognizer.MemoryAudioRecognizer{} - trsService := service.NewTranscribeService(jobRepo, fileRepo, converter, recognizer) + trsService := service.NewTranscribeService(jobRepo, fileRepo, metaviewer, converter, recognizer) handler := NewTranscribeHandler(jobRepo, trsService) diff --git a/internal/metrics/metrics.go 
b/internal/metrics/metrics.go index f126d8e..c5d8b11 100644 --- a/internal/metrics/metrics.go +++ b/internal/metrics/metrics.go @@ -24,6 +24,16 @@ var ( []string{"file_extension"}, ) + // Duration of input audio files (in seconds) + InputFileDurationHistogram = promauto.NewHistogramVec( + prometheus.HistogramOpts{ + Name: "transcriber_input_file_duration_seconds", + Help: "Duration of input audio file", + Buckets: []float64{15, 30, 60, 120, 300, 600, 1200, 1800, 2400, 3000, 3600, 7200, 10800, 14400}, + }, + []string{}, + ) + // Время конвертации файлов (в секундах) ConversionDurationHistogram = promauto.NewHistogramVec( prometheus.HistogramOpts{ diff --git a/internal/service/transcribe.go b/internal/service/transcribe.go index 3ca90a2..aa56450 100644 --- a/internal/service/transcribe.go +++ b/internal/service/transcribe.go @@ -25,6 +25,7 @@ const ( type TranscribeService struct { jobRepo contract.TranscriptJobRepository fileRepo contract.FileRepository + metaviewer contract.AudioMetaViewer converter contract.AudioFileConverter recognizer contract.AudioRecognizer } @@ -32,12 +33,14 @@ type TranscribeService struct { func NewTranscribeService( jobRepo contract.TranscriptJobRepository, fileRepo contract.FileRepository, + metaviewer contract.AudioMetaViewer, converter contract.AudioFileConverter, recognizer contract.AudioRecognizer, ) *TranscribeService { return &TranscribeService{ jobRepo: jobRepo, fileRepo: fileRepo, + metaviewer: metaviewer, converter: converter, recognizer: recognizer, } @@ -74,6 +77,12 @@ func (s *TranscribeService) CreateTranscribeJob(file io.Reader, fileName string) return nil, err } + info, err := s.metaviewer.GetInfo(storageFilePath) + if err != nil { + return nil, err + } + + metrics.InputFileDurationHistogram.WithLabelValues().Observe(float64(info.Seconds)) metrics.InputFileSizeHistogram.WithLabelValues(ext).Observe(float64(size)) // Создаем запись в таблице files diff --git a/main.go b/main.go index 918d7e4..de894bb 100644 --- a/main.go +++ 
b/main.go @@ -12,7 +12,8 @@ import ( "syscall" "time" - "git.vakhrushev.me/av/transcriber/internal/adapter/converter/ffmpeg" + ffmpegconv "git.vakhrushev.me/av/transcriber/internal/adapter/converter/ffmpeg" + ffmpegmv "git.vakhrushev.me/av/transcriber/internal/adapter/metaviewer/ffmpeg" "git.vakhrushev.me/av/transcriber/internal/adapter/recognizer/yandex" "git.vakhrushev.me/av/transcriber/internal/adapter/repo/sqlite" httpcontroller "git.vakhrushev.me/av/transcriber/internal/controller/http" @@ -62,7 +63,8 @@ func main() { // Создаем адаптеры - converter := ffmpeg.NewFfmpegConverter() + metaviewer := ffmpegmv.NewFfmpegMetaViewer() + converter := ffmpegconv.NewFfmpegConverter() recognizer, err := yandex.NewYandexAudioRecognizerService(yandex.YandexAudioRecognizerConfig{ Region: os.Getenv("AWS_REGION"), @@ -80,7 +82,7 @@ func main() { // Создаем сервисы - transcribeService := service.NewTranscribeService(jobRepo, fileRepo, converter, recognizer) + transcribeService := service.NewTranscribeService(jobRepo, fileRepo, metaviewer, converter, recognizer) // Создаем воркеры