Add initial audio recognition requests

This commit is contained in:
2025-08-11 15:26:55 +03:00
parent c1da998c02
commit 672d8573fc
9 changed files with 396 additions and 64 deletions

View File

@@ -12,6 +12,7 @@ import (
"git.vakhrushev.me/av/transcriber/internal/repo"
"git.vakhrushev.me/av/transcriber/internal/repo/ffmpeg"
"git.vakhrushev.me/av/transcriber/internal/service/s3"
"git.vakhrushev.me/av/transcriber/internal/service/speechkit"
"github.com/gin-gonic/gin"
"github.com/google/uuid"
)
@@ -31,9 +32,10 @@ type CreateTranscribeJobResponse struct {
}
// GetTranscribeJobResponse is the JSON body sent by GetTranscribeJobStatus.
// State is serialized under the "status" key. TranscriptionText is a pointer
// tagged omitempty, so the field is omitted from the JSON while it is nil.
type GetTranscribeJobResponse struct {
JobID string `json:"job_id"`
State string `json:"status"`
CreatedAt time.Time `json:"created_at"`
JobID string `json:"job_id"`
State string `json:"status"`
CreatedAt time.Time `json:"created_at"`
TranscriptionText *string `json:"transcription_text,omitempty"`
}
func (h *TranscribeHandler) CreateTranscribeJob(c *gin.Context) {
@@ -123,9 +125,10 @@ func (h *TranscribeHandler) GetTranscribeJobStatus(c *gin.Context) {
}
c.JSON(http.StatusOK, GetTranscribeJobResponse{
JobID: job.Id,
State: job.State,
CreatedAt: job.CreatedAt,
JobID: job.Id,
State: job.State,
CreatedAt: job.CreatedAt,
TranscriptionText: job.TranscriptionText,
})
}
@@ -233,7 +236,7 @@ func (h *TranscribeHandler) RunUploadJob(c *gin.Context) {
}
job.FileID = &destFileId
job.MoveToState(entity.StateTranscribeReady)
job.MoveToState(entity.StateUploaded)
// Сохраняем информацию о загрузке файла на S3
err = h.fileRepo.Create(destFileRecord)
@@ -251,3 +254,113 @@ func (h *TranscribeHandler) RunUploadJob(c *gin.Context) {
c.Status(http.StatusOK)
}
// RunRecognitionJob acquires one job in the StateUploaded state and starts
// asynchronous SpeechKit recognition for the file attached to it.
//
// On success the returned operation ID is stored in job.RecognitionOpID, the
// job is moved to StateTranscribe and saved, and an empty 200 response is sent.
//
// Failure responses (JSON body {"error": "..."}):
//   - 400: no job could be acquired, the job has no file ID, or the file
//     record could not be loaded;
//   - 500: the SpeechKit service could not be created, S3_BUCKET_NAME is not
//     set, recognition could not be started, or the job could not be saved.
func (h *TranscribeHandler) RunRecognitionJob(c *gin.Context) {
	acquisitionID := uuid.NewString()
	// NOTE(review): presumably acquisitions older than this cutoff are treated
	// as stale by the repository — confirm against FindAndAcquire.
	rottingTime := time.Now().Add(-1 * time.Hour)

	job, err := h.jobRepo.FindAndAcquire(entity.StateUploaded, acquisitionID, rottingTime)
	if err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

	// FileID is a pointer; guard it so a job without an uploaded file yields
	// an error response instead of a nil-pointer panic.
	if job.FileID == nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "No file ID found"})
		return
	}

	fileRecord, err := h.fileRepo.GetByID(*job.FileID)
	if err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

	// Create the SpeechKit service.
	speechKitService, err := speechkit.NewSpeechKitService()
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to initialize SpeechKit service: " + err.Error()})
		return
	}

	// Build the storage URI for the file. An empty bucket name would produce
	// a malformed URI, so fail early with an explicit message.
	bucketName := os.Getenv("S3_BUCKET_NAME")
	if bucketName == "" {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "S3_BUCKET_NAME is not set"})
		return
	}
	s3URI := fmt.Sprintf("https://storage.yandexcloud.net/%s/%s", bucketName, fileRecord.FileName)

	// Start asynchronous recognition.
	operationID, err := speechKitService.RecognizeFileFromS3(s3URI)
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to start recognition: " + err.Error()})
		return
	}

	// Record the recognition operation ID on the job and advance its state.
	job.RecognitionOpID = &operationID
	job.MoveToState(entity.StateTranscribe)

	if err := h.jobRepo.Save(job); err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to update job: " + err.Error()})
		return
	}

	c.Status(http.StatusOK)
}
// RunRecognitionCheckJob acquires one job in the StateTranscribe state and
// polls SpeechKit for the recognition operation recorded on it.
//
// If the operation is not finished yet, the job is saved as-is and 200 is
// returned. If it is finished, the recognition result is fetched, its text is
// stored in job.TranscriptionText, the job is moved to StateDone and saved,
// and 200 is returned. Every failure is reported as JSON {"error": "..."}
// with status 400 (no job acquired, missing operation ID) or 500 (SpeechKit
// or persistence errors).
func (h *TranscribeHandler) RunRecognitionCheckJob(c *gin.Context) {
	// Single place that shapes the error payload for all failure paths.
	fail := func(status int, message string) {
		c.JSON(status, gin.H{"error": message})
	}

	lockToken := uuid.NewString()
	staleBefore := time.Now().Add(-1 * time.Hour)

	job, err := h.jobRepo.FindAndAcquire(entity.StateTranscribe, lockToken, staleBefore)
	if err != nil {
		fail(http.StatusBadRequest, err.Error())
		return
	}
	if job.RecognitionOpID == nil {
		fail(http.StatusBadRequest, "No recognition operation ID found")
		return
	}
	opID := *job.RecognitionOpID

	// Create the SpeechKit service.
	svc, err := speechkit.NewSpeechKitService()
	if err != nil {
		fail(http.StatusInternalServerError, "Failed to initialize SpeechKit service: "+err.Error())
		return
	}

	// Ask SpeechKit for the current status of the operation.
	op, err := svc.CheckOperationStatus(opID)
	if err != nil {
		fail(http.StatusInternalServerError, "Failed to check operation status: "+err.Error())
		return
	}

	if !op.Done {
		// Still running: this handler does not modify the job here, it only
		// persists it again and reports success.
		if saveErr := h.jobRepo.Save(job); saveErr != nil {
			fail(http.StatusInternalServerError, "Failed to update job: "+saveErr.Error())
			return
		}
		c.Status(http.StatusOK)
		return
	}

	// Finished: fetch the recognition result.
	results, err := svc.GetRecognitionResult(opID)
	if err != nil {
		fail(http.StatusInternalServerError, "Failed to get recognition result: "+err.Error())
		return
	}

	// Extract the text and store it on the job together with the final state.
	text := speechkit.ExtractTranscriptionText(results)
	job.TranscriptionText = &text
	job.MoveToState(entity.StateDone)

	if saveErr := h.jobRepo.Save(job); saveErr != nil {
		fail(http.StatusInternalServerError, "Failed to update job: "+saveErr.Error())
		return
	}

	c.Status(http.StatusOK)
}