Add initial audio recognition requests

This commit is contained in:
2025-08-11 15:26:55 +03:00
parent c1da998c02
commit 672d8573fc
9 changed files with 396 additions and 64 deletions

View File

@@ -13,3 +13,9 @@ S3_BUCKET_NAME=your_bucket_name
# Кастомный endpoint для S3 (оставить пустым для AWS S3, заполнить для MinIO или других S3-совместимых сервисов)
S3_ENDPOINT=
# Yandex Cloud Speech-to-Text Configuration
# API ключ для доступа к Yandex Cloud (получить в консоли Yandex Cloud)
YANDEX_CLOUD_API_KEY=your_api_key_here
# ID папки в Yandex Cloud (получить в консоли Yandex Cloud)
YANDEX_CLOUD_FOLDER_ID=your_folder_id_here

16
go.mod
View File

@@ -10,11 +10,13 @@ require (
github.com/aws/aws-sdk-go-v2/service/s3 v1.86.0
github.com/doug-martin/goqu/v9 v9.19.0
github.com/gin-gonic/gin v1.10.1
github.com/google/uuid v1.4.0
github.com/google/uuid v1.6.0
github.com/joho/godotenv v1.5.1
github.com/mattn/go-sqlite3 v1.14.17
github.com/pressly/goose/v3 v3.15.1
github.com/stretchr/testify v1.10.0
github.com/yandex-cloud/go-genproto v0.17.0
google.golang.org/grpc v1.74.2
)
require (
@@ -54,10 +56,12 @@ require (
github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
github.com/ugorji/go/codec v1.2.12 // indirect
golang.org/x/arch v0.8.0 // indirect
golang.org/x/crypto v0.23.0 // indirect
golang.org/x/net v0.25.0 // indirect
golang.org/x/sys v0.20.0 // indirect
golang.org/x/text v0.15.0 // indirect
google.golang.org/protobuf v1.34.1 // indirect
golang.org/x/crypto v0.38.0 // indirect
golang.org/x/net v0.40.0 // indirect
golang.org/x/sys v0.33.0 // indirect
golang.org/x/text v0.25.0 // indirect
google.golang.org/genproto/googleapis/api v0.0.0-20250528174236-200df99c418a // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20250528174236-200df99c418a // indirect
google.golang.org/protobuf v1.36.7 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)

66
go.sum
View File

@@ -60,6 +60,10 @@ github.com/gin-contrib/sse v0.1.0 h1:Y/yl/+YNO8GZSjAhjMsSuLt29uWRFHdHYUb5lYOV9qE
github.com/gin-contrib/sse v0.1.0/go.mod h1:RHrZQHXnP2xjPF+u1gW/2HnVO7nvIa9PG3Gm+fLHvGI=
github.com/gin-gonic/gin v1.10.1 h1:T0ujvqyCSqRopADpgPgiTT63DUQVSfojyME59Ei63pQ=
github.com/gin-gonic/gin v1.10.1/go.mod h1:4PMNQiOhvDRa013RKVbsiNwoyezlm2rm0uX/T7kzp5Y=
github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI=
github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
github.com/go-playground/assert/v2 v2.2.0 h1:JvknZsQTYeFEAhQwI4qEt9cyV5ONwRHC+lYKSsYSR8s=
github.com/go-playground/assert/v2 v2.2.0/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4=
github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA=
@@ -72,11 +76,13 @@ github.com/go-sql-driver/mysql v1.6.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LB
github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU=
github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I=
github.com/golang-sql/civil v0.0.0-20190719163853-cb61b32ac6fe/go.mod h1:8vg3r2VgvsThLBIFL93Qb5yWzgyZWhEmBwUJWevAkK0=
github.com/google/go-cmp v0.5.5 h1:Khx7svrCpmxxtHBq5j2mp/xVjsi8hQMfNLvJFAlrGgU=
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
github.com/google/uuid v1.4.0 h1:MtMxsa51/r9yyhkyLsVeVt0B+BGQZzpQiTQ4eHZ8bc4=
github.com/google/uuid v1.4.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/joho/godotenv v1.5.1 h1:7eLL/+HRGLY0ldzfGMeQkb7vMd0as4CfYvUVzLqw0N0=
github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4=
github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
@@ -128,34 +134,54 @@ github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS
github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08=
github.com/ugorji/go/codec v1.2.12 h1:9LC83zGrHhuUA9l16C9AHXAqEV/2wBQ4nkvumAE65EE=
github.com/ugorji/go/codec v1.2.12/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg=
github.com/yandex-cloud/go-genproto v0.17.0 h1:uQ5Lr8B/xIyY1KrOm7pItYY3YT/DL1O8gVaY03ouYKM=
github.com/yandex-cloud/go-genproto v0.17.0/go.mod h1:0LDD/IZLIUIV4iPH+YcF+jysO3jkSvADFGm4dCAuwQo=
go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA=
go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A=
go.opentelemetry.io/otel v1.36.0 h1:UumtzIklRBY6cI/lllNZlALOF5nNIzJVb16APdvgTXg=
go.opentelemetry.io/otel v1.36.0/go.mod h1:/TcFMXYjyRNh8khOAO9ybYkqaDBb/70aVwkNML4pP8E=
go.opentelemetry.io/otel/metric v1.36.0 h1:MoWPKVhQvJ+eeXWHFBOPoBOi20jh6Iq2CcCREuTYufE=
go.opentelemetry.io/otel/metric v1.36.0/go.mod h1:zC7Ks+yeyJt4xig9DEw9kuUFe5C3zLbVjV2PzT6qzbs=
go.opentelemetry.io/otel/sdk v1.36.0 h1:b6SYIuLRs88ztox4EyrvRti80uXIFy+Sqzoh9kFULbs=
go.opentelemetry.io/otel/sdk v1.36.0/go.mod h1:+lC+mTgD+MUWfjJubi2vvXWcVxyr9rmlshZni72pXeY=
go.opentelemetry.io/otel/sdk/metric v1.36.0 h1:r0ntwwGosWGaa0CrSt8cuNuTcccMXERFwHX4dThiPis=
go.opentelemetry.io/otel/sdk/metric v1.36.0/go.mod h1:qTNOhFDfKRwX0yXOqJYegL5WRaW376QbB7P4Pb0qva4=
go.opentelemetry.io/otel/trace v1.36.0 h1:ahxWNuqZjpdiFAyrIoQ4GIiAIhxAunQR6MUoKrsNd4w=
go.opentelemetry.io/otel/trace v1.36.0/go.mod h1:gQ+OnDZzrybY4k4seLzPAWNwVBBVlF2szhehOBB/tGA=
golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8=
golang.org/x/arch v0.8.0 h1:3wRIsP3pM4yUptoR96otTUOXI367OS0+c9eeRi9doIc=
golang.org/x/arch v0.8.0/go.mod h1:FEVrYAQjsQXMVJ1nsMoVVXPZg6p2JE2mx8psSWTDQys=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20190325154230-a5d413f7728c/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.23.0 h1:dIJU/v2J8Mdglj/8rJ6UUOM3Zc9zLZxVZwwxMooUSAI=
golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8=
golang.org/x/mod v0.12.0 h1:rmsUpXtvNzj340zd98LZ4KntptpfRHwpFOHG188oHXc=
golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
golang.org/x/crypto v0.38.0 h1:jt+WWG8IZlBnVbomuhg2Mdq0+BBQaHbtqHEFEigjUV8=
golang.org/x/crypto v0.38.0/go.mod h1:MvrbAqul58NNYPKnOra203SB9vpuZW0e+RRZV+Ggqjw=
golang.org/x/mod v0.17.0 h1:zY54UmvipHiNd+pm+m0x9KhZ9hl1/7QNMyxXbc6ICqA=
golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.25.0 h1:d/OCCoBEUq33pjydKrGQhw7IlUPI2Oylr+8qLx49kac=
golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM=
golang.org/x/net v0.40.0 h1:79Xs7wF06Gbdcg4kdCCIQArK11Z1hr5POQ6+fIYHNuY=
golang.org/x/net v0.40.0/go.mod h1:y0hY0exeL2Pku80/zKK7tpntoX23cqL3Oa6njdgRtds=
golang.org/x/sync v0.14.0 h1:woo0S4Yywslg6hp4eUFjTVOyKt0RookbpAHG4c1HmhQ=
golang.org/x/sync v0.14.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y=
golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.33.0 h1:q3i8TbbEz+JRD9ywIRlyRAQbM0qF7hu24q3teo2hbuw=
golang.org/x/sys v0.33.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.15.0 h1:h1V/4gjBv8v9cjcR6+AR5+/cIYK5N/WAgiv4xlsEtAk=
golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
golang.org/x/tools v0.13.0 h1:Iey4qkscZuv0VvIt8E0neZjtPVQFSc870HQ448QgEmQ=
golang.org/x/tools v0.13.0/go.mod h1:HvlwmtVNQAhOuCjW7xxvovg8wbNq7LwfXh/k7wXUl58=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
google.golang.org/protobuf v1.34.1 h1:9ddQBjfCyZPOHPUiPxpYESBLc+T8P3E+Vo4IbKZgFWg=
google.golang.org/protobuf v1.34.1/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos=
golang.org/x/text v0.25.0 h1:qVyWApTSYLk/drJRO5mDlNYskwQznZmkpV2c8q9zls4=
golang.org/x/text v0.25.0/go.mod h1:WEdwpYrmk1qmdHvhkSTNPm3app7v4rsT8F2UD6+VHIA=
golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d h1:vU5i/LfpvrRCpgM/VPfJLg5KjxD3E+hfT1SH+d9zLwg=
golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk=
google.golang.org/genproto/googleapis/api v0.0.0-20250528174236-200df99c418a h1:SGktgSolFCo75dnHJF2yMvnns6jCmHFJ0vE4Vn2JKvQ=
google.golang.org/genproto/googleapis/api v0.0.0-20250528174236-200df99c418a/go.mod h1:a77HrdMjoeKbnd2jmgcWdaS++ZLZAEq3orIOAEIKiVw=
google.golang.org/genproto/googleapis/rpc v0.0.0-20250528174236-200df99c418a h1:v2PbRU4K3llS09c7zodFpNePeamkAwG3mPrAery9VeE=
google.golang.org/genproto/googleapis/rpc v0.0.0-20250528174236-200df99c418a/go.mod h1:qQ0YXyHHx3XkvlzUtpXDkS29lDSafHMZBAZDc03LQ3A=
google.golang.org/grpc v1.74.2 h1:WoosgB65DlWVC9FqI82dGsZhWFNBSLjQ84bjROOpMu4=
google.golang.org/grpc v1.74.2/go.mod h1:CtQ+BGjaAIXHs/5YS3i473GqwBBa1zGQNevxdeBEXrM=
google.golang.org/protobuf v1.36.7 h1:IgrO7UwFQGJdRNXH/sQux4R1Dj1WAKcLElzeeRaXV2A=
google.golang.org/protobuf v1.36.7/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

View File

@@ -12,6 +12,7 @@ import (
"git.vakhrushev.me/av/transcriber/internal/repo"
"git.vakhrushev.me/av/transcriber/internal/repo/ffmpeg"
"git.vakhrushev.me/av/transcriber/internal/service/s3"
"git.vakhrushev.me/av/transcriber/internal/service/speechkit"
"github.com/gin-gonic/gin"
"github.com/google/uuid"
)
@@ -31,9 +32,10 @@ type CreateTranscribeJobResponse struct {
}
type GetTranscribeJobResponse struct {
JobID string `json:"job_id"`
State string `json:"status"`
CreatedAt time.Time `json:"created_at"`
JobID string `json:"job_id"`
State string `json:"status"`
CreatedAt time.Time `json:"created_at"`
TranscriptionText *string `json:"transcription_text,omitempty"`
}
func (h *TranscribeHandler) CreateTranscribeJob(c *gin.Context) {
@@ -123,9 +125,10 @@ func (h *TranscribeHandler) GetTranscribeJobStatus(c *gin.Context) {
}
c.JSON(http.StatusOK, GetTranscribeJobResponse{
JobID: job.Id,
State: job.State,
CreatedAt: job.CreatedAt,
JobID: job.Id,
State: job.State,
CreatedAt: job.CreatedAt,
TranscriptionText: job.TranscriptionText,
})
}
@@ -233,7 +236,7 @@ func (h *TranscribeHandler) RunUploadJob(c *gin.Context) {
}
job.FileID = &destFileId
job.MoveToState(entity.StateTranscribeReady)
job.MoveToState(entity.StateUploaded)
// Сохраняем информацию о загрузке файла на S3
err = h.fileRepo.Create(destFileRecord)
@@ -251,3 +254,113 @@ func (h *TranscribeHandler) RunUploadJob(c *gin.Context) {
c.Status(http.StatusOK)
}
// RunRecognitionJob acquires one job in the "uploaded" state and starts an
// asynchronous SpeechKit recognition for the file attached to it.
//
// On success the recognition operation ID is stored on the job, the job is
// moved to entity.StateTranscribe and saved, and the handler answers 200 with
// an empty body. Failures to acquire the job or to load its file record are
// reported as 400; SpeechKit and persistence failures are reported as 500.
func (h *TranscribeHandler) RunRecognitionJob(c *gin.Context) {
	acquisitionId := uuid.NewString()
	// Cut-off passed to FindAndAcquire: one hour before now.
	rottingTime := time.Now().Add(-1 * time.Hour)

	job, err := h.jobRepo.FindAndAcquire(entity.StateUploaded, acquisitionId, rottingTime)
	if err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

	// FileID is a nullable pointer on the job; dereferencing it blindly would
	// panic inside the request handler.
	if job.FileID == nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "No file ID found"})
		return
	}

	fileRecord, err := h.fileRepo.GetByID(*job.FileID)
	if err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

	// Create the SpeechKit service.
	speechKitService, err := speechkit.NewSpeechKitService()
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to initialize SpeechKit service: " + err.Error()})
		return
	}
	// The service owns a gRPC connection created for this request only;
	// release it on every exit path. The close error is not actionable here.
	defer speechKitService.Close()

	// Build the storage URI of the file.
	bucketName := os.Getenv("S3_BUCKET_NAME")
	s3URI := fmt.Sprintf("https://storage.yandexcloud.net/%s/%s", bucketName, fileRecord.FileName)

	// Start the asynchronous recognition.
	operationID, err := speechKitService.RecognizeFileFromS3(s3URI)
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to start recognition: " + err.Error()})
		return
	}

	// Remember the recognition operation ID on the job.
	job.RecognitionOpID = &operationID
	job.MoveToState(entity.StateTranscribe)

	err = h.jobRepo.Save(job)
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to update job: " + err.Error()})
		return
	}

	c.Status(http.StatusOK)
}
// RunRecognitionCheckJob acquires one job in the "transcribe" state and checks
// the SpeechKit operation that was started for it.
//
// If the operation is still running the job is saved unchanged and the handler
// answers 200. If the operation is done, the recognition result is fetched,
// its text is stored on the job, the job is moved to entity.StateDone and
// saved. Failure to acquire a job or a missing operation ID yields 400;
// SpeechKit and persistence failures yield 500.
func (h *TranscribeHandler) RunRecognitionCheckJob(c *gin.Context) {
	acquisitionId := uuid.NewString()
	// Cut-off passed to FindAndAcquire: one hour before now.
	rottingTime := time.Now().Add(-1 * time.Hour)

	job, err := h.jobRepo.FindAndAcquire(entity.StateTranscribe, acquisitionId, rottingTime)
	if err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

	if job.RecognitionOpID == nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "No recognition operation ID found"})
		return
	}

	// Create the SpeechKit service.
	speechKitService, err := speechkit.NewSpeechKitService()
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to initialize SpeechKit service: " + err.Error()})
		return
	}
	// The service owns a gRPC connection created for this request only;
	// release it on every exit path. The close error is not actionable here.
	defer speechKitService.Close()

	// Check the operation status.
	operation, err := speechKitService.CheckOperationStatus(*job.RecognitionOpID)
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to check operation status: " + err.Error()})
		return
	}

	if !operation.Done {
		// The operation has not finished yet. The job is saved as loaded;
		// no field is modified in this branch.
		// NOTE(review): the original comment spoke of moving the job to a
		// waiting state, but no state change is made here — confirm intent.
		err = h.jobRepo.Save(job)
		if err != nil {
			c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to update job: " + err.Error()})
			return
		}
		c.Status(http.StatusOK)
		return
	}

	// The operation is done: fetch the result.
	responses, err := speechKitService.GetRecognitionResult(*job.RecognitionOpID)
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to get recognition result: " + err.Error()})
		return
	}

	// Extract the text from the recognition responses.
	transcriptionText := speechkit.ExtractTranscriptionText(responses)

	// Store the result on the job and finish it.
	job.TranscriptionText = &transcriptionText
	job.MoveToState(entity.StateDone)

	err = h.jobRepo.Save(job)
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to update job: " + err.Error()})
		return
	}

	c.Status(http.StatusOK)
}

View File

@@ -5,24 +5,25 @@ import (
)
type TranscribeJob struct {
Id string
State string
FileID *string
IsError bool
ErrorText *string
Worker *string
AcquiredAt *time.Time
CreatedAt time.Time
Id string
State string
FileID *string
IsError bool
ErrorText *string
Worker *string
AcquiredAt *time.Time
CreatedAt time.Time
RecognitionOpID *string // ID операции распознавания в Yandex Cloud
TranscriptionText *string // Результат распознавания
}
const (
StateCreated = "created"
StateConverted = "converted"
StateUploaded = "uploaded"
StateTranscribeReady = "transcribe_ready"
StateTranscribeWait = "transcribe_wait"
StateDone = "done"
StatusFailed = "failed"
StateCreated = "created"
StateConverted = "converted"
StateUploaded = "uploaded"
StateTranscribe = "transcribe"
StateDone = "done"
StatusFailed = "failed"
)
func (j *TranscribeJob) MoveToState(state string) {

View File

@@ -21,14 +21,16 @@ func NewTranscriptJobRepository(db *sql.DB, gq *goqu.Database) *TranscriptJobRep
func (repo *TranscriptJobRepository) Create(job *entity.TranscribeJob) error {
record := goqu.Record{
"id": job.Id,
"state": job.State,
"file_id": job.FileID,
"is_error": job.IsError,
"error_text": job.ErrorText,
"worker": job.Worker,
"acquired_at": job.AcquiredAt,
"created_at": job.CreatedAt,
"id": job.Id,
"state": job.State,
"file_id": job.FileID,
"is_error": job.IsError,
"error_text": job.ErrorText,
"worker": job.Worker,
"acquired_at": job.AcquiredAt,
"created_at": job.CreatedAt,
"recognition_op_id": job.RecognitionOpID,
"transcription_text": job.TranscriptionText,
}
query := repo.gq.Insert("transcribe_jobs").Rows(record)
sql, args, err := query.ToSQL()
@@ -46,12 +48,14 @@ func (repo *TranscriptJobRepository) Create(job *entity.TranscribeJob) error {
func (repo *TranscriptJobRepository) Save(job *entity.TranscribeJob) error {
record := goqu.Record{
"state": job.State,
"file_id": job.FileID,
"is_error": job.IsError,
"error_text": job.ErrorText,
"worker": job.Worker,
"acquired_at": job.AcquiredAt,
"state": job.State,
"file_id": job.FileID,
"is_error": job.IsError,
"error_text": job.ErrorText,
"worker": job.Worker,
"acquired_at": job.AcquiredAt,
"recognition_op_id": job.RecognitionOpID,
"transcription_text": job.TranscriptionText,
}
query := repo.gq.Update("transcribe_jobs").Set(record).Where(goqu.C("id").Eq(job.Id))
sql, args, err := query.ToSQL()
@@ -77,6 +81,8 @@ func (repo *TranscriptJobRepository) GetByID(id string) (*entity.TranscribeJob,
"worker",
"acquired_at",
"created_at",
"recognition_op_id",
"transcription_text",
).Where(goqu.C("id").Eq(id))
sql, args, err := query.ToSQL()
if err != nil {
@@ -93,6 +99,8 @@ func (repo *TranscriptJobRepository) GetByID(id string) (*entity.TranscribeJob,
&job.Worker,
&job.AcquiredAt,
&job.CreatedAt,
&job.RecognitionOpID,
&job.TranscriptionText,
)
if err != nil {
return nil, fmt.Errorf("failed to get transcribe job: %w", err)
@@ -154,6 +162,8 @@ func (repo *TranscriptJobRepository) FindAndAcquire(state, acquisitionId string,
"worker",
"acquired_at",
"created_at",
"recognition_op_id",
"transcription_text",
).Where(goqu.C("worker").Eq(acquisitionId))
sql, args, err = selectQuery.ToSQL()
@@ -171,6 +181,8 @@ func (repo *TranscriptJobRepository) FindAndAcquire(state, acquisitionId string,
&job.Worker,
&job.AcquiredAt,
&job.CreatedAt,
&job.RecognitionOpID,
&job.TranscriptionText,
)
if err != nil {
return nil, fmt.Errorf("failed to get transcribe job: %w", err)

View File

@@ -0,0 +1,160 @@
package speechkit
import (
	"context"
	"errors"
	"fmt"
	"io"
	"os"

	"google.golang.org/grpc"
	"google.golang.org/grpc/credentials"
	"google.golang.org/grpc/metadata"

	stt "github.com/yandex-cloud/go-genproto/yandex/cloud/ai/stt/v3"
	"github.com/yandex-cloud/go-genproto/yandex/cloud/operation"
)
// SpeechKitEndpoint is the host:port address the SpeechKit gRPC client is
// created for.
const SpeechKitEndpoint = "stt.api.cloud.yandex.net:443"
// SpeechKitService bundles the gRPC connection to SpeechKit with the clients
// built on top of it and the credentials read from the environment.
type SpeechKitService struct {
	// Credentials taken from YANDEX_CLOUD_API_KEY / YANDEX_CLOUD_FOLDER_ID.
	apiKey   string
	folderID string

	// conn is shared by both clients below and is released by Close.
	conn *grpc.ClientConn

	sttClient stt.AsyncRecognizerClient        // asynchronous recognition RPCs
	opClient  operation.OperationServiceClient // operation status RPCs
}
// NewSpeechKitService builds a SpeechKitService from the environment.
//
// It reads YANDEX_CLOUD_API_KEY and YANDEX_CLOUD_FOLDER_ID and returns an
// error if either is empty. Otherwise it creates a gRPC client for
// SpeechKitEndpoint with TLS transport credentials and wires the recognizer
// and operation clients onto that connection. The caller is expected to call
// Close on the returned service to release the connection.
func NewSpeechKitService() (*SpeechKitService, error) {
	key, folder := os.Getenv("YANDEX_CLOUD_API_KEY"), os.Getenv("YANDEX_CLOUD_FOLDER_ID")
	if key == "" || folder == "" {
		return nil, fmt.Errorf("missing required Yandex Cloud environment variables")
	}

	// TLS with the default configuration (nil config).
	transport := grpc.WithTransportCredentials(credentials.NewTLS(nil))

	clientConn, err := grpc.NewClient(SpeechKitEndpoint, transport)
	if err != nil {
		return nil, fmt.Errorf("failed to connect to SpeechKit: %w", err)
	}

	svc := &SpeechKitService{
		conn:      clientConn,
		sttClient: stt.NewAsyncRecognizerClient(clientConn),
		opClient:  operation.NewOperationServiceClient(clientConn),
		apiKey:    key,
		folderID:  folder,
	}
	return svc, nil
}
// Close closes the underlying gRPC connection and returns whatever error the
// connection reports.
func (s *SpeechKitService) Close() error {
	closeErr := s.conn.Close()
	return closeErr
}
// RecognizeFileFromS3 starts an asynchronous recognition of the file located
// at s3URI and returns the ID of the operation that was created.
//
// The request asks for the "general" model, OGG/Opus container audio, enabled
// text normalization (profanity filter off, literature text on), full-data
// audio processing and enabled speaker labeling. The API key is sent in the
// "authorization" outgoing metadata entry.
func (s *SpeechKitService) RecognizeFileFromS3(s3URI string) (string, error) {
	// Attach the authorization metadata to a fresh background context.
	authCtx := metadata.AppendToOutgoingContext(context.Background(), "authorization", "Api-Key "+s.apiKey)

	audioFormat := &stt.AudioFormatOptions{
		AudioFormat: &stt.AudioFormatOptions_ContainerAudio{
			ContainerAudio: &stt.ContainerAudio{
				ContainerAudioType: stt.ContainerAudio_OGG_OPUS,
			},
		},
	}

	normalization := &stt.TextNormalizationOptions{
		TextNormalization: stt.TextNormalizationOptions_TEXT_NORMALIZATION_ENABLED,
		ProfanityFilter:   false,
		LiteratureText:    true,
	}

	modelOptions := &stt.RecognitionModelOptions{
		Model:               "general", // general-purpose model
		AudioFormat:         audioFormat,
		TextNormalization:   normalization,
		AudioProcessingType: stt.RecognitionModelOptions_FULL_DATA,
	}

	request := &stt.RecognizeFileRequest{
		AudioSource:      &stt.RecognizeFileRequest_Uri{Uri: s3URI},
		RecognitionModel: modelOptions,
		SpeakerLabeling: &stt.SpeakerLabelingOptions{
			SpeakerLabeling: stt.SpeakerLabelingOptions_SPEAKER_LABELING_ENABLED,
		},
	}

	// Send the request; only the operation ID is of interest to the caller.
	started, err := s.sttClient.RecognizeFile(authCtx, request)
	if err != nil {
		return "", fmt.Errorf("failed to start recognition: %w", err)
	}
	return started.Id, nil
}
// GetRecognitionResult fetches the recognition result for operationID.
//
// It opens the GetRecognition server stream with the API key in the
// "authorization" outgoing metadata entry and collects every streamed response
// until the stream ends. It returns the collected responses in arrival order,
// or an error if the stream cannot be opened or a receive fails.
func (s *SpeechKitService) GetRecognitionResult(operationID string) ([]*stt.StreamingResponse, error) {
	ctx := context.Background()

	// Attach the authorization metadata to the context.
	ctx = metadata.AppendToOutgoingContext(ctx, "authorization", "Api-Key "+s.apiKey)

	req := &stt.GetRecognitionRequest{
		OperationId: operationID,
	}

	stream, err := s.sttClient.GetRecognition(ctx, req)
	if err != nil {
		return nil, fmt.Errorf("failed to get recognition stream: %w", err)
	}

	var responses []*stt.StreamingResponse
	for {
		resp, err := stream.Recv()
		if err != nil {
			// End of stream is signalled by the io.EOF sentinel; compare the
			// error value rather than its message text.
			if errors.Is(err, io.EOF) {
				break
			}
			return nil, fmt.Errorf("failed to receive recognition response: %w", err)
		}
		responses = append(responses, resp)
	}

	return responses, nil
}
// CheckOperationStatus fetches the operation identified by operationID from
// the operation service and returns it; callers inspect its Done field to see
// whether recognition has finished.
//
// The request carries the API key in the "authorization" outgoing metadata
// entry, the same way the other RPC methods of this service do.
func (s *SpeechKitService) CheckOperationStatus(operationID string) (*operation.Operation, error) {
	ctx := context.Background()

	// Attach the authorization metadata to the context, consistent with
	// RecognizeFileFromS3 and GetRecognitionResult.
	ctx = metadata.AppendToOutgoingContext(ctx, "authorization", "Api-Key "+s.apiKey)

	op, err := s.opClient.Get(ctx, &operation.GetOperationRequest{
		OperationId: operationID,
	})
	if err != nil {
		return nil, fmt.Errorf("failed to get operation status: %w", err)
	}

	return op, nil
}
// ExtractTranscriptionText concatenates the text of every alternative found in
// the "final" payload of the given responses, in order. Each piece of text is
// followed by a single space, so a non-empty result ends with a space.
// Responses without a final payload are skipped; an empty input yields "".
func ExtractTranscriptionText(responses []*stt.StreamingResponse) string {
	var collected []byte
	for i := range responses {
		finalPart := responses[i].GetFinal()
		if finalPart == nil {
			continue
		}
		for _, candidate := range finalPart.Alternatives {
			collected = append(collected, candidate.Text...)
			collected = append(collected, ' ')
		}
	}
	return string(collected)
}

View File

@@ -61,6 +61,8 @@ func main() {
api.POST("/transcribe/convert", transcribeHandler.RunConversionJob)
api.POST("/transcribe/upload", transcribeHandler.RunUploadJob)
api.POST("/transcribe/recognize", transcribeHandler.RunRecognitionJob)
api.POST("/transcribe/check", transcribeHandler.RunRecognitionCheckJob)
}
// Добавляем middleware для обработки больших файлов

View File

@@ -3,11 +3,19 @@ CREATE TABLE transcribe_jobs (
id TEXT PRIMARY KEY,
state TEXT NOT NULL,
file_id TEXT,
is_error BOOLEAN NOT NULL,
error_text TEXT,
worker TEXT,
acquired_at DATETIME,
acquisition_id TEXT,
acquire_time DATETIME,
delay_time DATETIME,
recognition_op_id TEXT,
transcription_text TEXT,
created_at DATETIME NOT NULL,
updated_at DATETIME NOT NULL,
FOREIGN KEY (file_id) REFERENCES files(id)
);