From 672d8573fc65b8a0a648867656ac38a58ec711a2 Mon Sep 17 00:00:00 2001 From: Anton Vakhrushev Date: Mon, 11 Aug 2025 15:26:55 +0300 Subject: [PATCH] Add initial audio recognition requests --- .env.example | 6 + go.mod | 16 +- go.sum | 66 +++++--- internal/controller/http/transcribe.go | 127 +++++++++++++- internal/entity/job.go | 31 ++-- internal/repo/sqlite/transcript_job_repo.go | 40 +++-- internal/service/speechkit/speechkit.go | 160 ++++++++++++++++++ main.go | 2 + .../002_create_transcribe_jobs_table.sql | 12 +- 9 files changed, 396 insertions(+), 64 deletions(-) create mode 100644 internal/service/speechkit/speechkit.go diff --git a/.env.example b/.env.example index c7acc17..005a399 100644 --- a/.env.example +++ b/.env.example @@ -13,3 +13,9 @@ S3_BUCKET_NAME=your_bucket_name # Кастомный endpoint для S3 (оставить пустым для AWS S3, заполнить для MinIO или других S3-совместимых сервисов) S3_ENDPOINT= + +# Yandex Cloud Speech-to-Text Configuration +# API ключ для доступа к Yandex Cloud (получить в консоли Yandex Cloud) +YANDEX_CLOUD_API_KEY=your_api_key_here +# ID папки в Yandex Cloud (получить в консоли Yandex Cloud) +YANDEX_CLOUD_FOLDER_ID=your_folder_id_here diff --git a/go.mod b/go.mod index 9adc8f1..79044fc 100644 --- a/go.mod +++ b/go.mod @@ -10,11 +10,13 @@ require ( github.com/aws/aws-sdk-go-v2/service/s3 v1.86.0 github.com/doug-martin/goqu/v9 v9.19.0 github.com/gin-gonic/gin v1.10.1 - github.com/google/uuid v1.4.0 + github.com/google/uuid v1.6.0 github.com/joho/godotenv v1.5.1 github.com/mattn/go-sqlite3 v1.14.17 github.com/pressly/goose/v3 v3.15.1 github.com/stretchr/testify v1.10.0 + github.com/yandex-cloud/go-genproto v0.17.0 + google.golang.org/grpc v1.74.2 ) require ( @@ -54,10 +56,12 @@ require ( github.com/twitchyliquid64/golang-asm v0.15.1 // indirect github.com/ugorji/go/codec v1.2.12 // indirect golang.org/x/arch v0.8.0 // indirect - golang.org/x/crypto v0.23.0 // indirect - golang.org/x/net v0.25.0 // indirect - golang.org/x/sys v0.20.0 // indirect - golang.org/x/text v0.15.0 // indirect - google.golang.org/protobuf v1.34.1 // indirect + golang.org/x/crypto v0.38.0 // indirect + golang.org/x/net v0.40.0 // indirect + golang.org/x/sys v0.33.0 // indirect + golang.org/x/text v0.25.0 // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20250528174236-200df99c418a // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20250528174236-200df99c418a // indirect + google.golang.org/protobuf v1.36.7 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/go.sum b/go.sum index 949ec7f..e3afabf 100644 --- a/go.sum +++ b/go.sum @@ -60,6 +60,10 @@ github.com/gin-contrib/sse v0.1.0 h1:Y/yl/+YNO8GZSjAhjMsSuLt29uWRFHdHYUb5lYOV9qE github.com/gin-contrib/sse v0.1.0/go.mod h1:RHrZQHXnP2xjPF+u1gW/2HnVO7nvIa9PG3Gm+fLHvGI= github.com/gin-gonic/gin v1.10.1 h1:T0ujvqyCSqRopADpgPgiTT63DUQVSfojyME59Ei63pQ= github.com/gin-gonic/gin v1.10.1/go.mod h1:4PMNQiOhvDRa013RKVbsiNwoyezlm2rm0uX/T7kzp5Y= +github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI= +github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= +github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= github.com/go-playground/assert/v2 v2.2.0 h1:JvknZsQTYeFEAhQwI4qEt9cyV5ONwRHC+lYKSsYSR8s= github.com/go-playground/assert/v2 v2.2.0/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4= github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA= @@ -72,11 +76,13 @@ github.com/go-sql-driver/mysql v1.6.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LB github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU= github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I= github.com/golang-sql/civil v0.0.0-20190719163853-cb61b32ac6fe/go.mod h1:8vg3r2VgvsThLBIFL93Qb5yWzgyZWhEmBwUJWevAkK0= -github.com/google/go-cmp v0.5.5 h1:Khx7svrCpmxxtHBq5j2mp/xVjsi8hQMfNLvJFAlrGgU= -github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= +github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= +github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= +github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= -github.com/google/uuid v1.4.0 h1:MtMxsa51/r9yyhkyLsVeVt0B+BGQZzpQiTQ4eHZ8bc4= -github.com/google/uuid v1.4.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/joho/godotenv v1.5.1 h1:7eLL/+HRGLY0ldzfGMeQkb7vMd0as4CfYvUVzLqw0N0= github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4= github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= @@ -128,34 +134,54 @@ github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08= github.com/ugorji/go/codec v1.2.12 h1:9LC83zGrHhuUA9l16C9AHXAqEV/2wBQ4nkvumAE65EE= github.com/ugorji/go/codec v1.2.12/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg= +github.com/yandex-cloud/go-genproto v0.17.0 h1:uQ5Lr8B/xIyY1KrOm7pItYY3YT/DL1O8gVaY03ouYKM= +github.com/yandex-cloud/go-genproto v0.17.0/go.mod h1:0LDD/IZLIUIV4iPH+YcF+jysO3jkSvADFGm4dCAuwQo= +go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA= +go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A= +go.opentelemetry.io/otel v1.36.0 h1:UumtzIklRBY6cI/lllNZlALOF5nNIzJVb16APdvgTXg= +go.opentelemetry.io/otel v1.36.0/go.mod h1:/TcFMXYjyRNh8khOAO9ybYkqaDBb/70aVwkNML4pP8E= +go.opentelemetry.io/otel/metric v1.36.0 h1:MoWPKVhQvJ+eeXWHFBOPoBOi20jh6Iq2CcCREuTYufE= +go.opentelemetry.io/otel/metric v1.36.0/go.mod h1:zC7Ks+yeyJt4xig9DEw9kuUFe5C3zLbVjV2PzT6qzbs= +go.opentelemetry.io/otel/sdk v1.36.0 h1:b6SYIuLRs88ztox4EyrvRti80uXIFy+Sqzoh9kFULbs= +go.opentelemetry.io/otel/sdk v1.36.0/go.mod h1:+lC+mTgD+MUWfjJubi2vvXWcVxyr9rmlshZni72pXeY= +go.opentelemetry.io/otel/sdk/metric v1.36.0 h1:r0ntwwGosWGaa0CrSt8cuNuTcccMXERFwHX4dThiPis= +go.opentelemetry.io/otel/sdk/metric v1.36.0/go.mod h1:qTNOhFDfKRwX0yXOqJYegL5WRaW376QbB7P4Pb0qva4= +go.opentelemetry.io/otel/trace v1.36.0 h1:ahxWNuqZjpdiFAyrIoQ4GIiAIhxAunQR6MUoKrsNd4w= +go.opentelemetry.io/otel/trace v1.36.0/go.mod h1:gQ+OnDZzrybY4k4seLzPAWNwVBBVlF2szhehOBB/tGA= golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8= golang.org/x/arch v0.8.0 h1:3wRIsP3pM4yUptoR96otTUOXI367OS0+c9eeRi9doIc= golang.org/x/arch v0.8.0/go.mod h1:FEVrYAQjsQXMVJ1nsMoVVXPZg6p2JE2mx8psSWTDQys= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20190325154230-a5d413f7728c/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= -golang.org/x/crypto v0.23.0 h1:dIJU/v2J8Mdglj/8rJ6UUOM3Zc9zLZxVZwwxMooUSAI= -golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8= -golang.org/x/mod v0.12.0 h1:rmsUpXtvNzj340zd98LZ4KntptpfRHwpFOHG188oHXc= -golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= +golang.org/x/crypto v0.38.0 h1:jt+WWG8IZlBnVbomuhg2Mdq0+BBQaHbtqHEFEigjUV8= +golang.org/x/crypto v0.38.0/go.mod h1:MvrbAqul58NNYPKnOra203SB9vpuZW0e+RRZV+Ggqjw= +golang.org/x/mod v0.17.0 h1:zY54UmvipHiNd+pm+m0x9KhZ9hl1/7QNMyxXbc6ICqA= +golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= -golang.org/x/net v0.25.0 h1:d/OCCoBEUq33pjydKrGQhw7IlUPI2Oylr+8qLx49kac= -golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM= +golang.org/x/net v0.40.0 h1:79Xs7wF06Gbdcg4kdCCIQArK11Z1hr5POQ6+fIYHNuY= +golang.org/x/net v0.40.0/go.mod h1:y0hY0exeL2Pku80/zKK7tpntoX23cqL3Oa6njdgRtds= +golang.org/x/sync v0.14.0 h1:woo0S4Yywslg6hp4eUFjTVOyKt0RookbpAHG4c1HmhQ= +golang.org/x/sync v0.14.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y= -golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.33.0 h1:q3i8TbbEz+JRD9ywIRlyRAQbM0qF7hu24q3teo2hbuw= +golang.org/x/sys v0.33.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= -golang.org/x/text v0.15.0 h1:h1V/4gjBv8v9cjcR6+AR5+/cIYK5N/WAgiv4xlsEtAk= -golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= -golang.org/x/tools v0.13.0 h1:Iey4qkscZuv0VvIt8E0neZjtPVQFSc870HQ448QgEmQ= -golang.org/x/tools v0.13.0/go.mod h1:HvlwmtVNQAhOuCjW7xxvovg8wbNq7LwfXh/k7wXUl58= -golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4= -golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -google.golang.org/protobuf v1.34.1 h1:9ddQBjfCyZPOHPUiPxpYESBLc+T8P3E+Vo4IbKZgFWg= -google.golang.org/protobuf v1.34.1/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= +golang.org/x/text v0.25.0 h1:qVyWApTSYLk/drJRO5mDlNYskwQznZmkpV2c8q9zls4= +golang.org/x/text v0.25.0/go.mod h1:WEdwpYrmk1qmdHvhkSTNPm3app7v4rsT8F2UD6+VHIA= +golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d h1:vU5i/LfpvrRCpgM/VPfJLg5KjxD3E+hfT1SH+d9zLwg= +golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk= +google.golang.org/genproto/googleapis/api v0.0.0-20250528174236-200df99c418a h1:SGktgSolFCo75dnHJF2yMvnns6jCmHFJ0vE4Vn2JKvQ= +google.golang.org/genproto/googleapis/api v0.0.0-20250528174236-200df99c418a/go.mod h1:a77HrdMjoeKbnd2jmgcWdaS++ZLZAEq3orIOAEIKiVw= +google.golang.org/genproto/googleapis/rpc v0.0.0-20250528174236-200df99c418a h1:v2PbRU4K3llS09c7zodFpNePeamkAwG3mPrAery9VeE= +google.golang.org/genproto/googleapis/rpc v0.0.0-20250528174236-200df99c418a/go.mod h1:qQ0YXyHHx3XkvlzUtpXDkS29lDSafHMZBAZDc03LQ3A= +google.golang.org/grpc v1.74.2 h1:WoosgB65DlWVC9FqI82dGsZhWFNBSLjQ84bjROOpMu4= +google.golang.org/grpc v1.74.2/go.mod h1:CtQ+BGjaAIXHs/5YS3i473GqwBBa1zGQNevxdeBEXrM= +google.golang.org/protobuf v1.36.7 h1:IgrO7UwFQGJdRNXH/sQux4R1Dj1WAKcLElzeeRaXV2A= +google.golang.org/protobuf v1.36.7/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/internal/controller/http/transcribe.go b/internal/controller/http/transcribe.go index 8b470b8..59972ba 100644 --- a/internal/controller/http/transcribe.go +++ b/internal/controller/http/transcribe.go @@ -12,6 +12,7 @@ import ( "git.vakhrushev.me/av/transcriber/internal/repo" "git.vakhrushev.me/av/transcriber/internal/repo/ffmpeg" "git.vakhrushev.me/av/transcriber/internal/service/s3" + "git.vakhrushev.me/av/transcriber/internal/service/speechkit" "github.com/gin-gonic/gin" "github.com/google/uuid" ) @@ -31,9 +32,10 @@ type CreateTranscribeJobResponse struct { } type GetTranscribeJobResponse struct { - JobID string `json:"job_id"` - State string `json:"status"` - CreatedAt time.Time `json:"created_at"` + JobID string `json:"job_id"` + State string `json:"status"` + CreatedAt time.Time `json:"created_at"` + TranscriptionText *string `json:"transcription_text,omitempty"` } func (h *TranscribeHandler) CreateTranscribeJob(c *gin.Context) { @@ -123,9 +125,10 @@ func (h *TranscribeHandler) GetTranscribeJobStatus(c *gin.Context) { } c.JSON(http.StatusOK, GetTranscribeJobResponse{ - JobID: job.Id, - State: job.State, - CreatedAt: job.CreatedAt, + JobID: job.Id, + State: job.State, + CreatedAt: job.CreatedAt, + TranscriptionText: job.TranscriptionText, }) } @@ -233,7 +236,7 @@ func (h *TranscribeHandler) RunUploadJob(c *gin.Context) { } job.FileID = &destFileId - job.MoveToState(entity.StateTranscribeReady) + job.MoveToState(entity.StateUploaded) // Сохраняем информацию о загрузке файла на S3 err = h.fileRepo.Create(destFileRecord) @@ -251,3 +254,113 @@ func (h *TranscribeHandler) RunUploadJob(c *gin.Context) { c.Status(http.StatusOK) } + +func (h *TranscribeHandler) RunRecognitionJob(c *gin.Context) { + acquisitionId := uuid.NewString() + rottingTime := time.Now().Add(-1 * time.Hour) + + job, err := h.jobRepo.FindAndAcquire(entity.StateUploaded, acquisitionId, rottingTime) + if err != nil { + c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()}) + return + } + + fileRecord, err := h.fileRepo.GetByID(*job.FileID) + if err != nil { + c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()}) + return + } + + // Создаем SpeechKit сервис + speechKitService, err := speechkit.NewSpeechKitService() + if err != nil { + c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to initialize SpeechKit service: " + err.Error()}) + return + } + + // Формируем S3 URI для файла + bucketName := os.Getenv("S3_BUCKET_NAME") + s3URI := fmt.Sprintf("https://storage.yandexcloud.net/%s/%s", bucketName, fileRecord.FileName) + + // Запускаем асинхронное распознавание + operationID, err := speechKitService.RecognizeFileFromS3(s3URI) + if err != nil { + c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to start recognition: " + err.Error()}) + return + } + + // Обновляем задачу с ID операции распознавания + job.RecognitionOpID = &operationID + job.MoveToState(entity.StateTranscribe) + + err = h.jobRepo.Save(job) + if err != nil { + c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to update job: " + err.Error()}) + return + } + + c.Status(http.StatusOK) +} + +func (h *TranscribeHandler) RunRecognitionCheckJob(c *gin.Context) { + acquisitionId := uuid.NewString() + rottingTime := time.Now().Add(-1 * time.Hour) + + job, err := h.jobRepo.FindAndAcquire(entity.StateTranscribe, acquisitionId, rottingTime) + if err != nil { + c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()}) + return + } + + if job.RecognitionOpID == nil { + c.JSON(http.StatusBadRequest, gin.H{"error": "No recognition operation ID found"}) + return + } + + // Создаем SpeechKit сервис + speechKitService, err := speechkit.NewSpeechKitService() + if err != nil { + c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to initialize SpeechKit service: " + err.Error()}) + return + } + + // Проверяем статус операции + operation, err := speechKitService.CheckOperationStatus(*job.RecognitionOpID) + if err != nil { + c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to check operation status: " + err.Error()}) + return + } + + if !operation.Done { + // Операция еще не завершена, переводим в состояние ожидания + err = h.jobRepo.Save(job) + if err != nil { + c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to update job: " + err.Error()}) + return + } + c.Status(http.StatusOK) + return + } + + // Операция завершена, получаем результат + responses, err := speechKitService.GetRecognitionResult(*job.RecognitionOpID) + if err != nil { + c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to get recognition result: " + err.Error()}) + return + } + + // Извлекаем текст из результатов + transcriptionText := speechkit.ExtractTranscriptionText(responses) + + // Обновляем задачу с результатом + job.TranscriptionText = &transcriptionText + job.MoveToState(entity.StateDone) + + err = h.jobRepo.Save(job) + if err != nil { + c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to update job: " + err.Error()}) + return + } + + c.Status(http.StatusOK) +} diff --git a/internal/entity/job.go b/internal/entity/job.go index c2df303..b9ebcf1 100644 --- a/internal/entity/job.go +++ b/internal/entity/job.go @@ -5,24 +5,25 @@ import ( ) type TranscribeJob struct { - Id string - State string - FileID *string - IsError bool - ErrorText *string - Worker *string - AcquiredAt *time.Time - CreatedAt time.Time + Id string + State string + FileID *string + IsError bool + ErrorText *string + Worker *string + AcquiredAt *time.Time + CreatedAt time.Time + RecognitionOpID *string // ID операции распознавания в Yandex Cloud + TranscriptionText *string // Результат распознавания } const ( - StateCreated = "created" - StateConverted = "converted" - StateUploaded = "uploaded" - StateTranscribeReady = "transcribe_ready" - StateTranscribeWait = "transcribe_wait" - StateDone = "done" - StatusFailed = "failed" + StateCreated = "created" + StateConverted = "converted" + StateUploaded = "uploaded" + StateTranscribe = "transcribe" + StateDone = "done" + StatusFailed = "failed" ) func (j *TranscribeJob) MoveToState(state string) { diff --git a/internal/repo/sqlite/transcript_job_repo.go b/internal/repo/sqlite/transcript_job_repo.go index f9ee623..cd69e7b 100644 --- a/internal/repo/sqlite/transcript_job_repo.go +++ b/internal/repo/sqlite/transcript_job_repo.go @@ -21,14 +21,16 @@ func NewTranscriptJobRepository(db *sql.DB, gq *goqu.Database) *TranscriptJobRep func (repo *TranscriptJobRepository) Create(job *entity.TranscribeJob) error { record := goqu.Record{ - "id": job.Id, - "state": job.State, - "file_id": job.FileID, - "is_error": job.IsError, - "error_text": job.ErrorText, - "worker": job.Worker, - "acquired_at": job.AcquiredAt, - "created_at": job.CreatedAt, + "id": job.Id, + "state": job.State, + "file_id": job.FileID, + "is_error": job.IsError, + "error_text": job.ErrorText, + "worker": job.Worker, + "acquired_at": job.AcquiredAt, + "created_at": job.CreatedAt, + "recognition_op_id": job.RecognitionOpID, + "transcription_text": job.TranscriptionText, } query := repo.gq.Insert("transcribe_jobs").Rows(record) sql, args, err := query.ToSQL() @@ -46,12 +48,14 @@ func (repo *TranscriptJobRepository) Create(job *entity.TranscribeJob) error { func (repo *TranscriptJobRepository) Save(job *entity.TranscribeJob) error { record := goqu.Record{ - "state": job.State, - "file_id": job.FileID, - "is_error": job.IsError, - "error_text": job.ErrorText, - "worker": job.Worker, - "acquired_at": job.AcquiredAt, + "state": job.State, + "file_id": job.FileID, + "is_error": job.IsError, + "error_text": job.ErrorText, + "worker": job.Worker, + "acquired_at": job.AcquiredAt, + "recognition_op_id": job.RecognitionOpID, + "transcription_text": job.TranscriptionText, } query := repo.gq.Update("transcribe_jobs").Set(record).Where(goqu.C("id").Eq(job.Id)) sql, args, err := query.ToSQL() @@ -77,6 +81,8 @@ func (repo *TranscriptJobRepository) GetByID(id string) (*entity.TranscribeJob, "worker", "acquired_at", "created_at", + "recognition_op_id", + "transcription_text", ).Where(goqu.C("id").Eq(id)) sql, args, err := query.ToSQL() if err != nil { @@ -93,6 +99,8 @@ func (repo *TranscriptJobRepository) GetByID(id string) (*entity.TranscribeJob, &job.Worker, &job.AcquiredAt, &job.CreatedAt, + &job.RecognitionOpID, + &job.TranscriptionText, ) if err != nil { return nil, fmt.Errorf("failed to get transcribe job: %w", err) @@ -154,6 +162,8 @@ func (repo *TranscriptJobRepository) FindAndAcquire(state, acquisitionId string, "worker", "acquired_at", "created_at", + "recognition_op_id", + "transcription_text", ).Where(goqu.C("worker").Eq(acquisitionId)) sql, args, err = selectQuery.ToSQL() @@ -171,6 +181,8 @@ func (repo *TranscriptJobRepository) FindAndAcquire(state, acquisitionId string, &job.Worker, &job.AcquiredAt, &job.CreatedAt, + &job.RecognitionOpID, + &job.TranscriptionText, ) if err != nil { return nil, fmt.Errorf("failed to get transcribe job: %w", err) diff --git a/internal/service/speechkit/speechkit.go b/internal/service/speechkit/speechkit.go new file mode 100644 index 0000000..57070e7 --- /dev/null +++ b/internal/service/speechkit/speechkit.go @@ -0,0 +1,160 @@ +package speechkit + +import ( + "context" + "fmt" + "os" + + "google.golang.org/grpc" + "google.golang.org/grpc/credentials" + "google.golang.org/grpc/metadata" + + stt "github.com/yandex-cloud/go-genproto/yandex/cloud/ai/stt/v3" + "github.com/yandex-cloud/go-genproto/yandex/cloud/operation" +) + +const ( + SpeechKitEndpoint = "stt.api.cloud.yandex.net:443" +) + +type SpeechKitService struct { + conn *grpc.ClientConn + sttClient stt.AsyncRecognizerClient + opClient operation.OperationServiceClient + apiKey string + folderID string +} + +func NewSpeechKitService() (*SpeechKitService, error) { + apiKey := os.Getenv("YANDEX_CLOUD_API_KEY") + folderID := os.Getenv("YANDEX_CLOUD_FOLDER_ID") + + if apiKey == "" || folderID == "" { + return nil, fmt.Errorf("missing required Yandex Cloud environment variables") + } + + // Создаем защищенное соединение + creds := credentials.NewTLS(nil) + conn, err := grpc.NewClient(SpeechKitEndpoint, grpc.WithTransportCredentials(creds)) + if err != nil { + return nil, fmt.Errorf("failed to connect to SpeechKit: %w", err) + } + + sttClient := stt.NewAsyncRecognizerClient(conn) + opClient := operation.NewOperationServiceClient(conn) + + return &SpeechKitService{ + conn: conn, + sttClient: sttClient, + opClient: opClient, + apiKey: apiKey, + folderID: folderID, + }, nil +} + +func (s *SpeechKitService) Close() error { + return s.conn.Close() +} + +// RecognizeFileFromS3 запускает асинхронное распознавание файла из S3 +func (s *SpeechKitService) RecognizeFileFromS3(s3URI string) (string, error) { + ctx := context.Background() + + // Добавляем авторизацию в контекст + ctx = metadata.AppendToOutgoingContext(ctx, "authorization", "Api-Key "+s.apiKey) + + // Создаем запрос на распознавание + req := &stt.RecognizeFileRequest{ + AudioSource: &stt.RecognizeFileRequest_Uri{ + Uri: s3URI, + }, + RecognitionModel: &stt.RecognitionModelOptions{ + Model: "general", // Используем общую модель + AudioFormat: &stt.AudioFormatOptions{ + AudioFormat: &stt.AudioFormatOptions_ContainerAudio{ + ContainerAudio: &stt.ContainerAudio{ + ContainerAudioType: stt.ContainerAudio_OGG_OPUS, + }, + }, + }, + TextNormalization: &stt.TextNormalizationOptions{ + TextNormalization: stt.TextNormalizationOptions_TEXT_NORMALIZATION_ENABLED, + ProfanityFilter: false, + LiteratureText: true, + }, + AudioProcessingType: stt.RecognitionModelOptions_FULL_DATA, + }, + SpeakerLabeling: &stt.SpeakerLabelingOptions{ + SpeakerLabeling: stt.SpeakerLabelingOptions_SPEAKER_LABELING_ENABLED, + }, + } + + // Отправляем запрос + op, err := s.sttClient.RecognizeFile(ctx, req) + if err != nil { + return "", fmt.Errorf("failed to start recognition: %w", err) + } + + return op.Id, nil +} + +// GetRecognitionResult получает результат распознавания по ID операции +func (s *SpeechKitService) GetRecognitionResult(operationID string) ([]*stt.StreamingResponse, error) { + ctx := context.Background() + + // Добавляем авторизацию в контекст + ctx = metadata.AppendToOutgoingContext(ctx, "authorization", "Api-Key "+s.apiKey) + + req := &stt.GetRecognitionRequest{ + OperationId: operationID, + } + + stream, err := s.sttClient.GetRecognition(ctx, req) + if err != nil { + return nil, fmt.Errorf("failed to get recognition stream: %w", err) + } + + var responses []*stt.StreamingResponse + for { + resp, err := stream.Recv() + if err != nil { + if err.Error() == "EOF" { + break + } + return nil, fmt.Errorf("failed to receive recognition response: %w", err) + } + responses = append(responses, resp) + } + + return responses, nil +} + +// CheckOperationStatus проверяет статус операции распознавания +func (s *SpeechKitService) CheckOperationStatus(operationID string) (*operation.Operation, error) { + ctx := context.Background() + + op, err := s.opClient.Get(ctx, &operation.GetOperationRequest{ + OperationId: operationID, + }) + + if err != nil { + return nil, fmt.Errorf("failed to get operation status: %w", err) + } + + return op, nil +} + +// ExtractTranscriptionText извлекает текст из результатов распознавания +func ExtractTranscriptionText(responses []*stt.StreamingResponse) string { + var fullText string + + for _, resp := range responses { + if final := resp.GetFinal(); final != nil { + for _, alt := range final.Alternatives { + fullText += alt.Text + " " + } + } + } + + return fullText +} diff --git a/main.go b/main.go index 1ed719e..ee0f83b 100644 --- a/main.go +++ b/main.go @@ -61,6 +61,8 @@ func main() { api.POST("/transcribe/convert", transcribeHandler.RunConversionJob) api.POST("/transcribe/upload", transcribeHandler.RunUploadJob) + api.POST("/transcribe/recognize", transcribeHandler.RunRecognitionJob) + api.POST("/transcribe/check", transcribeHandler.RunRecognitionCheckJob) } // Добавляем middleware для обработки больших файлов diff --git a/migrations/002_create_transcribe_jobs_table.sql b/migrations/002_create_transcribe_jobs_table.sql index b0185b7..55b658e 100644 --- a/migrations/002_create_transcribe_jobs_table.sql +++ b/migrations/002_create_transcribe_jobs_table.sql @@ -3,11 +3,19 @@ CREATE TABLE transcribe_jobs ( id TEXT PRIMARY KEY, state TEXT NOT NULL, file_id TEXT, + is_error BOOLEAN NOT NULL, error_text TEXT, - worker TEXT, - acquired_at DATETIME, + + acquisition_id TEXT, + acquire_time DATETIME, + delay_time DATETIME, + + recognition_op_id TEXT, + transcription_text TEXT, + created_at DATETIME NOT NULL, + updated_at DATETIME NOT NULL, FOREIGN KEY (file_id) REFERENCES files(id) );