Add initial audio recognition requests
This commit is contained in:
@@ -13,3 +13,9 @@ S3_BUCKET_NAME=your_bucket_name
|
||||
|
||||
# Кастомный endpoint для S3 (оставить пустым для AWS S3, заполнить для MinIO или других S3-совместимых сервисов)
|
||||
S3_ENDPOINT=
|
||||
|
||||
# Yandex Cloud Speech-to-Text Configuration
|
||||
# API ключ для доступа к Yandex Cloud (получить в консоли Yandex Cloud)
|
||||
YANDEX_CLOUD_API_KEY=your_api_key_here
|
||||
# ID папки в Yandex Cloud (получить в консоли Yandex Cloud)
|
||||
YANDEX_CLOUD_FOLDER_ID=your_folder_id_here
|
||||
|
16
go.mod
16
go.mod
@@ -10,11 +10,13 @@ require (
|
||||
github.com/aws/aws-sdk-go-v2/service/s3 v1.86.0
|
||||
github.com/doug-martin/goqu/v9 v9.19.0
|
||||
github.com/gin-gonic/gin v1.10.1
|
||||
github.com/google/uuid v1.4.0
|
||||
github.com/google/uuid v1.6.0
|
||||
github.com/joho/godotenv v1.5.1
|
||||
github.com/mattn/go-sqlite3 v1.14.17
|
||||
github.com/pressly/goose/v3 v3.15.1
|
||||
github.com/stretchr/testify v1.10.0
|
||||
github.com/yandex-cloud/go-genproto v0.17.0
|
||||
google.golang.org/grpc v1.74.2
|
||||
)
|
||||
|
||||
require (
|
||||
@@ -54,10 +56,12 @@ require (
|
||||
github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
|
||||
github.com/ugorji/go/codec v1.2.12 // indirect
|
||||
golang.org/x/arch v0.8.0 // indirect
|
||||
golang.org/x/crypto v0.23.0 // indirect
|
||||
golang.org/x/net v0.25.0 // indirect
|
||||
golang.org/x/sys v0.20.0 // indirect
|
||||
golang.org/x/text v0.15.0 // indirect
|
||||
google.golang.org/protobuf v1.34.1 // indirect
|
||||
golang.org/x/crypto v0.38.0 // indirect
|
||||
golang.org/x/net v0.40.0 // indirect
|
||||
golang.org/x/sys v0.33.0 // indirect
|
||||
golang.org/x/text v0.25.0 // indirect
|
||||
google.golang.org/genproto/googleapis/api v0.0.0-20250528174236-200df99c418a // indirect
|
||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20250528174236-200df99c418a // indirect
|
||||
google.golang.org/protobuf v1.36.7 // indirect
|
||||
gopkg.in/yaml.v3 v3.0.1 // indirect
|
||||
)
|
||||
|
66
go.sum
66
go.sum
@@ -60,6 +60,10 @@ github.com/gin-contrib/sse v0.1.0 h1:Y/yl/+YNO8GZSjAhjMsSuLt29uWRFHdHYUb5lYOV9qE
|
||||
github.com/gin-contrib/sse v0.1.0/go.mod h1:RHrZQHXnP2xjPF+u1gW/2HnVO7nvIa9PG3Gm+fLHvGI=
|
||||
github.com/gin-gonic/gin v1.10.1 h1:T0ujvqyCSqRopADpgPgiTT63DUQVSfojyME59Ei63pQ=
|
||||
github.com/gin-gonic/gin v1.10.1/go.mod h1:4PMNQiOhvDRa013RKVbsiNwoyezlm2rm0uX/T7kzp5Y=
|
||||
github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI=
|
||||
github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
|
||||
github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
|
||||
github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
|
||||
github.com/go-playground/assert/v2 v2.2.0 h1:JvknZsQTYeFEAhQwI4qEt9cyV5ONwRHC+lYKSsYSR8s=
|
||||
github.com/go-playground/assert/v2 v2.2.0/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4=
|
||||
github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA=
|
||||
@@ -72,11 +76,13 @@ github.com/go-sql-driver/mysql v1.6.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LB
|
||||
github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU=
|
||||
github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I=
|
||||
github.com/golang-sql/civil v0.0.0-20190719163853-cb61b32ac6fe/go.mod h1:8vg3r2VgvsThLBIFL93Qb5yWzgyZWhEmBwUJWevAkK0=
|
||||
github.com/google/go-cmp v0.5.5 h1:Khx7svrCpmxxtHBq5j2mp/xVjsi8hQMfNLvJFAlrGgU=
|
||||
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
|
||||
github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
|
||||
github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
|
||||
github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
|
||||
github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
|
||||
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
|
||||
github.com/google/uuid v1.4.0 h1:MtMxsa51/r9yyhkyLsVeVt0B+BGQZzpQiTQ4eHZ8bc4=
|
||||
github.com/google/uuid v1.4.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
||||
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
|
||||
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
||||
github.com/joho/godotenv v1.5.1 h1:7eLL/+HRGLY0ldzfGMeQkb7vMd0as4CfYvUVzLqw0N0=
|
||||
github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4=
|
||||
github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
|
||||
@@ -128,34 +134,54 @@ github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS
|
||||
github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08=
|
||||
github.com/ugorji/go/codec v1.2.12 h1:9LC83zGrHhuUA9l16C9AHXAqEV/2wBQ4nkvumAE65EE=
|
||||
github.com/ugorji/go/codec v1.2.12/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg=
|
||||
github.com/yandex-cloud/go-genproto v0.17.0 h1:uQ5Lr8B/xIyY1KrOm7pItYY3YT/DL1O8gVaY03ouYKM=
|
||||
github.com/yandex-cloud/go-genproto v0.17.0/go.mod h1:0LDD/IZLIUIV4iPH+YcF+jysO3jkSvADFGm4dCAuwQo=
|
||||
go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA=
|
||||
go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A=
|
||||
go.opentelemetry.io/otel v1.36.0 h1:UumtzIklRBY6cI/lllNZlALOF5nNIzJVb16APdvgTXg=
|
||||
go.opentelemetry.io/otel v1.36.0/go.mod h1:/TcFMXYjyRNh8khOAO9ybYkqaDBb/70aVwkNML4pP8E=
|
||||
go.opentelemetry.io/otel/metric v1.36.0 h1:MoWPKVhQvJ+eeXWHFBOPoBOi20jh6Iq2CcCREuTYufE=
|
||||
go.opentelemetry.io/otel/metric v1.36.0/go.mod h1:zC7Ks+yeyJt4xig9DEw9kuUFe5C3zLbVjV2PzT6qzbs=
|
||||
go.opentelemetry.io/otel/sdk v1.36.0 h1:b6SYIuLRs88ztox4EyrvRti80uXIFy+Sqzoh9kFULbs=
|
||||
go.opentelemetry.io/otel/sdk v1.36.0/go.mod h1:+lC+mTgD+MUWfjJubi2vvXWcVxyr9rmlshZni72pXeY=
|
||||
go.opentelemetry.io/otel/sdk/metric v1.36.0 h1:r0ntwwGosWGaa0CrSt8cuNuTcccMXERFwHX4dThiPis=
|
||||
go.opentelemetry.io/otel/sdk/metric v1.36.0/go.mod h1:qTNOhFDfKRwX0yXOqJYegL5WRaW376QbB7P4Pb0qva4=
|
||||
go.opentelemetry.io/otel/trace v1.36.0 h1:ahxWNuqZjpdiFAyrIoQ4GIiAIhxAunQR6MUoKrsNd4w=
|
||||
go.opentelemetry.io/otel/trace v1.36.0/go.mod h1:gQ+OnDZzrybY4k4seLzPAWNwVBBVlF2szhehOBB/tGA=
|
||||
golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8=
|
||||
golang.org/x/arch v0.8.0 h1:3wRIsP3pM4yUptoR96otTUOXI367OS0+c9eeRi9doIc=
|
||||
golang.org/x/arch v0.8.0/go.mod h1:FEVrYAQjsQXMVJ1nsMoVVXPZg6p2JE2mx8psSWTDQys=
|
||||
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
|
||||
golang.org/x/crypto v0.0.0-20190325154230-a5d413f7728c/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
|
||||
golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
|
||||
golang.org/x/crypto v0.23.0 h1:dIJU/v2J8Mdglj/8rJ6UUOM3Zc9zLZxVZwwxMooUSAI=
|
||||
golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8=
|
||||
golang.org/x/mod v0.12.0 h1:rmsUpXtvNzj340zd98LZ4KntptpfRHwpFOHG188oHXc=
|
||||
golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
|
||||
golang.org/x/crypto v0.38.0 h1:jt+WWG8IZlBnVbomuhg2Mdq0+BBQaHbtqHEFEigjUV8=
|
||||
golang.org/x/crypto v0.38.0/go.mod h1:MvrbAqul58NNYPKnOra203SB9vpuZW0e+RRZV+Ggqjw=
|
||||
golang.org/x/mod v0.17.0 h1:zY54UmvipHiNd+pm+m0x9KhZ9hl1/7QNMyxXbc6ICqA=
|
||||
golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
|
||||
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
|
||||
golang.org/x/net v0.25.0 h1:d/OCCoBEUq33pjydKrGQhw7IlUPI2Oylr+8qLx49kac=
|
||||
golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM=
|
||||
golang.org/x/net v0.40.0 h1:79Xs7wF06Gbdcg4kdCCIQArK11Z1hr5POQ6+fIYHNuY=
|
||||
golang.org/x/net v0.40.0/go.mod h1:y0hY0exeL2Pku80/zKK7tpntoX23cqL3Oa6njdgRtds=
|
||||
golang.org/x/sync v0.14.0 h1:woo0S4Yywslg6hp4eUFjTVOyKt0RookbpAHG4c1HmhQ=
|
||||
golang.org/x/sync v0.14.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
|
||||
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y=
|
||||
golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
||||
golang.org/x/sys v0.33.0 h1:q3i8TbbEz+JRD9ywIRlyRAQbM0qF7hu24q3teo2hbuw=
|
||||
golang.org/x/sys v0.33.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
|
||||
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
||||
golang.org/x/text v0.15.0 h1:h1V/4gjBv8v9cjcR6+AR5+/cIYK5N/WAgiv4xlsEtAk=
|
||||
golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
|
||||
golang.org/x/tools v0.13.0 h1:Iey4qkscZuv0VvIt8E0neZjtPVQFSc870HQ448QgEmQ=
|
||||
golang.org/x/tools v0.13.0/go.mod h1:HvlwmtVNQAhOuCjW7xxvovg8wbNq7LwfXh/k7wXUl58=
|
||||
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4=
|
||||
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
google.golang.org/protobuf v1.34.1 h1:9ddQBjfCyZPOHPUiPxpYESBLc+T8P3E+Vo4IbKZgFWg=
|
||||
google.golang.org/protobuf v1.34.1/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos=
|
||||
golang.org/x/text v0.25.0 h1:qVyWApTSYLk/drJRO5mDlNYskwQznZmkpV2c8q9zls4=
|
||||
golang.org/x/text v0.25.0/go.mod h1:WEdwpYrmk1qmdHvhkSTNPm3app7v4rsT8F2UD6+VHIA=
|
||||
golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d h1:vU5i/LfpvrRCpgM/VPfJLg5KjxD3E+hfT1SH+d9zLwg=
|
||||
golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk=
|
||||
google.golang.org/genproto/googleapis/api v0.0.0-20250528174236-200df99c418a h1:SGktgSolFCo75dnHJF2yMvnns6jCmHFJ0vE4Vn2JKvQ=
|
||||
google.golang.org/genproto/googleapis/api v0.0.0-20250528174236-200df99c418a/go.mod h1:a77HrdMjoeKbnd2jmgcWdaS++ZLZAEq3orIOAEIKiVw=
|
||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20250528174236-200df99c418a h1:v2PbRU4K3llS09c7zodFpNePeamkAwG3mPrAery9VeE=
|
||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20250528174236-200df99c418a/go.mod h1:qQ0YXyHHx3XkvlzUtpXDkS29lDSafHMZBAZDc03LQ3A=
|
||||
google.golang.org/grpc v1.74.2 h1:WoosgB65DlWVC9FqI82dGsZhWFNBSLjQ84bjROOpMu4=
|
||||
google.golang.org/grpc v1.74.2/go.mod h1:CtQ+BGjaAIXHs/5YS3i473GqwBBa1zGQNevxdeBEXrM=
|
||||
google.golang.org/protobuf v1.36.7 h1:IgrO7UwFQGJdRNXH/sQux4R1Dj1WAKcLElzeeRaXV2A=
|
||||
google.golang.org/protobuf v1.36.7/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
|
@@ -12,6 +12,7 @@ import (
|
||||
"git.vakhrushev.me/av/transcriber/internal/repo"
|
||||
"git.vakhrushev.me/av/transcriber/internal/repo/ffmpeg"
|
||||
"git.vakhrushev.me/av/transcriber/internal/service/s3"
|
||||
"git.vakhrushev.me/av/transcriber/internal/service/speechkit"
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/google/uuid"
|
||||
)
|
||||
@@ -31,9 +32,10 @@ type CreateTranscribeJobResponse struct {
|
||||
}
|
||||
|
||||
type GetTranscribeJobResponse struct {
|
||||
JobID string `json:"job_id"`
|
||||
State string `json:"status"`
|
||||
CreatedAt time.Time `json:"created_at"`
|
||||
JobID string `json:"job_id"`
|
||||
State string `json:"status"`
|
||||
CreatedAt time.Time `json:"created_at"`
|
||||
TranscriptionText *string `json:"transcription_text,omitempty"`
|
||||
}
|
||||
|
||||
func (h *TranscribeHandler) CreateTranscribeJob(c *gin.Context) {
|
||||
@@ -123,9 +125,10 @@ func (h *TranscribeHandler) GetTranscribeJobStatus(c *gin.Context) {
|
||||
}
|
||||
|
||||
c.JSON(http.StatusOK, GetTranscribeJobResponse{
|
||||
JobID: job.Id,
|
||||
State: job.State,
|
||||
CreatedAt: job.CreatedAt,
|
||||
JobID: job.Id,
|
||||
State: job.State,
|
||||
CreatedAt: job.CreatedAt,
|
||||
TranscriptionText: job.TranscriptionText,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -233,7 +236,7 @@ func (h *TranscribeHandler) RunUploadJob(c *gin.Context) {
|
||||
}
|
||||
|
||||
job.FileID = &destFileId
|
||||
job.MoveToState(entity.StateTranscribeReady)
|
||||
job.MoveToState(entity.StateUploaded)
|
||||
|
||||
// Сохраняем информацию о загрузке файла на S3
|
||||
err = h.fileRepo.Create(destFileRecord)
|
||||
@@ -251,3 +254,113 @@ func (h *TranscribeHandler) RunUploadJob(c *gin.Context) {
|
||||
|
||||
c.Status(http.StatusOK)
|
||||
}
|
||||
|
||||
func (h *TranscribeHandler) RunRecognitionJob(c *gin.Context) {
|
||||
acquisitionId := uuid.NewString()
|
||||
rottingTime := time.Now().Add(-1 * time.Hour)
|
||||
|
||||
job, err := h.jobRepo.FindAndAcquire(entity.StateUploaded, acquisitionId, rottingTime)
|
||||
if err != nil {
|
||||
c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
|
||||
return
|
||||
}
|
||||
|
||||
fileRecord, err := h.fileRepo.GetByID(*job.FileID)
|
||||
if err != nil {
|
||||
c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
|
||||
return
|
||||
}
|
||||
|
||||
// Создаем SpeechKit сервис
|
||||
speechKitService, err := speechkit.NewSpeechKitService()
|
||||
if err != nil {
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to initialize SpeechKit service: " + err.Error()})
|
||||
return
|
||||
}
|
||||
|
||||
// Формируем S3 URI для файла
|
||||
bucketName := os.Getenv("S3_BUCKET_NAME")
|
||||
s3URI := fmt.Sprintf("https://storage.yandexcloud.net/%s/%s", bucketName, fileRecord.FileName)
|
||||
|
||||
// Запускаем асинхронное распознавание
|
||||
operationID, err := speechKitService.RecognizeFileFromS3(s3URI)
|
||||
if err != nil {
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to start recognition: " + err.Error()})
|
||||
return
|
||||
}
|
||||
|
||||
// Обновляем задачу с ID операции распознавания
|
||||
job.RecognitionOpID = &operationID
|
||||
job.MoveToState(entity.StateTranscribe)
|
||||
|
||||
err = h.jobRepo.Save(job)
|
||||
if err != nil {
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to update job: " + err.Error()})
|
||||
return
|
||||
}
|
||||
|
||||
c.Status(http.StatusOK)
|
||||
}
|
||||
|
||||
func (h *TranscribeHandler) RunRecognitionCheckJob(c *gin.Context) {
|
||||
acquisitionId := uuid.NewString()
|
||||
rottingTime := time.Now().Add(-1 * time.Hour)
|
||||
|
||||
job, err := h.jobRepo.FindAndAcquire(entity.StateTranscribe, acquisitionId, rottingTime)
|
||||
if err != nil {
|
||||
c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
|
||||
return
|
||||
}
|
||||
|
||||
if job.RecognitionOpID == nil {
|
||||
c.JSON(http.StatusBadRequest, gin.H{"error": "No recognition operation ID found"})
|
||||
return
|
||||
}
|
||||
|
||||
// Создаем SpeechKit сервис
|
||||
speechKitService, err := speechkit.NewSpeechKitService()
|
||||
if err != nil {
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to initialize SpeechKit service: " + err.Error()})
|
||||
return
|
||||
}
|
||||
|
||||
// Проверяем статус операции
|
||||
operation, err := speechKitService.CheckOperationStatus(*job.RecognitionOpID)
|
||||
if err != nil {
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to check operation status: " + err.Error()})
|
||||
return
|
||||
}
|
||||
|
||||
if !operation.Done {
|
||||
// Операция еще не завершена, переводим в состояние ожидания
|
||||
err = h.jobRepo.Save(job)
|
||||
if err != nil {
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to update job: " + err.Error()})
|
||||
return
|
||||
}
|
||||
c.Status(http.StatusOK)
|
||||
return
|
||||
}
|
||||
|
||||
// Операция завершена, получаем результат
|
||||
responses, err := speechKitService.GetRecognitionResult(*job.RecognitionOpID)
|
||||
if err != nil {
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to get recognition result: " + err.Error()})
|
||||
return
|
||||
}
|
||||
|
||||
// Извлекаем текст из результатов
|
||||
transcriptionText := speechkit.ExtractTranscriptionText(responses)
|
||||
|
||||
// Обновляем задачу с результатом
|
||||
job.TranscriptionText = &transcriptionText
|
||||
job.MoveToState(entity.StateDone)
|
||||
|
||||
err = h.jobRepo.Save(job)
|
||||
if err != nil {
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to update job: " + err.Error()})
|
||||
return
|
||||
}
|
||||
|
||||
c.Status(http.StatusOK)
|
||||
}
|
||||
|
@@ -5,24 +5,25 @@ import (
|
||||
)
|
||||
|
||||
type TranscribeJob struct {
|
||||
Id string
|
||||
State string
|
||||
FileID *string
|
||||
IsError bool
|
||||
ErrorText *string
|
||||
Worker *string
|
||||
AcquiredAt *time.Time
|
||||
CreatedAt time.Time
|
||||
Id string
|
||||
State string
|
||||
FileID *string
|
||||
IsError bool
|
||||
ErrorText *string
|
||||
Worker *string
|
||||
AcquiredAt *time.Time
|
||||
CreatedAt time.Time
|
||||
RecognitionOpID *string // ID операции распознавания в Yandex Cloud
|
||||
TranscriptionText *string // Результат распознавания
|
||||
}
|
||||
|
||||
const (
|
||||
StateCreated = "created"
|
||||
StateConverted = "converted"
|
||||
StateUploaded = "uploaded"
|
||||
StateTranscribeReady = "transcribe_ready"
|
||||
StateTranscribeWait = "transcribe_wait"
|
||||
StateDone = "done"
|
||||
StatusFailed = "failed"
|
||||
StateCreated = "created"
|
||||
StateConverted = "converted"
|
||||
StateUploaded = "uploaded"
|
||||
StateTranscribe = "transcribe"
|
||||
StateDone = "done"
|
||||
StatusFailed = "failed"
|
||||
)
|
||||
|
||||
func (j *TranscribeJob) MoveToState(state string) {
|
||||
|
@@ -21,14 +21,16 @@ func NewTranscriptJobRepository(db *sql.DB, gq *goqu.Database) *TranscriptJobRep
|
||||
|
||||
func (repo *TranscriptJobRepository) Create(job *entity.TranscribeJob) error {
|
||||
record := goqu.Record{
|
||||
"id": job.Id,
|
||||
"state": job.State,
|
||||
"file_id": job.FileID,
|
||||
"is_error": job.IsError,
|
||||
"error_text": job.ErrorText,
|
||||
"worker": job.Worker,
|
||||
"acquired_at": job.AcquiredAt,
|
||||
"created_at": job.CreatedAt,
|
||||
"id": job.Id,
|
||||
"state": job.State,
|
||||
"file_id": job.FileID,
|
||||
"is_error": job.IsError,
|
||||
"error_text": job.ErrorText,
|
||||
"worker": job.Worker,
|
||||
"acquired_at": job.AcquiredAt,
|
||||
"created_at": job.CreatedAt,
|
||||
"recognition_op_id": job.RecognitionOpID,
|
||||
"transcription_text": job.TranscriptionText,
|
||||
}
|
||||
query := repo.gq.Insert("transcribe_jobs").Rows(record)
|
||||
sql, args, err := query.ToSQL()
|
||||
@@ -46,12 +48,14 @@ func (repo *TranscriptJobRepository) Create(job *entity.TranscribeJob) error {
|
||||
|
||||
func (repo *TranscriptJobRepository) Save(job *entity.TranscribeJob) error {
|
||||
record := goqu.Record{
|
||||
"state": job.State,
|
||||
"file_id": job.FileID,
|
||||
"is_error": job.IsError,
|
||||
"error_text": job.ErrorText,
|
||||
"worker": job.Worker,
|
||||
"acquired_at": job.AcquiredAt,
|
||||
"state": job.State,
|
||||
"file_id": job.FileID,
|
||||
"is_error": job.IsError,
|
||||
"error_text": job.ErrorText,
|
||||
"worker": job.Worker,
|
||||
"acquired_at": job.AcquiredAt,
|
||||
"recognition_op_id": job.RecognitionOpID,
|
||||
"transcription_text": job.TranscriptionText,
|
||||
}
|
||||
query := repo.gq.Update("transcribe_jobs").Set(record).Where(goqu.C("id").Eq(job.Id))
|
||||
sql, args, err := query.ToSQL()
|
||||
@@ -77,6 +81,8 @@ func (repo *TranscriptJobRepository) GetByID(id string) (*entity.TranscribeJob,
|
||||
"worker",
|
||||
"acquired_at",
|
||||
"created_at",
|
||||
"recognition_op_id",
|
||||
"transcription_text",
|
||||
).Where(goqu.C("id").Eq(id))
|
||||
sql, args, err := query.ToSQL()
|
||||
if err != nil {
|
||||
@@ -93,6 +99,8 @@ func (repo *TranscriptJobRepository) GetByID(id string) (*entity.TranscribeJob,
|
||||
&job.Worker,
|
||||
&job.AcquiredAt,
|
||||
&job.CreatedAt,
|
||||
&job.RecognitionOpID,
|
||||
&job.TranscriptionText,
|
||||
)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get transcribe job: %w", err)
|
||||
@@ -154,6 +162,8 @@ func (repo *TranscriptJobRepository) FindAndAcquire(state, acquisitionId string,
|
||||
"worker",
|
||||
"acquired_at",
|
||||
"created_at",
|
||||
"recognition_op_id",
|
||||
"transcription_text",
|
||||
).Where(goqu.C("worker").Eq(acquisitionId))
|
||||
|
||||
sql, args, err = selectQuery.ToSQL()
|
||||
@@ -171,6 +181,8 @@ func (repo *TranscriptJobRepository) FindAndAcquire(state, acquisitionId string,
|
||||
&job.Worker,
|
||||
&job.AcquiredAt,
|
||||
&job.CreatedAt,
|
||||
&job.RecognitionOpID,
|
||||
&job.TranscriptionText,
|
||||
)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get transcribe job: %w", err)
|
||||
|
160
internal/service/speechkit/speechkit.go
Normal file
160
internal/service/speechkit/speechkit.go
Normal file
@@ -0,0 +1,160 @@
|
||||
package speechkit
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
"google.golang.org/grpc"
|
||||
"google.golang.org/grpc/credentials"
|
||||
"google.golang.org/grpc/metadata"
|
||||
|
||||
stt "github.com/yandex-cloud/go-genproto/yandex/cloud/ai/stt/v3"
|
||||
"github.com/yandex-cloud/go-genproto/yandex/cloud/operation"
|
||||
)
|
||||
|
||||
const (
|
||||
SpeechKitEndpoint = "stt.api.cloud.yandex.net:443"
|
||||
)
|
||||
|
||||
type SpeechKitService struct {
|
||||
conn *grpc.ClientConn
|
||||
sttClient stt.AsyncRecognizerClient
|
||||
opClient operation.OperationServiceClient
|
||||
apiKey string
|
||||
folderID string
|
||||
}
|
||||
|
||||
func NewSpeechKitService() (*SpeechKitService, error) {
|
||||
apiKey := os.Getenv("YANDEX_CLOUD_API_KEY")
|
||||
folderID := os.Getenv("YANDEX_CLOUD_FOLDER_ID")
|
||||
|
||||
if apiKey == "" || folderID == "" {
|
||||
return nil, fmt.Errorf("missing required Yandex Cloud environment variables")
|
||||
}
|
||||
|
||||
// Создаем защищенное соединение
|
||||
creds := credentials.NewTLS(nil)
|
||||
conn, err := grpc.NewClient(SpeechKitEndpoint, grpc.WithTransportCredentials(creds))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to connect to SpeechKit: %w", err)
|
||||
}
|
||||
|
||||
sttClient := stt.NewAsyncRecognizerClient(conn)
|
||||
opClient := operation.NewOperationServiceClient(conn)
|
||||
|
||||
return &SpeechKitService{
|
||||
conn: conn,
|
||||
sttClient: sttClient,
|
||||
opClient: opClient,
|
||||
apiKey: apiKey,
|
||||
folderID: folderID,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (s *SpeechKitService) Close() error {
|
||||
return s.conn.Close()
|
||||
}
|
||||
|
||||
// RecognizeFileFromS3 запускает асинхронное распознавание файла из S3
|
||||
func (s *SpeechKitService) RecognizeFileFromS3(s3URI string) (string, error) {
|
||||
ctx := context.Background()
|
||||
|
||||
// Добавляем авторизацию в контекст
|
||||
ctx = metadata.AppendToOutgoingContext(ctx, "authorization", "Api-Key "+s.apiKey)
|
||||
|
||||
// Создаем запрос на распознавание
|
||||
req := &stt.RecognizeFileRequest{
|
||||
AudioSource: &stt.RecognizeFileRequest_Uri{
|
||||
Uri: s3URI,
|
||||
},
|
||||
RecognitionModel: &stt.RecognitionModelOptions{
|
||||
Model: "general", // Используем общую модель
|
||||
AudioFormat: &stt.AudioFormatOptions{
|
||||
AudioFormat: &stt.AudioFormatOptions_ContainerAudio{
|
||||
ContainerAudio: &stt.ContainerAudio{
|
||||
ContainerAudioType: stt.ContainerAudio_OGG_OPUS,
|
||||
},
|
||||
},
|
||||
},
|
||||
TextNormalization: &stt.TextNormalizationOptions{
|
||||
TextNormalization: stt.TextNormalizationOptions_TEXT_NORMALIZATION_ENABLED,
|
||||
ProfanityFilter: false,
|
||||
LiteratureText: true,
|
||||
},
|
||||
AudioProcessingType: stt.RecognitionModelOptions_FULL_DATA,
|
||||
},
|
||||
SpeakerLabeling: &stt.SpeakerLabelingOptions{
|
||||
SpeakerLabeling: stt.SpeakerLabelingOptions_SPEAKER_LABELING_ENABLED,
|
||||
},
|
||||
}
|
||||
|
||||
// Отправляем запрос
|
||||
op, err := s.sttClient.RecognizeFile(ctx, req)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to start recognition: %w", err)
|
||||
}
|
||||
|
||||
return op.Id, nil
|
||||
}
|
||||
|
||||
// GetRecognitionResult получает результат распознавания по ID операции
|
||||
func (s *SpeechKitService) GetRecognitionResult(operationID string) ([]*stt.StreamingResponse, error) {
|
||||
ctx := context.Background()
|
||||
|
||||
// Добавляем авторизацию в контекст
|
||||
ctx = metadata.AppendToOutgoingContext(ctx, "authorization", "Api-Key "+s.apiKey)
|
||||
|
||||
req := &stt.GetRecognitionRequest{
|
||||
OperationId: operationID,
|
||||
}
|
||||
|
||||
stream, err := s.sttClient.GetRecognition(ctx, req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get recognition stream: %w", err)
|
||||
}
|
||||
|
||||
var responses []*stt.StreamingResponse
|
||||
for {
|
||||
resp, err := stream.Recv()
|
||||
if err != nil {
|
||||
if err.Error() == "EOF" {
|
||||
break
|
||||
}
|
||||
return nil, fmt.Errorf("failed to receive recognition response: %w", err)
|
||||
}
|
||||
responses = append(responses, resp)
|
||||
}
|
||||
|
||||
return responses, nil
|
||||
}
|
||||
|
||||
// CheckOperationStatus проверяет статус операции распознавания
|
||||
func (s *SpeechKitService) CheckOperationStatus(operationID string) (*operation.Operation, error) {
|
||||
ctx := context.Background()
|
||||
|
||||
op, err := s.opClient.Get(ctx, &operation.GetOperationRequest{
|
||||
OperationId: operationID,
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get operation status: %w", err)
|
||||
}
|
||||
|
||||
return op, nil
|
||||
}
|
||||
|
||||
// ExtractTranscriptionText извлекает текст из результатов распознавания
|
||||
func ExtractTranscriptionText(responses []*stt.StreamingResponse) string {
|
||||
var fullText string
|
||||
|
||||
for _, resp := range responses {
|
||||
if final := resp.GetFinal(); final != nil {
|
||||
for _, alt := range final.Alternatives {
|
||||
fullText += alt.Text + " "
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return fullText
|
||||
}
|
2
main.go
2
main.go
@@ -61,6 +61,8 @@ func main() {
|
||||
|
||||
api.POST("/transcribe/convert", transcribeHandler.RunConversionJob)
|
||||
api.POST("/transcribe/upload", transcribeHandler.RunUploadJob)
|
||||
api.POST("/transcribe/recognize", transcribeHandler.RunRecognitionJob)
|
||||
api.POST("/transcribe/check", transcribeHandler.RunRecognitionCheckJob)
|
||||
}
|
||||
|
||||
// Добавляем middleware для обработки больших файлов
|
||||
|
@@ -3,11 +3,19 @@ CREATE TABLE transcribe_jobs (
|
||||
id TEXT PRIMARY KEY,
|
||||
state TEXT NOT NULL,
|
||||
file_id TEXT,
|
||||
|
||||
is_error BOOLEAN NOT NULL,
|
||||
error_text TEXT,
|
||||
worker TEXT,
|
||||
acquired_at DATETIME,
|
||||
|
||||
acquisition_id TEXT,
|
||||
acquire_time DATETIME,
|
||||
delay_time DATETIME,
|
||||
|
||||
recognition_op_id TEXT,
|
||||
transcription_text TEXT,
|
||||
|
||||
created_at DATETIME NOT NULL,
|
||||
updated_at DATETIME NOT NULL,
|
||||
|
||||
FOREIGN KEY (file_id) REFERENCES files(id)
|
||||
);
|
||||
|
Reference in New Issue
Block a user