Change adapters structure

This commit is contained in:
2025-08-13 09:23:30 +03:00
parent f6b5e835a4
commit bab563519c
9 changed files with 8 additions and 8 deletions

View File

@@ -0,0 +1,20 @@
package recognizer
import (
"git.vakhrushev.me/av/transcriber/internal/entity"
"github.com/google/uuid"
)
type MemoryAudioRecognizer struct{}
func (r *MemoryAudioRecognizer) RecognizeFile(filePath string) (operationID string, err error) {
return uuid.NewString(), nil
}
func (r *MemoryAudioRecognizer) GetRecognitionText(operationID string) (string, error) {
return "Foo bar, Baz.", nil
}
func (r *MemoryAudioRecognizer) CheckRecognitionStatus(operationID string) (*entity.RecognitionResult, error) {
return entity.NewCompletedResult(), nil
}

View File

@@ -0,0 +1,95 @@
package yandex
import (
"fmt"
"path/filepath"
"git.vakhrushev.me/av/transcriber/internal/entity"
)
type YandexAudioRecognizerConfig struct {
// s3
Region string
AccessKey string
SecretKey string
BucketName string
Endpoint string
// speech kit
ApiKey string
FolderID string
}
type YandexAudioRecognizerService struct {
s3Sevice *yandexS3Service
sttService *speechKitService
}
func NewYandexAudioRecognizerService(cfg YandexAudioRecognizerConfig) (*YandexAudioRecognizerService, error) {
s3, err := newYandexS3Service(s3Config{
Region: cfg.Region,
AccessKey: cfg.AccessKey,
SecretKey: cfg.SecretKey,
BucketName: cfg.BucketName,
Endpoint: cfg.Endpoint,
})
if err != nil {
return nil, err
}
stt, err := newSpeechKitService(speechKitConfig{
ApiKey: cfg.ApiKey,
FolderID: cfg.FolderID,
})
if err != nil {
return nil, err
}
return &YandexAudioRecognizerService{
s3Sevice: s3,
sttService: stt,
}, nil
}
func (s *YandexAudioRecognizerService) Close() error {
return s.sttService.Close()
}
func (s *YandexAudioRecognizerService) RecognizeFile(filePath string) (string, error) {
fileName := filepath.Base(filePath)
err := s.s3Sevice.uploadFile(filePath, fileName)
if err != nil {
return "", err
}
uri := s.s3Sevice.fileUrl(fileName)
opId, err := s.sttService.recognizeFileFromS3(uri)
if err != nil {
return "", err
}
return opId, nil
}
func (s *YandexAudioRecognizerService) GetRecognitionText(operationID string) (string, error) {
return s.sttService.getRecognitionText(operationID)
}
func (s *YandexAudioRecognizerService) CheckRecognitionStatus(operationID string) (*entity.RecognitionResult, error) {
operation, err := s.sttService.checkOperationStatus(operationID)
if err != nil {
return nil, err
}
if !operation.Done {
return entity.NewInProgressResult(), nil
}
if opErr := operation.GetError(); opErr != nil {
errorText := fmt.Sprintf("operation failed: code %d, message: %s", opErr.Code, opErr.Message)
return entity.NewFailedResult(errorText), nil
}
return entity.NewCompletedResult(), nil
}

View File

@@ -0,0 +1,90 @@
package yandex
import (
"context"
"fmt"
"os"
"strings"
"github.com/aws/aws-sdk-go-v2/aws"
"github.com/aws/aws-sdk-go-v2/config"
"github.com/aws/aws-sdk-go-v2/credentials"
"github.com/aws/aws-sdk-go-v2/feature/s3/manager"
"github.com/aws/aws-sdk-go-v2/service/s3"
)
type s3Config struct {
Region string
AccessKey string
SecretKey string
BucketName string
Endpoint string
}
type yandexS3Service struct {
client *s3.Client
uploader *manager.Uploader
bucketName string
endpoint string
}
func newYandexS3Service(cfg s3Config) (*yandexS3Service, error) {
if cfg.Region == "" || cfg.AccessKey == "" || cfg.SecretKey == "" || cfg.BucketName == "" {
return nil, fmt.Errorf("missing required S3 configuration parameters")
}
// Создаем конфигурацию
awsCfg, err := config.LoadDefaultConfig(context.Background(),
config.WithRegion(cfg.Region),
config.WithCredentialsProvider(credentials.NewStaticCredentialsProvider(cfg.AccessKey, cfg.SecretKey, "")),
)
if err != nil {
return nil, fmt.Errorf("failed to load AWS config: %w", err)
}
// Создаем клиент S3
var client *s3.Client
if cfg.Endpoint != "" {
// Кастомный endpoint (например, для MinIO)
client = s3.NewFromConfig(awsCfg, func(o *s3.Options) {
o.BaseEndpoint = aws.String(cfg.Endpoint)
o.UsePathStyle = true
})
} else {
// Стандартный AWS S3
client = s3.NewFromConfig(awsCfg)
}
uploader := manager.NewUploader(client)
return &yandexS3Service{
client: client,
uploader: uploader,
bucketName: cfg.BucketName,
endpoint: cfg.Endpoint,
}, nil
}
func (s *yandexS3Service) uploadFile(filePath, fileName string) error {
file, err := os.Open(filePath)
if err != nil {
return fmt.Errorf("failed to open file %s: %w", filePath, err)
}
defer file.Close()
_, err = s.uploader.Upload(context.Background(), &s3.PutObjectInput{
Bucket: aws.String(s.bucketName),
Key: aws.String(fileName),
Body: file,
})
if err != nil {
return fmt.Errorf("failed to upload file to S3: %w", err)
}
return nil
}
func (s *yandexS3Service) fileUrl(fileName string) string {
endpoint := strings.TrimRight(s.endpoint, "/")
return fmt.Sprintf("%s/%s/%s", endpoint, s.bucketName, fileName)
}

View File

@@ -0,0 +1,185 @@
package yandex
import (
"context"
"fmt"
"strings"
"google.golang.org/grpc"
"google.golang.org/grpc/credentials"
"google.golang.org/grpc/metadata"
stt "github.com/yandex-cloud/go-genproto/yandex/cloud/ai/stt/v3"
"github.com/yandex-cloud/go-genproto/yandex/cloud/operation"
)
const (
SpeechKitEndpoint = "stt.api.cloud.yandex.net:443"
OperationEndpoint = "operation.api.cloud.yandex.net:443"
RecognitionModel = "deferred-general"
)
type speechKitConfig struct {
ApiKey string
FolderID string
}
type speechKitService struct {
sttConn *grpc.ClientConn
opConn *grpc.ClientConn
sttClient stt.AsyncRecognizerClient
opClient operation.OperationServiceClient
apiKey string
folderID string
}
func newSpeechKitService(cfg speechKitConfig) (*speechKitService, error) {
apiKey := cfg.ApiKey
folderID := cfg.FolderID
if apiKey == "" || folderID == "" {
return nil, fmt.Errorf("missing required Yandex Cloud environment variables")
}
// Создаем защищенное соединение для SpeechKit
creds := credentials.NewTLS(nil)
sttConn, err := grpc.NewClient(SpeechKitEndpoint, grpc.WithTransportCredentials(creds))
if err != nil {
return nil, fmt.Errorf("failed to connect to SpeechKit: %w", err)
}
// Создаем защищенное соединение для Operations API
opConn, err := grpc.NewClient(OperationEndpoint, grpc.WithTransportCredentials(creds))
if err != nil {
sttConn.Close()
return nil, fmt.Errorf("failed to connect to Operations API: %w", err)
}
sttClient := stt.NewAsyncRecognizerClient(sttConn)
opClient := operation.NewOperationServiceClient(opConn)
return &speechKitService{
sttConn: sttConn,
opConn: opConn,
sttClient: sttClient,
opClient: opClient,
apiKey: apiKey,
folderID: folderID,
}, nil
}
func (s *speechKitService) Close() error {
var err1, err2 error
if s.sttConn != nil {
err1 = s.sttConn.Close()
}
if s.opConn != nil {
err2 = s.opConn.Close()
}
if err1 != nil {
return err1
}
return err2
}
// recognizeFileFromS3 запускает асинхронное распознавание файла из S3
func (s *speechKitService) recognizeFileFromS3(s3URI string) (string, error) {
ctx := context.Background()
// Добавляем авторизацию и folder_id в контекст
ctx = metadata.AppendToOutgoingContext(ctx, "authorization", "Api-Key "+s.apiKey)
ctx = metadata.AppendToOutgoingContext(ctx, "x-folder-id", s.folderID)
// Создаем запрос на распознавание
req := &stt.RecognizeFileRequest{
AudioSource: &stt.RecognizeFileRequest_Uri{
Uri: s3URI,
},
RecognitionModel: &stt.RecognitionModelOptions{
Model: RecognitionModel,
AudioFormat: &stt.AudioFormatOptions{
AudioFormat: &stt.AudioFormatOptions_ContainerAudio{
ContainerAudio: &stt.ContainerAudio{
ContainerAudioType: stt.ContainerAudio_OGG_OPUS,
},
},
},
TextNormalization: &stt.TextNormalizationOptions{
TextNormalization: stt.TextNormalizationOptions_TEXT_NORMALIZATION_ENABLED,
ProfanityFilter: false,
LiteratureText: true,
},
AudioProcessingType: stt.RecognitionModelOptions_FULL_DATA,
},
SpeakerLabeling: &stt.SpeakerLabelingOptions{
SpeakerLabeling: stt.SpeakerLabelingOptions_SPEAKER_LABELING_ENABLED,
},
}
// Отправляем запрос
op, err := s.sttClient.RecognizeFile(ctx, req)
if err != nil {
return "", fmt.Errorf("failed to start recognition: %w", err)
}
return op.Id, nil
}
// GetRecognitionResult получает результат распознавания по ID операции
func (s *speechKitService) getRecognitionText(operationID string) (string, error) {
ctx := context.Background()
// Добавляем авторизацию и folder_id в контекст
ctx = metadata.AppendToOutgoingContext(ctx, "authorization", "Api-Key "+s.apiKey)
ctx = metadata.AppendToOutgoingContext(ctx, "x-folder-id", s.folderID)
req := &stt.GetRecognitionRequest{
OperationId: operationID,
}
stream, err := s.sttClient.GetRecognition(ctx, req)
if err != nil {
return "", fmt.Errorf("failed to get recognition stream: %w", err)
}
var sb strings.Builder
for {
resp, err := stream.Recv()
if err != nil {
if err.Error() == "EOF" {
break
}
return "", fmt.Errorf("failed to receive recognition response: %w", err)
}
if refinement := resp.GetFinalRefinement(); refinement != nil {
if text := refinement.GetNormalizedText(); text != nil {
for _, alt := range text.Alternatives {
sb.WriteString(alt.Text)
sb.WriteString(" ")
}
}
}
}
return sb.String(), nil
}
// checkOperationStatus проверяет статус операции распознавания
func (s *speechKitService) checkOperationStatus(operationID string) (*operation.Operation, error) {
ctx := context.Background()
ctx = metadata.AppendToOutgoingContext(ctx, "authorization", "Api-Key "+s.apiKey)
ctx = metadata.AppendToOutgoingContext(ctx, "x-folder-id", s.folderID)
op, err := s.opClient.Get(ctx, &operation.GetOperationRequest{
OperationId: operationID,
})
if err != nil {
return nil, fmt.Errorf("failed to get operation status: %w", err)
}
return op, nil
}