Extract logic into transcribe service

This commit is contained in:
2025-08-12 10:59:51 +03:00
parent f625e21418
commit 2c9a5f4bfb
8 changed files with 354 additions and 356 deletions

View File

@@ -5,6 +5,7 @@ import (
"fmt"
"log"
"os"
"strings"
"google.golang.org/grpc"
"google.golang.org/grpc/credentials"
@@ -17,6 +18,8 @@ import (
// Endpoint addresses and the recognition model name used by this package.
const (
// SpeechKitEndpoint is the host:port address of the speech-to-text API
// (presumably the dial target for the STT gRPC client — confirm against the constructor).
SpeechKitEndpoint = "stt.api.cloud.yandex.net:443"
// OperationEndpoint is the host:port address of the operation API
// (presumably used when checking operation status — confirm against the constructor).
OperationEndpoint = "operation.api.cloud.yandex.net:443"
// RecognitionModel is the model name passed as RecognitionModelOptions.Model
// when RecognizeFileFromS3 builds its request.
RecognitionModel = "deferred-general"
)
type SpeechKitService struct {
@@ -91,7 +94,7 @@ func (s *SpeechKitService) RecognizeFileFromS3(s3URI string) (string, error) {
Uri: s3URI,
},
RecognitionModel: &stt.RecognitionModelOptions{
Model: "general", // Используем общую модель
Model: RecognitionModel,
AudioFormat: &stt.AudioFormatOptions{
AudioFormat: &stt.AudioFormatOptions_ContainerAudio{
ContainerAudio: &stt.ContainerAudio{
@@ -121,7 +124,7 @@ func (s *SpeechKitService) RecognizeFileFromS3(s3URI string) (string, error) {
}
// GetRecognitionResult получает результат распознавания по ID операции
func (s *SpeechKitService) GetRecognitionResult(operationID string) ([]*stt.StreamingResponse, error) {
func (s *SpeechKitService) GetRecognitionText(operationID string) (string, error) {
ctx := context.Background()
// Добавляем авторизацию и folder_id в контекст
@@ -134,22 +137,28 @@ func (s *SpeechKitService) GetRecognitionResult(operationID string) ([]*stt.Stre
stream, err := s.sttClient.GetRecognition(ctx, req)
if err != nil {
return nil, fmt.Errorf("failed to get recognition stream: %w", err)
return "", fmt.Errorf("failed to get recognition stream: %w", err)
}
var responses []*stt.StreamingResponse
var sb strings.Builder
for {
resp, err := stream.Recv()
if err != nil {
if err.Error() == "EOF" {
break
}
return nil, fmt.Errorf("failed to receive recognition response: %w", err)
return "", fmt.Errorf("failed to receive recognition response: %w", err)
}
if final := resp.GetFinal(); final != nil {
for _, alt := range final.Alternatives {
sb.WriteString(alt.Text)
sb.WriteString(" ")
}
}
responses = append(responses, resp)
}
return responses, nil
return sb.String(), nil
}
// CheckOperationStatus проверяет статус операции распознавания
@@ -171,18 +180,3 @@ func (s *SpeechKitService) CheckOperationStatus(operationID string) (*operation.
return op, nil
}
// ExtractTranscriptionText concatenates the text of every alternative found in
// the final results of responses, in order. A single space is written after
// each alternative, so a non-empty result ends with a trailing space.
// Responses for which GetFinal returns nil are skipped; the result is the
// empty string when no alternatives are found.
func ExtractTranscriptionText(responses []*stt.StreamingResponse) string {
	// A strings.Builder avoids re-copying the accumulated text on every
	// append, which repeated string concatenation would do.
	var sb strings.Builder
	for _, resp := range responses {
		final := resp.GetFinal()
		if final == nil {
			continue
		}
		for _, alt := range final.Alternatives {
			sb.WriteString(alt.Text)
			sb.WriteString(" ")
		}
	}
	return sb.String()
}