Extract logic into transcribe service
This commit is contained in:
@@ -5,6 +5,7 @@ import (
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"google.golang.org/grpc"
|
||||
"google.golang.org/grpc/credentials"
|
||||
@@ -17,6 +18,8 @@ import (
|
||||
// Network endpoints and the recognition model name used by this package.
const (
	// SpeechKitEndpoint is the host:port address of the speech-to-text API.
	SpeechKitEndpoint = "stt.api.cloud.yandex.net:443"
	// OperationEndpoint is the host:port address of the operation API.
	OperationEndpoint = "operation.api.cloud.yandex.net:443"

	// RecognitionModel is the model name placed in the Model field of the
	// recognition model options when a file recognition request is built.
	RecognitionModel = "deferred-general"
)
|
||||
|
||||
type SpeechKitService struct {
|
||||
@@ -91,7 +94,7 @@ func (s *SpeechKitService) RecognizeFileFromS3(s3URI string) (string, error) {
|
||||
Uri: s3URI,
|
||||
},
|
||||
RecognitionModel: &stt.RecognitionModelOptions{
|
||||
Model: "general", // Используем общую модель
|
||||
Model: RecognitionModel,
|
||||
AudioFormat: &stt.AudioFormatOptions{
|
||||
AudioFormat: &stt.AudioFormatOptions_ContainerAudio{
|
||||
ContainerAudio: &stt.ContainerAudio{
|
||||
@@ -121,7 +124,7 @@ func (s *SpeechKitService) RecognizeFileFromS3(s3URI string) (string, error) {
|
||||
}
|
||||
|
||||
// GetRecognitionText fetches the recognition result for the given operation ID and returns the recognized text.
|
||||
func (s *SpeechKitService) GetRecognitionResult(operationID string) ([]*stt.StreamingResponse, error) {
|
||||
func (s *SpeechKitService) GetRecognitionText(operationID string) (string, error) {
|
||||
ctx := context.Background()
|
||||
|
||||
// Добавляем авторизацию и folder_id в контекст
|
||||
@@ -134,22 +137,28 @@ func (s *SpeechKitService) GetRecognitionResult(operationID string) ([]*stt.Stre
|
||||
|
||||
stream, err := s.sttClient.GetRecognition(ctx, req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get recognition stream: %w", err)
|
||||
return "", fmt.Errorf("failed to get recognition stream: %w", err)
|
||||
}
|
||||
|
||||
var responses []*stt.StreamingResponse
|
||||
var sb strings.Builder
|
||||
|
||||
for {
|
||||
resp, err := stream.Recv()
|
||||
if err != nil {
|
||||
if err.Error() == "EOF" {
|
||||
break
|
||||
}
|
||||
return nil, fmt.Errorf("failed to receive recognition response: %w", err)
|
||||
return "", fmt.Errorf("failed to receive recognition response: %w", err)
|
||||
}
|
||||
if final := resp.GetFinal(); final != nil {
|
||||
for _, alt := range final.Alternatives {
|
||||
sb.WriteString(alt.Text)
|
||||
sb.WriteString(" ")
|
||||
}
|
||||
}
|
||||
responses = append(responses, resp)
|
||||
}
|
||||
|
||||
return responses, nil
|
||||
return sb.String(), nil
|
||||
}
|
||||
|
||||
// CheckOperationStatus проверяет статус операции распознавания
|
||||
@@ -171,18 +180,3 @@ func (s *SpeechKitService) CheckOperationStatus(operationID string) (*operation.
|
||||
|
||||
return op, nil
|
||||
}
|
||||
|
||||
// ExtractTranscriptionText извлекает текст из результатов распознавания
|
||||
func ExtractTranscriptionText(responses []*stt.StreamingResponse) string {
|
||||
var fullText string
|
||||
|
||||
for _, resp := range responses {
|
||||
if final := resp.GetFinal(); final != nil {
|
||||
for _, alt := range final.Alternatives {
|
||||
fullText += alt.Text + " "
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return fullText
|
||||
}
|
||||
|
Reference in New Issue
Block a user