Распознавание файлов и структуры с помощью LLM

2026-06-14 12:48:08 +03:00
parent 2ec0cf9747
commit 91c501624a
9 changed files with 1097 additions and 4 deletions
@@ -0,0 +1,224 @@
+// Package recognize по сигналам торрента определяет фильм/сериал, строит
+// план раскладки и оценивает уверенность.
+//
+// Конвейер (см. docs/specs/recognition.md):
+//  1. пред-парс имени релиза (go-ptn) — черновые название/год/сезон/серия;
+//  2. вызов LLM со структурированным выводом → план в нашей схеме;
+//  3. валидация плана в Go (схема + структура + согласованность сигналов);
+//  4. решение «авто или review».
+//
+// Ф2 не сверяется с метабазами (TMDB/TVDB — Ф4) и ничего не пишет на диск:
+// без подтверждённого матча в базе авто-раскладка не делается, поэтому в
+// этой фазе решение всегда «review». Выход LLM недоверенный — план
+// принимается только если каждый files[].src совпадает с реальным файлом
+// торрента; итоговая безопасность пути держится на раскладке (Ф3).
+package recognize
+
+import (
+	"context"
+	"fmt"
+	"log/slog"
+
+	"git.vakhrushev.me/av/jellybit/internal/llm"
+)
+
+// MediaType is the kind of content a release holds.
+type MediaType string
+
+const (
+	MediaMovie  MediaType = "movie"
+	MediaSeries MediaType = "series"
+)
+
+// FileRole is the role a file plays within a release.
+type FileRole string
+
+const (
+	RoleMain     FileRole = "main"     // main video file of a movie
+	RoleEpisode  FileRole = "episode"  // episode of a series
+	RoleSubtitle FileRole = "subtitle" // external subtitles
+	RoleExtra    FileRole = "extra"    // bonus material
+	RoleSample   FileRole = "sample"   // sample clip
+	RoleIgnore   FileRole = "ignore"   // junk / not needed
+)
+
+// valid reports whether r equals one of the declared FileRole constants.
+func (r FileRole) valid() bool {
+	return r == RoleMain ||
+		r == RoleEpisode ||
+		r == RoleSubtitle ||
+		r == RoleExtra ||
+		r == RoleSample ||
+		r == RoleIgnore
+}
+
+// File is an input torrent file (path relative to content_path, and its size).
+type File struct {
+	Path string
+	Size int64
+}
+
+// Input carries the signals used to recognize a single release.
+type Input struct {
+	Name    string   // torrent name
+	Files   []File   // list of files with their sizes
+	Context string   // free-text context supplied by a human (optional)
+	Hints   []string // hints accumulated from review (phase 3; usually empty in phase 2)
+}
+
+// PlanFile is a file entry in the layout plan. Season/Episode are set per file
+// so that multi-season packs and specials can be expressed (see recognition.md).
+type PlanFile struct {
+	Src     string   `json:"src"`
+	Role    FileRole `json:"role"`
+	Season  *int     `json:"season,omitempty"`
+	Episode *int     `json:"episode,omitempty"`
+}
+
+// Plan is the structured recognition result (the LLM response schema).
+type Plan struct {
+	Type          MediaType  `json:"type"`
+	Title         string     `json:"title"`
+	OriginalTitle string     `json:"original_title,omitempty"`
+	Year          int        `json:"year,omitempty"`
+	ProviderHint  string     `json:"provider_hint,omitempty"`
+	Files         []PlanFile `json:"files"`
+	Confidence    float64    `json:"confidence"`
+	Notes         string     `json:"notes,omitempty"`
+}
+
+// PreParse is the rough parse of the release name (go-ptn).
+type PreParse struct {
+	Title   string
+	Year    int
+	Season  int
+	Episode int
+	Quality string
+}
+
+// Decision is the outcome of the confidence model.
+type Decision struct {
+	Auto    bool     // automatic layout without review (always false in phase 2)
+	Reasons []string // reasons for falling back to review / validation warnings
+}
+
+// Result is the outcome of a recognition run.
+type Result struct {
+	Plan     Plan
+	PreParse PreParse
+	Decision Decision
+	Attempts int    // number of LLM calls made (including parse retries)
+	Raw      string // raw LLM response from the last attempt (for recognition.raw_llm)
+}
+
+// LLM is the part of the provider that recognize needs.
+type LLM interface {
+	Complete(ctx context.Context, req llm.Request) (llm.Response, error)
+}
+
+// Config holds the recognition parameters.
+type Config struct {
+	MaxRetries int // re-parse attempts with the schema repeated in the prompt ([llm].max_retries)
+	MaxTokens  int // model response limit (0 means default)
+	MaxFiles   int // truncation of the file list in the prompt (0 means default)
+}
+
+const (
+	defaultMaxTokens = 4000 // used by New when Config.MaxTokens <= 0
+	defaultMaxFiles  = 100  // used by New when Config.MaxFiles <= 0
+)
+
+// Recognizer is the recognition implementation.
+type Recognizer struct {
+	llm       LLM          // provider called for completions
+	maxRetry  int          // extra LLM attempts allowed after the first one
+	maxTokens int          // MaxTokens sent with every request
+	maxFiles  int          // file-list limit handed to the prompt builders
+	log       *slog.Logger // receives parse warnings and the final summary
+}
+
+// New builds a Recognizer from provider, cfg and log.
+//
+// Non-positive cfg.MaxTokens and cfg.MaxFiles are replaced by the package
+// defaults, and a negative cfg.MaxRetries is treated as 0 (a single attempt).
+// A nil log falls back to slog.Default(), because Recognize logs
+// unconditionally and would otherwise panic on a nil logger.
+func New(provider LLM, cfg Config, log *slog.Logger) *Recognizer {
+	if log == nil {
+		log = slog.Default()
+	}
+	maxTokens := cfg.MaxTokens
+	if maxTokens <= 0 {
+		maxTokens = defaultMaxTokens
+	}
+	maxFiles := cfg.MaxFiles
+	if maxFiles <= 0 {
+		maxFiles = defaultMaxFiles
+	}
+	return &Recognizer{
+		llm:       provider,
+		maxRetry:  max(cfg.MaxRetries, 0),
+		maxTokens: maxTokens,
+		maxFiles:  maxFiles,
+		log:       log,
+	}
+}
+
+// Recognize runs the pipeline for a single release.
+//
+// An error returned by the LLM call itself is wrapped and returned as error,
+// leaving retry/failed handling to the caller. A response that still cannot be
+// parsed once all attempts are used is not an error: it yields a Result whose
+// Decision sends the release to review (see recognition.md).
+func (r *Recognizer) Recognize(ctx context.Context, in Input) (Result, error) {
+	pre := preParse(in.Name)
+	messages := buildMessages(in, pre, r.maxFiles)
+	temperature := 0.0
+
+	var (
+		plan     Plan
+		raw      string
+		lastErr  error
+		attempts int
+	)
+
+	// One initial attempt plus up to r.maxRetry corrections.
+	for attempts <= r.maxRetry {
+		attempts++
+
+		req := llm.Request{
+			Messages:    messages,
+			JSONMode:    true,
+			Temperature: &temperature,
+			MaxTokens:   r.maxTokens,
+		}
+		resp, err := r.llm.Complete(ctx, req)
+		if err != nil {
+			return Result{}, fmt.Errorf("recognize: llm complete: %w", err)
+		}
+		raw = resp.Content
+
+		if plan, lastErr = parsePlan(raw, in); lastErr == nil {
+			break
+		}
+
+		r.log.Warn("recognize: unparsed llm response",
+			"attempt", attempts, "err", lastErr)
+
+		// Feed the bad answer back and ask the model to correct itself,
+		// repeating the schema and the parse error.
+		reply := llm.Message{Role: llm.RoleAssistant, Content: raw}
+		fix := llm.Message{Role: llm.RoleUser, Content: correctionMessage(lastErr, in, r.maxFiles)}
+		messages = append(messages, reply, fix)
+	}
+
+	if lastErr == nil {
+		decision := decide(plan, pre)
+		r.log.Info("recognize: done",
+			"type", plan.Type, "title", plan.Title, "year", plan.Year,
+			"files", len(plan.Files), "attempts", attempts,
+			"auto", decision.Auto, "reasons", len(decision.Reasons))
+		return Result{
+			Plan:     plan,
+			PreParse: pre,
+			Decision: decision,
+			Attempts: attempts,
+			Raw:      raw,
+		}, nil
+	}
+
+	// Every attempt failed to parse: hand the release over to review.
+	reason := "ответ LLM не разобран после " + itoa(attempts) + " попыток: " + lastErr.Error()
+	return Result{
+		PreParse: pre,
+		Attempts: attempts,
+		Raw:      raw,
+		Decision: Decision{
+			Auto:    false,
+			Reasons: []string{reason},
+		},
+	}, nil
+}