Распознавание файлов и структуры с помощью LLM
This commit is contained in:
@@ -0,0 +1,237 @@
|
||||
package recognize
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"io"
|
||||
"log/slog"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"git.vakhrushev.me/av/jellybit/internal/llm"
|
||||
)
|
||||
|
||||
// fakeLLM отдаёт заранее заданные ответы/ошибки по порядку вызовов.
|
||||
type fakeLLM struct {
|
||||
responses []string
|
||||
errs []error
|
||||
calls int
|
||||
lastReq llm.Request
|
||||
}
|
||||
|
||||
func (f *fakeLLM) Complete(_ context.Context, req llm.Request) (llm.Response, error) {
|
||||
f.lastReq = req
|
||||
i := f.calls
|
||||
f.calls++
|
||||
if i < len(f.errs) && f.errs[i] != nil {
|
||||
return llm.Response{}, f.errs[i]
|
||||
}
|
||||
content := ""
|
||||
switch {
|
||||
case i < len(f.responses):
|
||||
content = f.responses[i]
|
||||
case len(f.responses) > 0:
|
||||
content = f.responses[len(f.responses)-1]
|
||||
}
|
||||
return llm.Response{Content: content}, nil
|
||||
}
|
||||
|
||||
// testLogger returns a logger whose text output is written to io.Discard,
// keeping test runs quiet.
func testLogger() *slog.Logger {
	sink := slog.NewTextHandler(io.Discard, nil)
	return slog.New(sink)
}
|
||||
|
||||
func TestRecognize_Movie(t *testing.T) {
|
||||
in := Input{
|
||||
Name: "The.Matrix.1999.1080p.BluRay.x264",
|
||||
Context: "научная фантастика",
|
||||
Files: []File{
|
||||
{Path: "The.Matrix.1999/movie.mkv", Size: 8 << 30},
|
||||
{Path: "The.Matrix.1999/sample.mkv", Size: 50 << 20},
|
||||
},
|
||||
}
|
||||
resp := `{"type":"movie","title":"The Matrix","original_title":"","year":1999,
|
||||
"provider_hint":"The Matrix 1999","confidence":0.9,"notes":"",
|
||||
"files":[
|
||||
{"src":"The.Matrix.1999/movie.mkv","role":"main","season":null,"episode":null},
|
||||
{"src":"The.Matrix.1999/sample.mkv","role":"sample","season":null,"episode":null}
|
||||
]}`
|
||||
f := &fakeLLM{responses: []string{resp}}
|
||||
r := New(f, Config{MaxRetries: 2}, testLogger())
|
||||
|
||||
res, err := r.Recognize(context.Background(), in)
|
||||
if err != nil {
|
||||
t.Fatalf("Recognize: %v", err)
|
||||
}
|
||||
if res.Plan.Type != MediaMovie || res.Plan.Title != "The Matrix" || res.Plan.Year != 1999 {
|
||||
t.Errorf("plan = %+v", res.Plan)
|
||||
}
|
||||
if res.Attempts != 1 {
|
||||
t.Errorf("attempts = %d, want 1", res.Attempts)
|
||||
}
|
||||
if res.Decision.Auto {
|
||||
t.Error("auto must be false in Ф2 (no DB match)")
|
||||
}
|
||||
if len(res.Decision.Reasons) == 0 {
|
||||
t.Error("expected at least the no-DB-match reason")
|
||||
}
|
||||
// Чистая структура: единственная причина — отсутствие матча в базе.
|
||||
if len(res.Decision.Reasons) != 1 {
|
||||
t.Errorf("unexpected extra warnings: %v", res.Decision.Reasons)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRecognize_Series(t *testing.T) {
|
||||
in := Input{
|
||||
Name: "Avatar.The.Last.Airbender.Book.2",
|
||||
Files: []File{
|
||||
{Path: "Avatar/01.mkv", Size: 200 << 20},
|
||||
{Path: "Avatar/02.mkv", Size: 200 << 20},
|
||||
{Path: "Avatar/03.mkv", Size: 200 << 20},
|
||||
},
|
||||
}
|
||||
resp := `{"type":"series","title":"Avatar: The Last Airbender","year":2006,
|
||||
"confidence":0.8,"files":[
|
||||
{"src":"Avatar/01.mkv","role":"episode","season":2,"episode":1},
|
||||
{"src":"Avatar/02.mkv","role":"episode","season":2,"episode":2},
|
||||
{"src":"Avatar/03.mkv","role":"episode","season":2,"episode":3}
|
||||
]}`
|
||||
f := &fakeLLM{responses: []string{resp}}
|
||||
r := New(f, Config{}, testLogger())
|
||||
|
||||
res, err := r.Recognize(context.Background(), in)
|
||||
if err != nil {
|
||||
t.Fatalf("Recognize: %v", err)
|
||||
}
|
||||
if res.Plan.Type != MediaSeries || len(res.Plan.Files) != 3 {
|
||||
t.Errorf("plan = %+v", res.Plan)
|
||||
}
|
||||
if len(res.Decision.Reasons) != 1 {
|
||||
t.Errorf("clean series should warn only about DB match, got: %v", res.Decision.Reasons)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRecognize_RetriesOnBadSrcThenSucceeds(t *testing.T) {
|
||||
in := Input{
|
||||
Name: "Some.Movie.2020",
|
||||
Files: []File{{Path: "movie/film.mkv", Size: 4 << 30}},
|
||||
}
|
||||
bad := `{"type":"movie","title":"Some Movie","files":[
|
||||
{"src":"movie/WRONG.mkv","role":"main"}]}`
|
||||
good := `{"type":"movie","title":"Some Movie","year":2020,"files":[
|
||||
{"src":"movie/film.mkv","role":"main"}]}`
|
||||
f := &fakeLLM{responses: []string{bad, good}}
|
||||
r := New(f, Config{MaxRetries: 2}, testLogger())
|
||||
|
||||
res, err := r.Recognize(context.Background(), in)
|
||||
if err != nil {
|
||||
t.Fatalf("Recognize: %v", err)
|
||||
}
|
||||
if res.Attempts != 2 {
|
||||
t.Errorf("attempts = %d, want 2", res.Attempts)
|
||||
}
|
||||
if res.Plan.Title != "Some Movie" {
|
||||
t.Errorf("plan = %+v", res.Plan)
|
||||
}
|
||||
// Корректирующее сообщение должно содержать схему и список файлов.
|
||||
last := f.lastReq.Messages[len(f.lastReq.Messages)-1]
|
||||
if !strings.Contains(last.Content, "Ответ не принят") || !strings.Contains(last.Content, "film.mkv") {
|
||||
t.Errorf("correction message missing context: %q", last.Content)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRecognize_ExhaustedRetriesGoesToReview(t *testing.T) {
|
||||
in := Input{Name: "x", Files: []File{{Path: "a.mkv", Size: 1}}}
|
||||
bad := `not a json at all`
|
||||
f := &fakeLLM{responses: []string{bad}}
|
||||
r := New(f, Config{MaxRetries: 2}, testLogger())
|
||||
|
||||
res, err := r.Recognize(context.Background(), in)
|
||||
if err != nil {
|
||||
t.Fatalf("Recognize should not error on unparsed response: %v", err)
|
||||
}
|
||||
if f.calls != 3 { // 1 + 2 ретрая
|
||||
t.Errorf("calls = %d, want 3", f.calls)
|
||||
}
|
||||
if res.Decision.Auto || len(res.Decision.Reasons) == 0 {
|
||||
t.Errorf("expected review with reason, got %+v", res.Decision)
|
||||
}
|
||||
if !strings.Contains(res.Decision.Reasons[0], "не разобран") {
|
||||
t.Errorf("reason = %q", res.Decision.Reasons[0])
|
||||
}
|
||||
if res.Raw != bad {
|
||||
t.Errorf("raw = %q, want last response", res.Raw)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRecognize_TransportErrorPropagates(t *testing.T) {
|
||||
in := Input{Name: "x", Files: []File{{Path: "a.mkv", Size: 1}}}
|
||||
wantErr := errors.New("connection refused")
|
||||
f := &fakeLLM{errs: []error{wantErr}}
|
||||
r := New(f, Config{MaxRetries: 2}, testLogger())
|
||||
|
||||
_, err := r.Recognize(context.Background(), in)
|
||||
if err == nil || !errors.Is(err, wantErr) {
|
||||
t.Fatalf("err = %v, want wrapped %v", err, wantErr)
|
||||
}
|
||||
if f.calls != 1 {
|
||||
t.Errorf("calls = %d, want 1 (transport errors not retried here)", f.calls)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRecognize_PromptCarriesSignals(t *testing.T) {
|
||||
in := Input{
|
||||
Name: "Some.Show.S01",
|
||||
Context: "сериал от HBO",
|
||||
Hints: []string{"это второй сезон", ""},
|
||||
Files: []File{{Path: "ep1.mkv", Size: 1 << 30}},
|
||||
}
|
||||
resp := `{"type":"series","title":"Some Show","files":[
|
||||
{"src":"ep1.mkv","role":"episode","season":1,"episode":1}]}`
|
||||
f := &fakeLLM{responses: []string{resp}}
|
||||
r := New(f, Config{}, testLogger())
|
||||
if _, err := r.Recognize(context.Background(), in); err != nil {
|
||||
t.Fatalf("Recognize: %v", err)
|
||||
}
|
||||
|
||||
if len(f.lastReq.Messages) != 2 {
|
||||
t.Fatalf("want system+user, got %d messages", len(f.lastReq.Messages))
|
||||
}
|
||||
user := f.lastReq.Messages[1].Content
|
||||
for _, want := range []string{"Some.Show.S01", "сериал от HBO", "это второй сезон", "ep1.mkv"} {
|
||||
if !strings.Contains(user, want) {
|
||||
t.Errorf("user prompt missing %q\n%s", want, user)
|
||||
}
|
||||
}
|
||||
if !f.lastReq.JSONMode {
|
||||
t.Error("JSONMode must be set")
|
||||
}
|
||||
}
|
||||
|
||||
func TestRecognize_FileListTruncated(t *testing.T) {
|
||||
files := make([]File, 250)
|
||||
planFiles := make([]string, 0, 250)
|
||||
for i := range files {
|
||||
files[i] = File{Path: pathOf(i), Size: 100 << 20}
|
||||
}
|
||||
// План ссылается только на первый файл — этого достаточно для схемы.
|
||||
_ = planFiles
|
||||
in := Input{Name: "Big.Pack", Files: files}
|
||||
resp := `{"type":"series","title":"Big","files":[{"src":"` + pathOf(0) +
|
||||
`","role":"episode","season":1,"episode":1}]}`
|
||||
f := &fakeLLM{responses: []string{resp}}
|
||||
r := New(f, Config{MaxFiles: 100}, testLogger())
|
||||
if _, err := r.Recognize(context.Background(), in); err != nil {
|
||||
t.Fatalf("Recognize: %v", err)
|
||||
}
|
||||
user := f.lastReq.Messages[1].Content
|
||||
if !strings.Contains(user, "усечён") {
|
||||
t.Errorf("expected truncation note in prompt")
|
||||
}
|
||||
if !strings.Contains(user, "Файлы (250") {
|
||||
t.Errorf("expected total count 250 in prompt")
|
||||
}
|
||||
}
|
||||
|
||||
func pathOf(i int) string {
|
||||
return "show/ep" + itoa(i) + ".mkv"
|
||||
}
|
||||
Reference in New Issue
Block a user