Rewrite source reading

Switch from json to lines
This commit is contained in:
2025-08-01 10:04:26 +03:00
parent e31f3120b9
commit 79a3e84e57
2 changed files with 69 additions and 29 deletions

95
main.go
View File

@@ -1,14 +1,16 @@
package main
import (
"bufio"
"context"
"encoding/json"
"flag"
"fmt"
"io"
"net/http"
"net/url"
"os"
"path/filepath"
"strings"
"sync"
"time"
@@ -27,17 +29,35 @@ func main() {
jobs := flag.Int("j", 5, "concurrent downloads")
rateLimit := flag.Float64("rate", 0, "maximum downloads per second (0 = no limit)")
inputFile := "-"
var outputDir string
flag.Parse()
args := flag.Args()
if len(args) != 2 {
fmt.Println("Usage: program <json-file> <output-dir>")
if len(args) == 1 {
outputDir = args[0]
} else if len(args) == 2 {
inputFile = args[0]
outputDir = args[1]
} else {
fmt.Println("Usage: program [OPTIONS] [input-file|-] <output-dir>")
os.Exit(1)
}
jsonFile := args[0]
outputDir := args[1]
// Создаем директорию для загрузок
if err := os.MkdirAll(outputDir, 0755); err != nil {
fmt.Printf("Error creating directory: %v\n", err)
os.Exit(1)
}
scanner, closer, err := createLineScanner(inputFile)
if err != nil {
fmt.Printf("Error reading source: %v\n", err)
os.Exit(1)
}
defer closer.Close()
httpClient = &http.Client{
Timeout: *timeout,
@@ -48,32 +68,26 @@ func main() {
limiter = rate.NewLimiter(rate.Limit(*rateLimit), 1)
}
// Создаем директорию для загрузок
if err := os.MkdirAll(outputDir, 0755); err != nil {
fmt.Printf("Error creating directory: %v\n", err)
os.Exit(1)
}
// Читаем JSON файл
data, err := os.ReadFile(jsonFile)
if err != nil {
fmt.Printf("Error reading JSON file: %v\n", err)
os.Exit(1)
}
// Парсим JSON в массив строк
var urls []string
if err := json.Unmarshal(data, &urls); err != nil {
fmt.Printf("Error parsing JSON: %v\n", err)
os.Exit(1)
}
// Семафор для ограничения параллелизма
sem := make(chan struct{}, *jobs)
var wg sync.WaitGroup
start := time.Now()
for i, url := range urls {
idx := 0
for scanner.Scan() {
url := strings.TrimSpace(scanner.Text())
if url == "" {
continue
}
idx++
if !isValidUrl(url) {
fmt.Printf("Warning: invalid url, skip download: %s\n", url)
continue
}
wg.Add(1)
sem <- struct{}{} // Занимаем слот
@@ -87,11 +101,36 @@ func main() {
} else {
fmt.Printf("Downloaded %s -> %s\n", url, filename)
}
}(i, url)
}(idx, url)
}
wg.Wait()
fmt.Printf("\nDownloaded %d images in %v\n", len(urls), time.Since(start))
if err := scanner.Err(); err != nil {
fmt.Printf("Error: invalid input: %v\n", err)
fmt.Printf("\nDownloaded %d images in %v\n", idx, time.Since(start))
os.Exit(1)
}
fmt.Printf("\nDownloaded %d images in %v\n", idx, time.Since(start))
}
// createLineScanner builds a line-oriented scanner over the requested input.
//
// When inputFile is "-", the scanner reads from standard input and the
// returned closer is a no-op, so the caller can unconditionally Close it
// without shutting stdin. For any other value the named file is opened; the
// returned closer is that file and the caller is responsible for closing it.
// If the file cannot be opened, both the scanner and the closer are nil and
// the error from os.Open is returned unchanged.
func createLineScanner(inputFile string) (*bufio.Scanner, io.Closer, error) {
	// Start from the stdin defaults and override them only for a real file.
	var (
		src     io.Reader = os.Stdin
		cleanup io.Closer = io.NopCloser(nil)
	)

	if inputFile != "-" {
		f, err := os.Open(inputFile)
		if err != nil {
			return nil, nil, err
		}
		src, cleanup = f, f
	}

	return bufio.NewScanner(src), cleanup, nil
}
// isValidUrl reports whether candidate is an absolute http or https URL with
// a non-empty host.
//
// url.ParseRequestURI on its own is too permissive for this purpose: it also
// accepts bare absolute paths ("/images/a.png"), opaque URIs
// ("mailto:user@example.com") and arbitrary schemes. Such inputs name no HTTP
// server to contact, so they are rejected here instead of being handed to the
// HTTP client.
func isValidUrl(candidate string) bool {
	parsed, err := url.ParseRequestURI(candidate)
	if err != nil {
		return false
	}
	// The parser lowercases the scheme, so a plain comparison is enough.
	if parsed.Scheme != "http" && parsed.Scheme != "https" {
		return false
	}
	return parsed.Host != ""
}
// Определяем расширение файла по Content-Type