Files
imgdownloader/main.go
Anton Vakhrushev 79a3e84e57 Rewrite source reading
Switch from json to lines
2025-08-01 10:04:26 +03:00

208 lines
4.6 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package main
import (
"bufio"
"context"
"flag"
"fmt"
"io"
"net/http"
"net/url"
"os"
"path/filepath"
"strings"
"sync"
"time"
"golang.org/x/time/rate"
)
// Package-level state shared by the download helpers; main assigns both
// before any download starts.
var (
	// httpClient is the HTTP client used for every request; main gives it
	// the request timeout.
	httpClient *http.Client

	// limiter is the global rate limiter. It stays nil when no rate limit
	// was requested.
	limiter *rate.Limiter
)
// main parses the command line, reads image URLs line by line from the input
// file (stdin when the input is "-" or omitted) and downloads them
// concurrently into the output directory.
//
// Usage: program [OPTIONS] [input-file|-] <output-dir>
//
// Every non-empty input line gets a sequence number that becomes part of the
// output file name (image_NNNNNN<ext>). Lines that are not valid URLs are
// reported and skipped but still consume a number. The process exits with
// status 1 on bad arguments, when the output directory or input cannot be
// opened, or when reading the input fails.
func main() {
	timeout := flag.Duration("t", 60*time.Second, "request timeout")
	retries := flag.Int("r", 5, "number of download attempts")
	jobs := flag.Int("j", 5, "concurrent downloads")
	rateLimit := flag.Float64("rate", 0, "maximum downloads per second (0 = no limit)")

	inputFile := "-"
	var outputDir string

	flag.Parse()

	args := flag.Args()
	switch len(args) {
	case 1:
		outputDir = args[0]
	case 2:
		inputFile = args[0]
		outputDir = args[1]
	default:
		fmt.Println("Usage: program [OPTIONS] [input-file|-] <output-dir>")
		os.Exit(1)
	}

	// The job count is the semaphore capacity below: zero would block the
	// first send forever and a negative value would panic in make.
	if *jobs < 1 {
		fmt.Printf("Error: concurrent downloads must be at least 1, got %d\n", *jobs)
		os.Exit(1)
	}

	// Create the download directory.
	if err := os.MkdirAll(outputDir, 0755); err != nil {
		fmt.Printf("Error creating directory: %v\n", err)
		os.Exit(1)
	}

	scanner, closer, err := createLineScanner(inputFile)
	if err != nil {
		fmt.Printf("Error reading source: %v\n", err)
		os.Exit(1)
	}
	defer closer.Close()

	httpClient = &http.Client{
		Timeout: *timeout,
	}

	// Initialise the rate limiter only when a positive limit was requested.
	if *rateLimit > 0 {
		limiter = rate.NewLimiter(rate.Limit(*rateLimit), 1)
	}

	// Semaphore bounding the number of downloads in flight.
	sem := make(chan struct{}, *jobs)
	var wg sync.WaitGroup

	// downloaded counts successful downloads; mu guards it because the
	// worker goroutines update it concurrently.
	var (
		mu         sync.Mutex
		downloaded int
	)

	start := time.Now()
	idx := 0
	for scanner.Scan() {
		rawURL := strings.TrimSpace(scanner.Text())
		if rawURL == "" {
			continue
		}
		idx++

		if !isValidUrl(rawURL) {
			fmt.Printf("Warning: invalid url, skip download: %s\n", rawURL)
			continue
		}

		wg.Add(1)
		sem <- struct{}{} // acquire a slot; blocks while all jobs are busy

		go func(idx int, rawURL string) {
			defer wg.Done()
			defer func() { <-sem }() // release the slot

			filename := filepath.Join(outputDir, fmt.Sprintf("image_%06d%s", idx, fileExtension(rawURL)))
			if err := downloadImage(rawURL, filename, *retries); err != nil {
				fmt.Printf("Error downloading %s: %v\n", rawURL, err)
				return
			}
			fmt.Printf("Downloaded %s -> %s\n", rawURL, filename)

			mu.Lock()
			downloaded++
			mu.Unlock()
		}(idx, rawURL)
	}

	wg.Wait()

	// Report the number of files actually saved, not the number of input
	// lines: skipped and failed URLs must not be counted as downloads.
	scanErr := scanner.Err()
	if scanErr != nil {
		fmt.Printf("Error: invalid input: %v\n", scanErr)
	}
	fmt.Printf("\nDownloaded %d images in %v\n", downloaded, time.Since(start))
	if scanErr != nil {
		// os.Exit skips the deferred Close; the input is only read, so
		// nothing is left unflushed.
		os.Exit(1)
	}
}
// createLineScanner returns a line scanner over the given input together with
// the closer that releases it. The special name "-" selects standard input,
// whose closer does nothing; any other name is opened as a file and the file
// itself is returned as the closer. On an open failure both the scanner and
// the closer are nil.
func createLineScanner(inputFile string) (*bufio.Scanner, io.Closer, error) {
	if inputFile != "-" {
		src, openErr := os.Open(inputFile)
		if openErr != nil {
			return nil, nil, openErr
		}
		return bufio.NewScanner(src), src, nil
	}

	// Standard input is not ours to close, so hand back a no-op closer.
	stdinScanner := bufio.NewScanner(os.Stdin)
	return stdinScanner, io.NopCloser(nil), nil
}
// isValidUrl reports whether candidate is an absolute http or https URL with
// a non-empty host, i.e. something the HTTP client can actually fetch.
//
// url.ParseRequestURI alone also accepts bare paths ("/a.png"), opaque URIs
// ("mailto:x@y") and other schemes ("ftp://..."); those would pass validation
// and then fail on every download attempt, so they are rejected here.
func isValidUrl(candidate string) bool {
	parsed, err := url.ParseRequestURI(candidate)
	if err != nil {
		return false
	}
	// The parser lower-cases the scheme, so a plain comparison is enough.
	if parsed.Scheme != "http" && parsed.Scheme != "https" {
		return false
	}
	return parsed.Host != ""
}
// fileExtension picks a file extension for the resource at url from the
// Content-Type header of a HEAD request made with the shared httpClient.
//
// It returns ".jpg", ".png", ".gif", ".webp" or ".svg" for the matching image
// media types, and ".bin" when the request fails or the type is not
// recognised. Media type parameters (such as "; charset=...") and letter case
// are ignored when matching.
func fileExtension(url string) string {
	const fallback = ".bin" // default extension

	resp, err := httpClient.Head(url)
	if err != nil {
		return fallback
	}
	// net/http requires the caller to close the response body, even when
	// nothing is read from it.
	defer resp.Body.Close()

	// Keep only the media type: "image/png; charset=binary" -> "image/png".
	mediaType := strings.SplitN(resp.Header.Get("Content-Type"), ";", 2)[0]

	switch strings.ToLower(strings.TrimSpace(mediaType)) {
	case "image/jpeg":
		return ".jpg"
	case "image/png":
		return ".png"
	case "image/gif":
		return ".gif"
	case "image/webp":
		return ".webp"
	case "image/svg+xml":
		return ".svg"
	}
	return fallback
}
// downloadImage downloads url into filename, trying up to maxRetries times
// with a fixed one-second pause between attempts.
//
// A maxRetries below 1 is treated as 1, so at least one attempt is always
// made. It returns nil as soon as an attempt succeeds; otherwise it returns
// an error that wraps the failure of the last attempt (usable with errors.Is
// and errors.As).
func downloadImage(url, filename string, maxRetries int) error {
	const retryDelay = 1 * time.Second

	// Without this clamp a zero or negative count would skip the loop and
	// report a failure with no underlying cause.
	attempts := maxRetries
	if attempts < 1 {
		attempts = 1
	}

	var lastErr error
	for attempt := 1; attempt <= attempts; attempt++ {
		err := attemptDownload(url, filename)
		if err == nil {
			return nil
		}
		lastErr = err

		// Pause only between attempts, not after the final one.
		if attempt < attempts {
			fmt.Printf("Attempt %d failed for %s: %v. Retrying in %v...\n",
				attempt, url, err, retryDelay)
			time.Sleep(retryDelay)
		}
	}

	return fmt.Errorf("all %d download attempts failed: %w", attempts, lastErr)
}
// attemptDownload performs a single download of url into filename.
//
// When the global rate limiter is set, it waits for the limiter before
// issuing the request. Any status other than 200 OK is an error, and the
// destination file is only created after that check passes. If writing the
// body or closing the file fails, the incomplete file is removed so that no
// truncated image is left on disk.
func attemptDownload(url, filename string) error {
	// Apply rate limiting when it is enabled.
	if limiter != nil {
		if err := limiter.Wait(context.Background()); err != nil {
			return fmt.Errorf("rate limiter error: %w", err)
		}
	}

	resp, err := httpClient.Get(url)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("HTTP error: %s", resp.Status)
	}

	file, err := os.Create(filename)
	if err != nil {
		return err
	}

	_, writeErr := io.Copy(file, resp.Body)
	// Close explicitly rather than via defer: a failed Close can be the only
	// report of a write error, and the file must be closed before removal.
	if closeErr := file.Close(); writeErr == nil {
		writeErr = closeErr
	}
	if writeErr != nil {
		// Best-effort cleanup; the write error is the one worth reporting.
		_ = os.Remove(filename)
		return writeErr
	}
	return nil
}