Split long messages when sending to Telegram

2025-08-17 15:53:47 +03:00
parent 12b16b3749
commit 822e1680fb
3 changed files with 217 additions and 0 deletions
@@ -6,6 +6,10 @@ import (
 	tgbotapi "github.com/go-telegram-bot-api/telegram-bot-api/v5"
 )

+const (
+	TextLengthLimit = 4000 // max length in runes of one outgoing message; Send splits longer texts (presumably kept below Telegram's own per-message cap — TODO confirm)
+)
+
 type TelegramMessageSender struct {
 	bot    *tgbotapi.BotAPI
 	logger *slog.Logger
@@ -24,6 +28,32 @@ func NewTelegramMessageSender(botToken string, logger *slog.Logger) (*TelegramMe
 }

 func (s *TelegramMessageSender) Send(text string, chatId int64, replyToMessageId *int) error {
+	// Send delivers text to chatId. A text of at most TextLengthLimit runes goes
+	// out as one message; anything longer is cut into parts that are sent in
+	// order, stopping at the first failure and returning its error.
+	if len([]rune(text)) > TextLengthLimit {
+		for idx, chunk := range s.splitMessageByWords(text, TextLengthLimit) {
+			// Only the opening part carries replyToMessageId; the following
+			// parts are sent without a reply target.
+			replyTo := replyToMessageId
+			if idx > 0 {
+				replyTo = nil
+			}
+			if err := s.sendSingleMessage(chunk, chatId, replyTo); err != nil {
+				return err
+			}
+		}
+		return nil
+	}
+
+	return s.sendSingleMessage(text, chatId, replyToMessageId)
+}
+
+// sendSingleMessage sends a single message
+func (s *TelegramMessageSender) sendSingleMessage(text string, chatId int64, replyToMessageId *int) error {
 	resultMsg := tgbotapi.NewMessage(chatId, text)
 	if replyToMessageId != nil {
 		resultMsg.ReplyToMessageID = *replyToMessageId
@@ -0,0 +1,62 @@
+package telegram
+
+// splitMessageByWords breaks text into consecutive parts of roughly maxLen
+// runes each, preferring to cut at a space so that words stay intact. The
+// parts are consecutive slices of the text: no runes are dropped or added.
+//
+// For every part the cut position is chosen as follows:
+//   - a space is searched for among the last 19 runes of the maxLen window
+//     (index 0 is never considered); if one is found, the part ends right
+//     after it, so the space stays with the preceding part;
+//   - otherwise, if the window ends in the middle of a word, a space is
+//     searched for in the next 20 runes and the part ends right before it,
+//     which means such a part can be up to 19 runes longer than maxLen;
+//   - otherwise the part is cut at exactly maxLen runes.
+//
+// The backward search is also applied to the final remainder, so a tail that
+// already fits into maxLen may still be returned as two parts.
+//
+// A text of at most maxLen runes, or a non-positive maxLen (which leaves no
+// room to split), yields the text unchanged as a single part.
+func (s *TelegramMessageSender) splitMessageByWords(text string, maxLen int) []string {
+	// How far from the window edge a word boundary is searched for.
+	const lookaround = 20
+
+	runes := []rune(text)
+
+	// Nothing to split. A non-positive limit is handled here as well: the loop
+	// below could neither index the window nor make progress with it.
+	if maxLen <= 0 || len(runes) <= maxLen {
+		return []string{text}
+	}
+
+	var parts []string
+	for len(runes) > 0 {
+		// The window is the next maxLen runes, or whatever is left.
+		end := len(runes)
+		if end > maxLen {
+			end = maxLen
+		}
+
+		// Look back from the window edge for a space to cut after.
+		cut := end
+		for i := end - 1; i > end-lookaround && i > 0; i-- {
+			if runes[i] == ' ' {
+				cut = i + 1
+				break
+			}
+		}
+
+		// No usable space behind the edge and the edge falls inside a word:
+		// look ahead for the end of that word instead of cutting through it.
+		if cut == end && end < len(runes) && runes[end] != ' ' && runes[end-1] != ' ' {
+			for i := end; i < len(runes) && i < end+lookaround; i++ {
+				if runes[i] == ' ' {
+					cut = i
+					break
+				}
+			}
+		}
+
+		// cut is at least 1 here, so every iteration consumes input.
+		parts = append(parts, string(runes[:cut]))
+		runes = runes[cut:]
+	}
+
+	return parts
+}
@@ -0,0 +1,125 @@
+package telegram
+
+import (
+	"testing"
+)
+
+// TestTelegramMessageSender_splitMessageByWords runs splitMessageByWords over a
+// table of mostly Cyrillic inputs and compares the returned parts one by one
+// with the expected ones.
+func TestTelegramMessageSender_splitMessageByWords(t *testing.T) {
+	// splitCase is one table entry: the input, the limit and the exact parts
+	// the splitter is expected to return.
+	type splitCase struct {
+		name   string
+		text   string
+		maxLen int
+		want   []string
+	}
+
+	splitter := new(TelegramMessageSender)
+
+	cases := []splitCase{
+		{
+			name:   "Short text should return as is",
+			text:   "Привет мир",
+			maxLen: 25,
+			want: []string{
+				"Привет мир",
+			},
+		},
+		{
+			name:   "Text exactly at limit",
+			text:   "Это тестовый текст который ровно соответствует лимиту",
+			maxLen: 35,
+			want: []string{
+				"Это тестовый текст который ровно ",
+				"соответствует ",
+				"лимиту",
+			},
+		},
+		{
+			name:   "Text with word boundaries",
+			text:   "Это очень длинный текст для проверки работы функции разделения сообщения",
+			maxLen: 25,
+			want: []string{
+				"Это очень длинный текст ",
+				"для проверки работы ",
+				"функции разделения ",
+				"сообщения",
+			},
+		},
+		{
+			name:   "Text with long words",
+			text:   "Этот текст содержит оченьдлинноеслово которое не должно быть разбито",
+			maxLen: 20,
+			want: []string{
+				"Этот текст содержит ",
+				"оченьдлинноеслово ",
+				"которое не должно ",
+				"быть ",
+				"разбито",
+			},
+		},
+		{
+			name:   "Text with multiple spaces",
+			text:   "Этот   текст   имеет   много   пробелов",
+			maxLen: 20,
+			want: []string{
+				"Этот   текст   ",
+				"имеет   много   ",
+				"пробелов",
+			},
+		},
+		{
+			name:   "Text with Russian characters and punctuation",
+			text:   "Привет! Как дела? Это тестовая строка для проверки работы функции.",
+			maxLen: 25,
+			want: []string{
+				"Привет! Как дела? Это ",
+				"тестовая строка для ",
+				"проверки работы ",
+				"функции.",
+			},
+		},
+		{
+			name:   "Single word longer than maxLen",
+			text:   "Некотороедлинноеслово",
+			maxLen: 10,
+			want: []string{
+				"Некотороед",
+				"линноеслов",
+				"о",
+			},
+		},
+		{
+			name:   "Text with mixed Russian and English",
+			text:   "Привет Hello мир World текст для проверки",
+			maxLen: 20,
+			want: []string{
+				"Привет Hello мир ",
+				"World текст для ",
+				"проверки",
+			},
+		},
+		{
+			name:   "Text with special characters",
+			text:   "Тест с символами: @#$%^&*()_+-=[]{}|;':\",./<>?",
+			maxLen: 25,
+			want: []string{
+				"Тест с символами: ",
+				"@#$%^&*()_+-=[]{}|;':\",./",
+				"<>?",
+			},
+		},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			got := splitter.splitMessageByWords(tc.text, tc.maxLen)
+
+			// With a wrong number of parts a per-part comparison is meaningless.
+			if gotLen, wantLen := len(got), len(tc.want); gotLen != wantLen {
+				t.Errorf("splitMessageByWords() length = %d, want %d", gotLen, wantLen)
+				return
+			}
+
+			for idx := range tc.want {
+				if got[idx] == tc.want[idx] {
+					continue
+				}
+				t.Errorf("splitMessageByWords() part %d = %q, want %q", idx, got[idx], tc.want[idx])
+			}
+		})
+	}
+}