// transcriber/internal/adapter/telegram/split.go

package telegram

// splitMessageByWords breaks text into consecutive parts of at most maxLen
// runes each, preferring to cut right after a space so that words are not
// split in the middle.
//
// Lengths are counted in runes (Unicode code points), not bytes. The
// separating space stays at the end of the part it follows, so concatenating
// the returned parts reproduces text exactly.
//
// A cut is moved back to a space only when that space lies within the last
// lookback-1 runes of the chunk; if there is none, the chunk is cut at exactly
// maxLen (splitting the word) so that no part ever exceeds maxLen.
//
// If text already fits, or maxLen is not positive (no valid split exists),
// text is returned unchanged as the only part.
func (s *TelegramMessageSender) splitMessageByWords(text string, maxLen int) []string {
	// lookback bounds how far before the hard limit a space is searched for.
	const lookback = 20

	runes := []rune(text)

	// Nothing to split: either the text fits, or the limit is unusable
	// (a non-positive limit could never make progress through the text).
	if maxLen <= 0 || len(runes) <= maxLen {
		return []string{text}
	}

	parts := make([]string, 0, len(runes)/maxLen+1)

	// Only cut while the remainder is still too long; a remainder that fits
	// is emitted whole instead of being split again at its last space.
	for len(runes) > maxLen {
		// Default to a hard cut at the limit.
		cut := maxLen

		// Prefer cutting just after the closest space before the limit.
		// The i > 0 bound keeps every cut at least one rune long, which
		// guarantees the loop makes progress.
		for i := maxLen - 1; i > maxLen-lookback && i > 0; i-- {
			if runes[i] == ' ' {
				cut = i + 1 // keep the space with the preceding part
				break
			}
		}

		parts = append(parts, string(runes[:cut]))
		runes = runes[cut:]
	}

	// cut <= maxLen < len(runes) held on every iteration, so the remainder
	// is never empty here.
	parts = append(parts, string(runes))

	return parts
}