Files
transcriber/internal/adapter/telegram/split.go

63 lines
1.5 KiB
Go

package telegram
// splitMessageByWords splits text into consecutive parts of at most maxLen
// runes (Unicode code points), preferring to break right after a space so
// that words are not cut in the middle.
//
// Parameters:
//   - text:   the message to split.
//   - maxLen: the maximum number of runes allowed in a single part.
//
// Returns the parts in order. For valid UTF-8 input, concatenating them
// reproduces text: a space used as a break point stays at the end of the part
// that precedes it and nothing is trimmed. When text already fits, or when
// maxLen is not positive (no usable limit), the result is a single element
// holding text unchanged.
//
// Only a short window at the end of each chunk is searched for a space. If the
// window holds none (for example a very long word or URL), the chunk is cut at
// exactly maxLen runes: a part is never allowed to grow past maxLen.
func (s *TelegramMessageSender) splitMessageByWords(text string, maxLen int) []string {
	// Size of the look-back window: indices end-1 down to end-lookBack+1 are
	// inspected, and index 0 is never used as a break point.
	const lookBack = 20

	// Convert once; all lengths and indices below are in runes, not bytes.
	runes := []rune(text)

	// A non-positive limit cannot be honored (it would produce empty parts
	// forever), so treat it like "fits already" and return the text as is.
	if maxLen <= 0 || len(runes) <= maxLen {
		return []string{text}
	}

	parts := make([]string, 0, len(runes)/maxLen+1)

	// Only split while the remainder is too long; a remainder that fits is
	// emitted whole instead of being broken again at its last space.
	for len(runes) > maxLen {
		splitPoint := maxLen // hard cut unless a space is found in the window

		for i := maxLen - 1; i > maxLen-lookBack && i > 0; i-- {
			if runes[i] == ' ' {
				splitPoint = i + 1 // keep the space with the preceding part
				break
			}
		}

		parts = append(parts, string(runes[:splitPoint]))
		runes = runes[splitPoint:]
	}

	// splitPoint <= maxLen < len(runes) inside the loop, so the remainder is
	// never empty here.
	parts = append(parts, string(runes))

	return parts
}