/*
 * Decompiled with CFR 0.152.
 */
package com.microsoft.semantickernel.text;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.function.Function;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;

/**
 * Splits text into lines and groups lines into paragraphs that respect a token budget.
 *
 * <p>Token counts are estimated as {@code characters / 4} (see {@link #tokenCount(int)}); no real
 * tokenizer is involved. Text is cut at the separator occurrence closest to its middle, trying a
 * prioritized list of separators and finally falling back to a cut at the midpoint.
 */
public class TextChunker {
    /** Delimiter used to break paragraphs into words when merging a short trailing paragraph. */
    private static final String SPACE = " ";

    /**
     * Separators tried, in order, for plain text. The trailing {@code null} entry means
     * "no separator: cut at the midpoint".
     */
    private static final List<Pattern> PLAINTEXT_SPLIT_OPTIONS =
            compileSplitOptions("[\n\r]", "\\.", "[\\?\\!]", ";", ":", ",", "[\\)\\]\\}]", " ", "\\-", null);

    /**
     * Separators tried, in order, for markdown. The trailing {@code null} entry means
     * "no separator: cut at the midpoint".
     */
    private static final List<Pattern> MARKDOWN_SPLIT_OPTIONS =
            compileSplitOptions("\\.", "[\\?\\!]", ";", ":", ",", "[\\)\\]\\}]", " ", "\\-", "[\n\r]", null);

    /**
     * Splits plain text into lines whose estimated token count does not exceed the limit where a
     * cut point can be found.
     *
     * @param text the text to split
     * @param maxTokensPerLine maximum estimated tokens per returned line
     * @return the resulting lines, trimmed
     */
    public static List<String> splitPlainTextLines(String text, int maxTokensPerLine) {
        return internalSplitLines(text, maxTokensPerLine, true, PLAINTEXT_SPLIT_OPTIONS);
    }

    /**
     * Splits markdown text into lines whose estimated token count does not exceed the limit where
     * a cut point can be found.
     *
     * @param text the text to split
     * @param maxTokensPerLine maximum estimated tokens per returned line
     * @return the resulting lines, trimmed
     */
    public static List<String> splitMarkDownLines(String text, int maxTokensPerLine) {
        // Return the computed lines; previously the result was discarded and an empty list returned.
        return internalSplitLines(text, maxTokensPerLine, true, MARKDOWN_SPLIT_OPTIONS);
    }

    /**
     * Groups plain-text lines into paragraphs bounded by an estimated token budget.
     *
     * @param lines the input lines; lines that are too long are split first
     * @param maxTokensPerParagraph estimated token budget per paragraph
     * @return the resulting paragraphs
     */
    public static List<String> splitPlainTextParagraphs(List<String> lines, int maxTokensPerParagraph) {
        return internalSplitTextParagraphs(
                lines,
                maxTokensPerParagraph,
                text -> internalSplitLines(text, maxTokensPerParagraph, false, PLAINTEXT_SPLIT_OPTIONS));
    }

    /**
     * Groups markdown lines into paragraphs bounded by an estimated token budget.
     *
     * @param lines the input lines; lines that are too long are split first
     * @param maxTokensPerParagraph estimated token budget per paragraph
     * @return the resulting paragraphs
     */
    public static List<String> splitMarkdownParagraphs(List<String> lines, int maxTokensPerParagraph) {
        return internalSplitTextParagraphs(
                lines,
                maxTokensPerParagraph,
                text -> internalSplitLines(text, maxTokensPerParagraph, false, MARKDOWN_SPLIT_OPTIONS));
    }

    /**
     * Compiles each regex with {@link Pattern#MULTILINE}, keeping {@code null} entries as
     * {@code null} so they can act as the "cut at midpoint" marker.
     */
    private static List<Pattern> compileSplitOptions(String... regexes) {
        List<Pattern> patterns = new ArrayList<>(regexes.length);
        for (String regex : regexes) {
            patterns.add(regex == null ? null : Pattern.compile(regex, Pattern.MULTILINE));
        }
        return Collections.unmodifiableList(patterns);
    }

    /**
     * Splits over-long lines with {@code longLinesSplitter}, then accumulates the resulting lines
     * into paragraphs, starting a new paragraph once the estimated size would reach the budget.
     * A short final paragraph may be merged into the one before it.
     */
    private static List<String> internalSplitTextParagraphs(
            List<String> lines, int maxTokensPerParagraph, Function<String, List<String>> longLinesSplitter) {
        if (lines.isEmpty()) {
            return new ArrayList<>();
        }

        List<String> truncatedLines = new ArrayList<>();
        for (String line : lines) {
            truncatedLines.addAll(longLinesSplitter.apply(line));
        }

        List<String> paragraphs = new ArrayList<>();
        StringBuilder currentParagraph = new StringBuilder();
        for (String line : truncatedLines) {
            // The "+ 1" accounts for the newline appended after each line.
            boolean wouldReachBudget =
                    tokenCount(currentParagraph.length()) + tokenCount(line.length()) + 1 >= maxTokensPerParagraph;
            if (currentParagraph.length() > 0 && wouldReachBudget) {
                paragraphs.add(currentParagraph.toString().trim());
                currentParagraph = new StringBuilder();
            }
            currentParagraph.append(line).append("\n");
        }
        if (currentParagraph.length() > 0) {
            paragraphs.add(currentParagraph.toString().trim());
        }

        mergeShortLastParagraph(paragraphs, maxTokensPerParagraph);
        return paragraphs;
    }

    /**
     * If the last paragraph is shorter than a quarter of the budget (by estimated tokens) and the
     * combined word count of the last two paragraphs fits the budget, replaces the last two
     * paragraphs with their words joined by single spaces.
     */
    private static void mergeShortLastParagraph(List<String> paragraphs, int maxTokensPerParagraph) {
        if (paragraphs.size() <= 1) {
            return;
        }
        int lastIndex = paragraphs.size() - 1;
        String lastParagraph = paragraphs.get(lastIndex);
        String secondLastParagraph = paragraphs.get(lastIndex - 1);
        if (tokenCount(lastParagraph.length()) >= maxTokensPerParagraph / 4) {
            return;
        }

        List<String> lastWords = nonEmptyWords(lastParagraph);
        List<String> secondLastWords = nonEmptyWords(secondLastParagraph);
        // Note: this check counts space-separated words, not the length-based token estimate.
        if (lastWords.size() + secondLastWords.size() > maxTokensPerParagraph) {
            return;
        }

        String merged = Stream.concat(secondLastWords.stream(), lastWords.stream())
                .collect(Collectors.joining(SPACE))
                .trim();
        paragraphs.set(lastIndex - 1, merged);
        paragraphs.remove(lastIndex);
    }

    /** Splits on single spaces and drops empty fragments. */
    private static List<String> nonEmptyWords(String paragraph) {
        return Arrays.stream(paragraph.split(SPACE))
                .filter(word -> !word.isEmpty())
                .collect(Collectors.toList());
    }

    /**
     * Normalizes line endings to {@code \n}, then splits with the first separator and, while the
     * previous pass reports that pieces still exceed the limit, re-splits with each following
     * separator in turn.
     */
    private static List<String> internalSplitLines(
            String text, int maxTokensPerLine, boolean trim, List<Pattern> splitOptions) {
        String normalized = text.replaceAll("\\r?\\n|\\r", "\n");
        SplitString result =
                split(normalized, maxTokensPerLine, Collections.singletonList(splitOptions.get(0)), trim);
        for (int i = 1; result.inputWasSplit && i < splitOptions.size(); ++i) {
            result = split(result.result, maxTokensPerLine, Collections.singletonList(splitOptions.get(i)), trim);
        }
        return result.result;
    }

    /** Applies {@link #split(String, int, List, boolean)} to every element and concatenates the results. */
    private static SplitString split(List<String> input, int maxTokens, List<Pattern> separators, boolean trim) {
        List<String> pieces = new ArrayList<>();
        boolean modified = false;
        for (String str : input) {
            SplitString r = split(str, maxTokens, separators, trim);
            pieces.addAll(r.result);
            modified |= r.inputWasSplit;
        }
        return new SplitString(modified, pieces);
    }

    /**
     * Returns the smallest start index at which any of the separators matches {@code input},
     * or {@code -1} if none matches.
     */
    private static int indexOfAny(List<Pattern> separators, String input) {
        int earliest = -1;
        for (Pattern separator : separators) {
            Matcher matcher = separator.matcher(input);
            if (matcher.find() && (earliest == -1 || matcher.start() < earliest)) {
                earliest = matcher.start();
            }
        }
        return earliest;
    }

    /**
     * Recursively halves {@code input} until each piece fits within {@code maxTokens} or no cut
     * point can be found.
     *
     * <p>The returned flag is {@code false} when the input already fits or every piece produced
     * fits; it is {@code true} when some piece is still over the limit because no cut point was
     * found for it, which tells the caller to retry with the next separator.
     *
     * @param separators a single {@code null} entry means "cut at the midpoint"; otherwise the cut
     *     is made just after the separator match closest to the midpoint
     * @param trim whether both halves are trimmed before recursing
     */
    private static SplitString split(String input, int maxTokens, List<Pattern> separators, boolean trim) {
        if (tokenCount(input.length()) <= maxTokens) {
            return new SplitString(false, Collections.singletonList(input));
        }

        int half = input.length() / 2;
        int cutPoint = -1;
        if (separators.size() == 1 && separators.get(0) == null) {
            cutPoint = half;
        } else if (input.length() > 2) {
            int pos = 0;
            while (true) {
                // The last character is excluded so a cut never leaves an empty second half.
                int index = indexOfAny(separators, input.substring(pos, input.length() - 1));
                if (index < 0) {
                    break;
                }
                index += pos;
                if (Math.abs(half - index) < Math.abs(half - cutPoint)) {
                    cutPoint = index + 1;
                }
                pos = index + 1;
            }
        }

        if (cutPoint <= 0) {
            // Too long but no usable cut point: report that more splitting is needed.
            return new SplitString(true, Collections.singletonList(input));
        }

        String firstHalf = input.substring(0, cutPoint);
        String secondHalf = input.substring(cutPoint);
        if (trim) {
            firstHalf = firstHalf.trim();
            secondHalf = secondHalf.trim();
        }
        SplitString first = split(firstHalf, maxTokens, separators, trim);
        SplitString second = split(secondHalf, maxTokens, separators, trim);

        List<String> pieces = new ArrayList<>(first.result.size() + second.result.size());
        pieces.addAll(first.result);
        pieces.addAll(second.result);
        return new SplitString(first.inputWasSplit || second.inputWasSplit, pieces);
    }

    /** Estimates the token count of a text of the given length as {@code inputLength / 4}. */
    private static int tokenCount(int inputLength) {
        return inputLength / 4;
    }

    /** Result of a split pass: the pieces plus a flag telling whether further splitting is needed. */
    private static class SplitString {
        /** {@code true} when at least one piece still exceeds the limit (see {@code split}). */
        public final boolean inputWasSplit;
        /** The pieces produced by the pass, in original order. */
        public final List<String> result;

        private SplitString(boolean inputWasSplit, List<String> result) {
            this.inputWasSplit = inputWasSplit;
            this.result = result;
        }
    }
}

