/*
 * Decompiled with CFR 0.152.
 */
package com.digiwin.chatbi.reasoning.boostEngine.chunk.model;

import com.digiwin.chatbi.reasoning.boostEngine.chunk.model.Token;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

/*
 * Exception performing whole class analysis ignored.
 */
/**
 * Post-processes a list of recognised {@link Token} spans over a sentence.
 *
 * <p>The pipeline implemented by {@link #process(String, List)} is:
 * <ol>
 *   <li>order the tokens by start offset;</li>
 *   <li>create extra tokens for the stretches of the sentence that no token covers;</li>
 *   <li>drop tokens whose end does not extend past the tokens already kept;</li>
 *   <li>trim the remaining partial overlaps and try to merge adjacent pairs via
 *       {@link Token#merge}.</li>
 * </ol>
 */
public class Tokenizer {
    /**
     * Runs the full pipeline described on the class.
     *
     * <p>The supplied list is never modified; all work happens on a private copy,
     * so unmodifiable lists are accepted as well.
     *
     * @param originalSentence the sentence the token offsets refer to; must not be {@code null}
     * @param tokens           recognised tokens in any order; {@code null} is treated as "no tokens"
     * @return a new list holding the processed tokens
     * @throws NullPointerException if {@code originalSentence} is {@code null}
     */
    public static List<Token> process(String originalSentence, List<Token> tokens) {
        if (originalSentence == null) {
            throw new NullPointerException("originalSentence must not be null");
        }
        // Copy first: sorting and gap insertion must not leak into the caller's list.
        List<Token> working = new ArrayList<Token>();
        if (tokens != null) {
            working.addAll(tokens);
        }
        working.sort(Tokenizer::compareByStart);

        List<Token> withGaps = preprocess(originalSentence, working);
        List<Token> withoutContained = filterOverlappingTokens(withGaps);
        return generateWordSegmentationGraph(withoutContained);
    }

    /**
     * Orders two tokens by ascending start offset.
     *
     * <p>Uses {@link Integer#compare(int, int)} rather than subtraction, which could
     * overflow for extreme offsets and yield a wrong ordering.
     */
    private static int compareByStart(Token left, Token right) {
        return Integer.compare(left.getStart(), right.getStart());
    }

    /**
     * Adds a token for every stretch of the sentence that lies between the given tokens,
     * plus one for the tail after the last covered position.
     *
     * @param originalSentence the sentence the offsets refer to
     * @param tokens           tokens already sorted by start offset; not modified
     * @return a new list with the given tokens and the gap tokens, sorted by start offset
     */
    private static List<Token> preprocess(String originalSentence, List<Token> tokens) {
        List<Token> combined = new ArrayList<Token>(tokens);
        int lastEnd = 0;
        for (Token token : tokens) {
            int start = token.getStart();
            if (start > lastEnd) {
                // Nothing covers [lastEnd, start): materialise that slice as its own token.
                combined.add(new Token(originalSentence.substring(lastEnd, start), lastEnd, start));
            }
            // max(): a token nested inside an earlier, longer one must not pull the cursor back.
            lastEnd = Math.max(lastEnd, token.getEnd());
        }
        int sentenceLength = originalSentence.length();
        if (lastEnd < sentenceLength) {
            combined.add(new Token(originalSentence.substring(lastEnd), lastEnd, sentenceLength));
        }
        // List.sort is stable, so tokens sharing a start keep their relative order.
        combined.sort(Tokenizer::compareByStart);
        return combined;
    }

    /**
     * Drops every token whose end offset does not extend past the end of the tokens kept so far.
     *
     * <p>Tokens are visited by ascending start and, for equal starts, by descending end, so the
     * longest token at a given start is seen first. Partially overlapping tokens are kept.
     *
     * @param tokens candidate tokens; not modified
     * @return a new list with the surviving tokens in visiting order
     */
    private static List<Token> filterOverlappingTokens(List<Token> tokens) {
        List<Token> ordered = new ArrayList<Token>(tokens);
        ordered.sort((t1, t2) -> {
            int byStart = compareByStart(t1, t2);
            return byStart != 0 ? byStart : Integer.compare(t2.getEnd(), t1.getEnd());
        });

        List<Token> kept = new ArrayList<Token>();
        int lastEnd = -1;
        for (Token token : ordered) {
            if (token.getEnd() > lastEnd) {
                kept.add(token);
                lastEnd = token.getEnd();
            }
        }
        return kept;
    }

    /**
     * Removes the remaining partial overlaps and then tries to merge neighbouring tokens.
     *
     * @param tokens tokens in the order produced by {@link #filterOverlappingTokens(List)}
     * @return a new list of tokens after trimming and pairwise merging
     */
    private static List<Token> generateWordSegmentationGraph(List<Token> tokens) {
        List<Token> nonOverlappingTokens = new ArrayList<Token>();
        int lastEnd = -1;
        for (Token token : tokens) {
            if (token.getStart() >= lastEnd) {
                // Starts at or after the covered region: keep as is.
                nonOverlappingTokens.add(token);
                lastEnd = token.getEnd();
            } else if (token.getEnd() > lastEnd) {
                // Starts inside the covered region but reaches beyond it: keep only the
                // uncovered tail. NOTE(review): the tail is a fresh Token built from text and
                // offsets only, so other attributes of the source token (e.g. its token type)
                // are presumably not carried over - confirm against Token.
                String tail = token.getToken().substring(lastEnd - token.getStart());
                nonOverlappingTokens.add(new Token(tail, lastEnd, token.getEnd()));
                lastEnd = token.getEnd();
            }
            // Otherwise the token lies entirely inside the covered region and is skipped.
        }

        List<Token> mergedTokens = new ArrayList<Token>();
        int count = nonOverlappingTokens.size();
        int index = 0;
        while (index < count) {
            Token current = nonOverlappingTokens.get(index);
            // Token.merge returns null when the pair cannot be merged.
            Token merged = index + 1 < count
                    ? Token.merge(current, nonOverlappingTokens.get(index + 1))
                    : null;
            if (merged != null) {
                // Both inputs are consumed; the merged token is not re-merged with what follows.
                mergedTokens.add(merged);
                index += 2;
            } else {
                mergedTokens.add(current);
                index++;
            }
        }
        return mergedTokens;
    }

    /**
     * Ad-hoc demo: runs {@link #process(String, List)} on a small hard-coded example and
     * prints each resulting token to standard error.
     */
    public static void main(String[] args) {
        String sentence = "\u5154\u5b50\u5403\u8349\u771f\u9999\u5440\u54c8\u54c8";
        List<Token> tokens = new ArrayList<Token>();
        Token token1 = new Token("\u5154\u5b50", 0, 2);
        Token token2 = new Token("\u771f\u9999", 4, 6);
        Token token3 = new Token("\u771f\u9999\u5440", 4, 7);
        Token token4 = new Token("\u8349\u771f\u9999", 3, 6);
        token1.setTokenType("A");
        token2.setTokenType("A");
        token3.setTokenType("A");
        token4.setTokenType("A");
        tokens.add(token1);
        tokens.add(token2);
        tokens.add(token3);
        tokens.add(token4);
        List<Token> allTokens = Tokenizer.process(sentence, tokens);
        allTokens.forEach(System.err::println);
    }
}

