package com.esen.util.search.core.util;

import com.esen.util.StrFunc;
import org.htmlparser.Node;
import org.htmlparser.Parser;
import org.htmlparser.util.NodeList;
import org.htmlparser.util.ParserException;
import org.htmlparser.visitors.HtmlPage;

/* loaded from: input_file:com/esen/util/search/core/util/LuceneHtmlParser.class */
/**
 * Extracts the title and the plain-text body content from an HTML string
 * using the {@code org.htmlparser} library.
 *
 * <p>Usage: supply the HTML through the constructor or
 * {@link #setInputHTML(String)}, call {@link #parse()}, then read the results
 * with {@link #getTitle()} and {@link #getContent()}. An instance may be reused
 * for several documents; every call to {@link #parse()} starts from a clean
 * result state.
 */
public class LuceneHtmlParser {
    /** Separator appended after the text of each non-empty body node. */
    private static final String DEFAULT_SPLIT = " ";

    /** Raw HTML to be parsed; may be {@code null} until an input is supplied. */
    private String html;

    /** Title found by the last {@link #parse()} call; empty string if none. */
    private String title = "";

    /** Plain-text body found by the last {@link #parse()} call; empty string if none. */
    private String content = "";

    /** Creates a parser with no input; call {@link #setInputHTML(String)} before parsing. */
    public LuceneHtmlParser() {
    }

    /**
     * Creates a parser for the given HTML.
     *
     * @param str the HTML text to parse
     */
    public LuceneHtmlParser(String str) {
        this.html = str;
    }

    /**
     * Replaces the HTML input. Previously parsed results remain readable until
     * the next {@link #parse()} call.
     *
     * @param str the HTML text to parse
     */
    public void setInputHTML(String str) {
        this.html = str;
    }

    /**
     * @return the title produced by the most recent {@link #parse()} call, or an
     *         empty string if nothing has been parsed
     */
    public String getTitle() {
        return this.title;
    }

    /**
     * @return the body text produced by the most recent {@link #parse()} call, or
     *         an empty string if nothing has been parsed
     */
    public String getContent() {
        return this.content;
    }

    /**
     * Parses the current HTML input and stores its title and plain-text body.
     *
     * <p>The body text is the trimmed plain text of each node returned by
     * {@code HtmlPage.getBody()}; nodes whose text is considered empty by
     * {@code StrFunc.isNull} are skipped, and each kept fragment is followed by a
     * single space (so non-empty content ends with a trailing space).
     *
     * <p>If the input is considered empty by {@code StrFunc.isNull}, the title
     * and content are reset to empty strings and no parsing takes place.
     *
     * @throws ParserException if the underlying HTML parser fails; in that case
     *         the title and content are left as empty strings
     */
    public void parse() throws ParserException {
        // Reset first so that results from an earlier document never leak into
        // this call, whether the input is empty or parsing fails part-way.
        this.title = "";
        this.content = "";
        if (StrFunc.isNull(this.html)) {
            return;
        }

        Parser parser = new Parser(this.html);
        HtmlPage htmlPage = new HtmlPage(parser);
        parser.visitAllNodesWith(htmlPage);
        this.title = htmlPage.getTitle();

        // Local, single-threaded accumulation: StringBuilder is sufficient.
        StringBuilder text = new StringBuilder();
        NodeList body = htmlPage.getBody();
        if (body != null) {
            Node[] nodes = body.toNodeArray();
            if (nodes != null) {
                for (Node node : nodes) {
                    String fragment = node.toPlainTextString().trim();
                    if (!StrFunc.isNull(fragment)) {
                        text.append(fragment).append(DEFAULT_SPLIT);
                    }
                }
            }
        }
        this.content = text.toString();
    }
}
