/*
 * Decompiled with CFR 0.152.
 */
package com.zimbra.common.util;

import java.io.IOException;
import java.io.Reader;
import org.cyberneko.html.parsers.SAXParser;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

/**
 * SAX content handler that collects the visible text of an HTML document.
 *
 * <p>Character data is appended to an internal buffer with runs of leading
 * whitespace collapsed to a single separating space. Text inside
 * {@code <style>} and {@code <script>} elements is ignored, text inside
 * {@code <title>} is collected separately (see {@link #getTitle()}), and a
 * non-empty {@code alt} attribute of an {@code <img>} element is emitted as
 * {@code [alt text]}. The collected body text never grows beyond
 * {@code maxLength} characters.
 *
 * <p>Instances hold mutable parsing state and are not safe for concurrent use.
 */
public class HtmlTextExtractor extends DefaultHandler {
    /** Extracted body text, capped at {@link #maxLength} characters. */
    private StringBuilder sb = new StringBuilder(1024);
    /** Accumulated {@code <title>} text, or {@code null} if none was seen. */
    private String title = null;
    /** True while between a {@code <title>} start tag and its end tag. */
    boolean inTitle = false;
    /** True when the previous SAX event was a non-blank {@code characters} call. */
    boolean inCharacters = false;
    /** Nesting depth of {@code <style>}/{@code <script>} elements; never negative. */
    int skipping = 0;
    /** Maximum number of characters kept in the extracted body text. */
    int maxLength;

    /**
     * Creates an extractor.
     *
     * @param maxLength maximum number of characters of body text to keep
     */
    public HtmlTextExtractor(int maxLength) {
        this.maxLength = maxLength;
    }

    /**
     * Resets all per-document state so that a reused handler does not carry
     * text, title or element-tracking flags over from a previous parse.
     */
    @Override
    public void startDocument() {
        this.sb.setLength(0);
        this.title = null;
        this.inTitle = false;
        this.inCharacters = false;
        this.skipping = 0;
    }

    /**
     * Tracks entry into {@code <title>}, {@code <style>} and {@code <script>}
     * and emits {@code [alt]} text for {@code <img>} elements.
     *
     * <p>Element state is tracked even when the buffer is already full, so
     * that it stays balanced with {@link #endElement}.
     */
    @Override
    public void startElement(String uri, String localName, String qName, Attributes attributes) {
        String element = HtmlTextExtractor.elementName(localName, qName);
        // equalsIgnoreCase is locale-independent; toUpperCase() in e.g. a Turkish
        // default locale maps 'i' to a dotted capital and would never match.
        if ("TITLE".equalsIgnoreCase(element)) {
            this.inTitle = true;
        } else if ("STYLE".equalsIgnoreCase(element) || "SCRIPT".equalsIgnoreCase(element)) {
            ++this.skipping;
        } else if ("IMG".equalsIgnoreCase(element) && attributes != null
                && this.sb.length() < this.maxLength) {
            String altText = attributes.getValue("alt");
            if (altText != null && !altText.isEmpty()) {
                if (this.sb.length() > 0) {
                    this.sb.append(' ');
                }
                // Route through the capped append so a long alt text cannot
                // push the result past maxLength.
                String token = "[" + altText + "]";
                this.appendLimited(token.toCharArray(), 0, token.length());
            }
        }
        this.inCharacters = false;
    }

    /**
     * Collects character data: into the title while inside {@code <title>},
     * otherwise into the body text with leading whitespace stripped and a
     * single space inserted between separate text runs.
     *
     * <p>Nothing is collected while inside {@code <style>}/{@code <script>}
     * or once the body text has reached {@code maxLength}.
     */
    @Override
    public void characters(char[] ch, int offset, int length) {
        if (this.skipping > 0 || length == 0 || this.sb.length() >= this.maxLength) {
            return;
        }
        if (this.inTitle) {
            String content = new String(ch, offset, length);
            if (this.title == null) {
                this.title = content;
            } else {
                // Consecutive characters() calls are one text run: join without a space.
                this.title = this.title + (this.inCharacters ? "" : " ") + content;
            }
        } else {
            int original = offset;
            // Strip leading control/whitespace characters and non-breaking spaces.
            while (length > 0 && (ch[offset] <= ' ' || ch[offset] == '\u00a0')) {
                ++offset;
                --length;
            }
            if (length > 0) {
                // No separator when this chunk directly continues the previous
                // chunk (nothing was stripped and no element event intervened).
                boolean continuesPreviousRun = this.inCharacters && original == offset;
                if (this.sb.length() > 0 && !continuesPreviousRun) {
                    this.sb.append(' ');
                }
                this.appendLimited(ch, offset, length);
            }
        }
        this.inCharacters = length > 0;
    }

    /**
     * Tracks exit from {@code <title>}, {@code <style>} and {@code <script>}.
     */
    @Override
    public void endElement(String uri, String localName, String qName) {
        String element = HtmlTextExtractor.elementName(localName, qName);
        if ("TITLE".equalsIgnoreCase(element)) {
            this.inTitle = false;
        } else if ("STYLE".equalsIgnoreCase(element) || "SCRIPT".equalsIgnoreCase(element)) {
            // Clamp at zero: a stray end tag must not make the counter negative,
            // or the next real <script>/<style> body would not be skipped.
            if (this.skipping > 0) {
                --this.skipping;
            }
        }
        this.inCharacters = false;
    }

    /**
     * Returns the body text extracted so far.
     *
     * @return the extracted text, at most {@code maxLength} characters long
     */
    @Override
    public String toString() {
        return this.sb.toString();
    }

    /**
     * Returns the text collected from {@code <title>} elements.
     *
     * @return the title text, or the empty string if none was collected
     */
    public String getTitle() {
        return this.title == null ? "" : this.title;
    }

    /**
     * Parses HTML from the given reader and returns its extracted text.
     *
     * <p>The parser's {@code balance-tags} feature is switched off before
     * parsing. This method does not itself close {@code htmlReader}.
     *
     * @param htmlReader source of the HTML markup
     * @param sizeLimit maximum number of characters of text to return
     * @return the extracted text
     * @throws IOException if the parser reports an I/O error
     * @throws SAXException if the parser reports a parse or configuration error
     */
    public static String extract(Reader htmlReader, int sizeLimit) throws IOException, SAXException {
        SAXParser parser = new SAXParser();
        HtmlTextExtractor handler = new HtmlTextExtractor(sizeLimit);
        parser.setContentHandler(handler);
        parser.setFeature("http://cyberneko.org/html/features/balance-tags", false);
        parser.parse(new InputSource(htmlReader));
        return handler.toString();
    }

    /** Picks the element name to match on, falling back to qName when localName is absent. */
    private static String elementName(String localName, String qName) {
        return localName == null || localName.isEmpty() ? qName : localName;
    }

    /** Appends as much of the given range as still fits under maxLength. */
    private void appendLimited(char[] ch, int offset, int length) {
        int room = this.maxLength - this.sb.length();
        if (room <= 0) {
            return;
        }
        this.sb.append(ch, offset, Math.min(length, room));
    }
}

