package de.l3s.boilerpipe.sax;

import de.l3s.boilerpipe.BoilerpipeExtractor;
import de.l3s.boilerpipe.BoilerpipeProcessingException;
import de.l3s.boilerpipe.document.TextBlock;
import de.l3s.boilerpipe.document.TextDocument;
import java.io.IOException;
import java.io.StringReader;
import java.net.URL;
import java.util.BitSet;
import java.util.HashMap;
import java.util.Map;
import org.apache.http.client.methods.HttpHead;
import org.apache.xerces.parsers.AbstractSAXParser;
import org.cyberneko.html.HTMLConfiguration;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;

/* loaded from: input_file:de/l3s/boilerpipe/sax/HTMLHighlighter.class */
public final class HTMLHighlighter {
    /** When true, start tags, end tags and text that are not flagged in the parser's content bit set are left out of the output. */
    private boolean outputHighlightOnly = false;
    /** Markup that the TA_HEAD tag action appends to the output; by default it defines the x-boilerpipe-mark1 CSS class. */
    private String extraStyleSheet = "\n<style type=\"text/css\">\n.x-boilerpipe-mark1 { text-decoration:none; background-color: #ffff42 !important; color: black !important; display:inline !important; visibility:visible !important; }\n</style>\n";
    /** Markup written immediately before each highlighted run of text. */
    private String preHighlight = "<span class=\"x-boilerpipe-mark1\">";
    /** Markup written immediately after each highlighted run of text. */
    private String postHighlight = "</span>";
    /**
     * Tag action for elements whose markup and text are kept out of the output. It raises the
     * parser's ignore depth before the start tag is handled and lowers it after the end tag has
     * been handled; the parser only emits output while that depth is zero.
     */
    private static final TagAction TA_IGNORABLE_ELEMENT = new TagAction() {
        @Override
        void beforeStart(Implementation parser, String localName) {
            // Entering an ignorable element: go one level deeper.
            parser.inIgnorableElement++;
        }

        @Override
        void afterEnd(Implementation parser, String localName) {
            // Leaving an ignorable element: come back up one level.
            parser.inIgnorableElement--;
        }
    };
    /**
     * Tag action that injects the highlighter's extra style sheet. The hook runs before the end
     * tag of the element it is registered for is written, so the style sheet lands just inside
     * that element.
     */
    private static final TagAction TA_HEAD = new TagAction() {
        @Override
        void beforeEnd(Implementation parser, String localName) {
            final StringBuilder out = parser.html;
            out.append(parser.hl.extraStyleSheet);
        }
    };
    /**
     * Registry of tag actions keyed by element local name. It is populated once by the static
     * initializer of this class and only read afterwards, hence final; the map is created with
     * explicit type arguments instead of a raw type to avoid an unchecked conversion.
     */
    private static final Map<String, TagAction> TAG_ACTIONS = new HashMap<String, TagAction>();

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:de/l3s/boilerpipe/sax/HTMLHighlighter$Implementation.class */
    /**
     * SAX parser and content handler in one: it parses an HTML input and re-serializes it into
     * {@link #html}, wrapping every run of text whose index is set in the content bit set with the
     * enclosing highlighter's pre/post highlight markup.
     *
     * <p>A new instance is created for every document; the instance carries per-parse state and
     * is not reusable.
     */
    public final class Implementation extends AbstractSAXParser implements ContentHandler {
        /** Output buffer receiving the re-serialized (and highlighted) markup. */
        StringBuilder html;
        /** Nesting depth of ignorable elements; tags and text are emitted only while this is 0. */
        private int inIgnorableElement;
        /** Number of characters() callbacks seen so far; used as the index into contentBitSet. */
        private int characterElementIdx;
        /** Indices of the text elements that belong to content blocks. */
        private final BitSet contentBitSet;
        /** The enclosing highlighter, exposed to the static tag actions. */
        private final HTMLHighlighter hl;

        Implementation() {
            super(new HTMLConfiguration());
            this.html = new StringBuilder();
            this.inIgnorableElement = 0;
            this.characterElementIdx = 0;
            this.contentBitSet = new BitSet();
            this.hl = HTMLHighlighter.this;
            setContentHandler(this);
        }

        /**
         * Collects the text element indices of all content blocks of the document and then parses
         * the input, filling {@link #html} as a side effect.
         *
         * @param textDocument the document whose content blocks decide what gets highlighted
         * @param inputSource the HTML to parse and re-serialize
         * @throws BoilerpipeProcessingException wrapping any IOException or SAXException raised while parsing
         */
        void process(TextDocument textDocument, InputSource inputSource) throws BoilerpipeProcessingException {
            for (TextBlock block : textDocument.getTextBlocks()) {
                if (!block.isContent()) {
                    continue;
                }
                final BitSet contained = block.getContainedTextElements();
                if (contained != null) {
                    this.contentBitSet.or(contained);
                }
            }
            try {
                parse(inputSource);
            } catch (IOException e) {
                throw new BoilerpipeProcessingException(e);
            } catch (SAXException e) {
                throw new BoilerpipeProcessingException(e);
            }
        }

        @Override
        public void endDocument() throws SAXException {
            // nothing to do
        }

        @Override
        public void endPrefixMapping(String prefix) throws SAXException {
            // nothing to do
        }

        @Override
        public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException {
            // nothing to do
        }

        @Override
        public void processingInstruction(String target, String data) throws SAXException {
            // nothing to do
        }

        @Override
        public void setDocumentLocator(Locator locator) {
            // nothing to do
        }

        @Override
        public void skippedEntity(String name) throws SAXException {
            // nothing to do
        }

        @Override
        public void startDocument() throws SAXException {
            // nothing to do
        }

        /**
         * Writes the start tag with all of its attributes unless the parser is inside an ignorable
         * element, or only highlighted output is requested and the current text index is not
         * flagged as content. The tag action hooks registered for the element run around this.
         */
        @Override
        public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
            final TagAction tagAction = HTMLHighlighter.TAG_ACTIONS.get(localName);
            if (tagAction != null) {
                tagAction.beforeStart(this, localName);
            }
            try {
                if (this.inIgnorableElement != 0) {
                    return;
                }
                if (HTMLHighlighter.this.outputHighlightOnly && !this.contentBitSet.get(this.characterElementIdx)) {
                    return;
                }
                this.html.append('<');
                this.html.append(qName);
                final int attributeCount = attributes.getLength();
                for (int i = 0; i < attributeCount; i++) {
                    this.html.append(' ');
                    this.html.append(attributes.getQName(i));
                    this.html.append("=\"");
                    this.html.append(HTMLHighlighter.xmlEncode(attributes.getValue(i)));
                    this.html.append("\"");
                }
                this.html.append('>');
            } finally {
                // The after-hook must run exactly once on every path (early return, normal
                // completion, exception); calling it in the try body as well would run it twice.
                if (tagAction != null) {
                    tagAction.afterStart(this, localName);
                }
            }
        }

        /**
         * Writes the end tag under the same conditions as {@link #startElement}. The tag action
         * hooks registered for the element run around this.
         */
        @Override
        public void endElement(String uri, String localName, String qName) throws SAXException {
            final TagAction tagAction = HTMLHighlighter.TAG_ACTIONS.get(localName);
            if (tagAction != null) {
                tagAction.beforeEnd(this, localName);
            }
            try {
                if (this.inIgnorableElement != 0) {
                    return;
                }
                if (HTMLHighlighter.this.outputHighlightOnly && !this.contentBitSet.get(this.characterElementIdx)) {
                    return;
                }
                this.html.append("</");
                this.html.append(qName);
                this.html.append('>');
            } finally {
                // Exactly one afterEnd per end tag: a second call would decrement the ignore
                // depth twice for ignorable elements, drive it negative and silence all
                // output that follows.
                if (tagAction != null) {
                    tagAction.afterEnd(this, localName);
                }
            }
        }

        /**
         * Advances the text element index and, outside ignorable elements, writes the encoded
         * text. Text whose index is set in the content bit set is wrapped in the pre/post
         * highlight markup; other text is written only when not restricted to highlighted output.
         */
        @Override
        public void characters(char[] ch, int start, int length) throws SAXException {
            // The index is advanced before it is looked up, also for text inside ignorable elements.
            this.characterElementIdx++;
            if (this.inIgnorableElement != 0) {
                return;
            }
            final boolean highlight = this.contentBitSet.get(this.characterElementIdx);
            if (!highlight && HTMLHighlighter.this.outputHighlightOnly) {
                return;
            }
            if (highlight) {
                this.html.append(HTMLHighlighter.this.preHighlight);
            }
            this.html.append(HTMLHighlighter.xmlEncode(String.valueOf(ch, start, length)));
            if (highlight) {
                this.html.append(HTMLHighlighter.this.postHighlight);
            }
        }

        @Override
        public void startPrefixMapping(String prefix, String uri) throws SAXException {
            // nothing to do
        }

        /**
         * Post-increments the ignore depth and returns the previous value. Retained for source
         * compatibility with existing callers in the enclosing class.
         */
        static /* synthetic */ int access$108(Implementation implementation) {
            int i = implementation.inIgnorableElement;
            implementation.inIgnorableElement = i + 1;
            return i;
        }

        /**
         * Post-decrements the ignore depth and returns the previous value. Retained for source
         * compatibility with existing callers in the enclosing class.
         */
        static /* synthetic */ int access$110(Implementation implementation) {
            int i = implementation.inIgnorableElement;
            implementation.inIgnorableElement = i - 1;
            return i;
        }
    }

    /* loaded from: input_file:de/l3s/boilerpipe/sax/HTMLHighlighter$TagAction.class */
    /**
     * Per-element hooks invoked by the parser around the handling of a start or end tag.
     * Every hook is a no-op by default; subclasses override only the ones they need.
     */
    private static abstract class TagAction {
        private TagAction() {
        }

        /** Called before the start tag of the element is handled. */
        void beforeStart(Implementation implementation, String str) {
        }

        /** Called after the start tag of the element has been handled. */
        void afterStart(Implementation implementation, String str) {
        }

        /** Called before the end tag of the element is handled. */
        void beforeEnd(Implementation implementation, String str) {
        }

        /** Called after the end tag of the element has been handled. */
        void afterEnd(Implementation implementation, String str) {
        }
    }

    /**
     * Creates a highlighter with the default settings: the full document is written and
     * content text is wrapped in the default highlight markup.
     *
     * @return a new highlighter instance
     */
    public static HTMLHighlighter newHighlightingInstance() {
        return new HTMLHighlighter(false);
    }

    /**
     * Creates a highlighter configured for extraction: only highlighted output is written, and
     * the extra style sheet and the pre/post highlight markup are empty.
     *
     * @return a new highlighter instance
     */
    public static HTMLHighlighter newExtractingInstance() {
        return new HTMLHighlighter(true);
    }

    /**
     * Creates a highlighter. With {@code extractOnly} false the field defaults stay in effect;
     * with true the instance writes highlighted output only and uses no style sheet and no
     * highlight markup.
     */
    private HTMLHighlighter(boolean extractOnly) {
        if (!extractOnly) {
            return;
        }
        setOutputHighlightOnly(true);
        setExtraStyleSheet("");
        setPreHighlight("");
        setPostHighlight("");
    }

    /**
     * Processes HTML given as a string by wrapping it in an {@link InputSource} and delegating
     * to {@link #process(TextDocument, InputSource)}.
     *
     * @param textDocument the document whose content blocks decide what gets highlighted
     * @param str the HTML text to process
     * @return the re-serialized HTML
     * @throws BoilerpipeProcessingException if parsing fails
     */
    public String process(TextDocument textDocument, String str) throws BoilerpipeProcessingException {
        final StringReader reader = new StringReader(str);
        final InputSource source = new InputSource(reader);
        return process(textDocument, source);
    }

    /**
     * Parses the input with a fresh parser instance and returns what it wrote.
     *
     * @param textDocument the document whose content blocks decide what gets highlighted
     * @param inputSource the HTML to parse
     * @return the re-serialized HTML
     * @throws BoilerpipeProcessingException if parsing fails
     */
    public String process(TextDocument textDocument, InputSource inputSource) throws BoilerpipeProcessingException {
        // One parser per call: it accumulates per-document state.
        final Implementation parser = new Implementation();
        parser.process(textDocument, inputSource);
        final StringBuilder output = parser.html;
        return output.toString();
    }

    /**
     * Fetches the document at the URL, runs the extractor on the text document built from it and
     * then processes the fetched HTML against the result.
     *
     * @param url the location of the HTML document
     * @param boilerpipeExtractor the extractor applied to the text document
     * @return the re-serialized HTML
     * @throws IOException declared for the fetch and text-document steps
     * @throws BoilerpipeProcessingException if extraction or highlighting fails
     * @throws SAXException declared for the text-document step
     */
    public String process(URL url, BoilerpipeExtractor boilerpipeExtractor) throws IOException, BoilerpipeProcessingException, SAXException {
        final HTMLDocument fetched = HTMLFetcher.fetch(url);
        final BoilerpipeSAXInput saxInput = new BoilerpipeSAXInput(fetched.toInputSource());
        final TextDocument textDocument = saxInput.getTextDocument();
        boilerpipeExtractor.process(textDocument);
        // toInputSource() is called a second time to feed the highlighting pass.
        return process(textDocument, fetched.toInputSource());
    }

    /**
     * @return true if only highlighted output is written
     */
    public boolean isOutputHighlightOnly() {
        return this.outputHighlightOnly;
    }

    /**
     * Sets whether only highlighted output is written.
     *
     * @param z true to leave out everything that is not flagged as content
     */
    public void setOutputHighlightOnly(boolean z) {
        this.outputHighlightOnly = z;
    }

    /**
     * @return the markup of the extra style sheet
     */
    public String getExtraStyleSheet() {
        return this.extraStyleSheet;
    }

    /**
     * Sets the markup of the extra style sheet.
     *
     * @param str the markup to use; it is appended to the output as given
     */
    public void setExtraStyleSheet(String str) {
        this.extraStyleSheet = str;
    }

    /**
     * @return the markup written before each highlighted run of text
     */
    public String getPreHighlight() {
        return this.preHighlight;
    }

    /**
     * Sets the markup written before each highlighted run of text.
     *
     * @param str the markup to use; it is appended to the output as given
     */
    public void setPreHighlight(String str) {
        this.preHighlight = str;
    }

    /**
     * @return the markup written after each highlighted run of text
     */
    public String getPostHighlight() {
        return this.postHighlight;
    }

    /**
     * Sets the markup written after each highlighted run of text.
     *
     * @param str the markup to use; it is appended to the output as given
     */
    public void setPostHighlight(String str) {
        this.postHighlight = str;
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Escapes the four characters {@code "}, {@code &}, {@code <} and {@code >} as
     * {@code &quot;}, {@code &amp;}, {@code &lt;} and {@code &gt;}; every other character is
     * copied unchanged.
     *
     * @param str the text to escape, may be null
     * @return the escaped text, or the empty string for a null argument
     */
    public static String xmlEncode(String str) {
        if (str == null) {
            return "";
        }
        final StringBuilder encoded = new StringBuilder(str.length());
        for (final char c : str.toCharArray()) {
            if (c == '<') {
                encoded.append("&lt;");
            } else if (c == '>') {
                encoded.append("&gt;");
            } else if (c == '&') {
                encoded.append("&amp;");
            } else if (c == '"') {
                encoded.append("&quot;");
            } else {
                encoded.append(c);
            }
        }
        return encoded.toString();
    }

    static {
        TAG_ACTIONS.put("STYLE", TA_IGNORABLE_ELEMENT);
        TAG_ACTIONS.put("SCRIPT", TA_IGNORABLE_ELEMENT);
        TAG_ACTIONS.put("OPTION", TA_IGNORABLE_ELEMENT);
        TAG_ACTIONS.put("OBJECT", TA_IGNORABLE_ELEMENT);
        TAG_ACTIONS.put("EMBED", TA_IGNORABLE_ELEMENT);
        TAG_ACTIONS.put("APPLET", TA_IGNORABLE_ELEMENT);
        TAG_ACTIONS.put("LINK", TA_IGNORABLE_ELEMENT);
        TAG_ACTIONS.put(HttpHead.METHOD_NAME, TA_HEAD);
    }
}
