/*
 * Decompiled with CFR 0.152.
 */
package uk.ac.warwick.util.content.cleaner;

import com.google.common.collect.Lists;
import java.io.IOException;
import java.io.StringReader;
import java.util.List;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.ccil.cowan.tagsoup.Parser;
import org.ccil.cowan.tagsoup.Schema;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import uk.ac.warwick.html5.HTML5Schema;
import uk.ac.warwick.util.collections.Pair;
import uk.ac.warwick.util.collections.Triple;
import uk.ac.warwick.util.content.MutableContent;
import uk.ac.warwick.util.content.cleaner.Cleaner;
import uk.ac.warwick.util.content.cleaner.CleanerWriter;
import uk.ac.warwick.util.content.cleaner.HtmlContentWriter;
import uk.ac.warwick.util.content.cleaner.TagAndAttributeFilter;
import uk.ac.warwick.util.content.cleaner.TagAndAttributeFilterImpl;
import uk.ac.warwick.util.core.ObjectProvider;

public final class HtmlCleaner
implements Cleaner {
    /** Logger for this class; no code visible in this file writes to it. */
    public static final Logger LOGGER = LoggerFactory.getLogger(HtmlCleaner.class);
    /** Literal (search, replacement) pairs applied to the raw input before parsing. */
    private final List<Pair<String, String>> straightReplacements;
    /** Literal (search, replacement) pairs applied to the parser output. */
    private final List<Pair<String, String>> postParseStraightReplacements;
    /**
     * Regex rules applied before parsing. Each triple is (pattern, lower-case guard substring,
     * replacement); the pattern is only run when the lower-cased text contains the guard.
     */
    private final List<Triple<Pattern, String, String>> regexReplacements;
    /** Regex rules applied to the parser output; same (pattern, guard, replacement) layout. */
    private final List<Triple<Pattern, String, String>> postParseRegexReplacements;
    /** Optional writer chained in front of the handler's own content writer; may be null. */
    private final HtmlContentWriter contentWriter;
    /** Supplies a fresh filter for every clean() call; defaults to TagAndAttributeFilterImpl. */
    private ObjectProvider<TagAndAttributeFilter> filterProvider = new ObjectProvider<TagAndAttributeFilter>(){

        @Override
        public TagAndAttributeFilter newInstance() {
            return new TagAndAttributeFilterImpl();
        }
    };
    /** Passed to the filter's setAllowJavascriptHandlers on every clean() call. */
    private boolean allowJavascriptHandlers = true;
    /** Passed to the filter's setAllowBlockquoteWithNoAttributes on every clean() call. */
    private boolean allowBlockquoteWithNoAttributes = true;
    /** Passed to the CleanerWriter's setPrettyPrint on every clean() call. */
    private boolean prettyPrint = false;
    /** TagSoup schema handed to the parser; HTML5 by default. */
    private Schema schema = new HTML5Schema();

    /**
     * Creates a cleaner with no custom content writer; equivalent to passing {@code null}
     * to {@link #HtmlCleaner(HtmlContentWriter)}.
     */
    public HtmlCleaner() {
        this(null);
    }

    /**
     * Creates a cleaner and builds the four replacement tables used around parsing.
     *
     * <p>Rule order inside each list matters: the rules are applied sequentially and
     * several of them depend on the result of an earlier rule (for example the
     * {@code _NONBREAKINGSPACE_} placeholder rules and the {@code &#65279;} anchor
     * placeholder rules).
     *
     * <p>Each regex rule is a triple of (pattern, guard, replacement). The guard is a
     * lower-case substring that is looked up in the lower-cased text before the pattern
     * is executed. The numeric pattern flags are decompiler output:
     * {@code 2} is {@code Pattern.CASE_INSENSITIVE} and {@code 34} is
     * {@code Pattern.CASE_INSENSITIVE | Pattern.DOTALL}.
     *
     * @param theContentWriter optional writer to chain in front of the handler's content
     *                         writer during {@code clean}; may be {@code null}
     */
    public HtmlCleaner(HtmlContentWriter theContentWriter) {
        this.contentWriter = theContentWriter;
        // ---- literal replacements applied before parsing ----
        this.straightReplacements = Lists.newArrayList();
        // Editor-prefixed attribute names are mapped back to the plain href/src names.
        this.straightReplacements.add(Pair.of("_mce_thref=", "href="));
        this.straightReplacements.add(Pair.of("_mce_tsrc=", "src="));
        this.straightReplacements.add(Pair.of("mce_thref=", "href="));
        this.straightReplacements.add(Pair.of("mce_tsrc=", "src="));
        // "Link opens in a new window" decorations are stripped from the input.
        this.straightReplacements.add(Pair.of("<img class='targetBlank' alt='' title='Link opens in a new window' src='/static_war/images/shim.gif' />", ""));
        this.straightReplacements.add(Pair.of("<i class='new-window-link' aria-hidden='true' title='Link opens in a new window'></i><span class='sr-only'>Link opens in a new window</span>", ""));
        // Middle dot becomes its numeric entity (the Mso list rule below looks for it).
        this.straightReplacements.add(Pair.of("\u00b7", "&#183;"));
        // The &#65279; entity is removed from the input.
        this.straightReplacements.add(Pair.of("&#65279;", ""));
        // ---- regex replacements applied before parsing ----
        this.regexReplacements = Lists.newArrayList();
        // &nbsp; handling: collapse runs, protect an &nbsp; that is the only content between
        // two tags with a placeholder, turn every other &nbsp; into a plain space, then
        // restore the placeholder.
        this.regexReplacements.add(Triple.of(Pattern.compile("&nbsp;(&nbsp;)+"), "&nbsp;", "&nbsp;"));
        this.regexReplacements.add(Triple.of(Pattern.compile(">(&nbsp;| )*&nbsp;(&nbsp;| )*<"), "&nbsp;", ">_NONBREAKINGSPACE_<"));
        this.regexReplacements.add(Triple.of(Pattern.compile("&nbsp;"), "&nbsp;", " "));
        this.regexReplacements.add(Triple.of(Pattern.compile("_NONBREAKINGSPACE_"), "_nonbreakingspace_", "&nbsp;"));
        // Office conditional-comment blocks are removed together with their content.
        this.regexReplacements.add(Triple.of(Pattern.compile("<!--\\[if [a-z]+ mso \\d*\\]>.*?<!\\-*\\[endif\\].*?-->", 34), "[endif]", ""));
        this.regexReplacements.add(Triple.of(Pattern.compile("<!--\\[if supportFields\\]>.*?<!\\[endif\\]-->", 34), "[if supportfields]", ""));
        this.regexReplacements.add(Triple.of(Pattern.compile("<!--\\[if !mso\\]>.*?<!-*\\[endif\\]-->", 34), "[if !mso]", ""));
        this.regexReplacements.add(Triple.of(Pattern.compile("<!--\\[if gte vml 1\\]>.*?<!\\[endif\\]-->", 34), "[if gte vml 1]", ""));
        // Editor "bogus" line breaks are removed.
        this.regexReplacements.add(Triple.of(Pattern.compile("<br _?mce_bogus=\"?1\"?\\s*/?>", 2), "_bogus", ""));
        this.regexReplacements.add(Triple.of(Pattern.compile("<br data-mce-bogus=\"?1\"?\\s*/?>", 2), "data-mce-bogus", ""));
        // <mce:style><!-- ... --></mce:style> is unwrapped into a plain <style> element.
        this.regexReplacements.add(Triple.of(Pattern.compile("<mce:style([^>]*)>\\<\\!\\-\\-(.*?)\\-\\-\\></mce:style>", 34), "</mce:style>", "<style$1>$2</style>"));
        // <style> elements flagged as bogus are removed entirely.
        this.regexReplacements.add(Triple.of(Pattern.compile("<style[^>]* _?mce_bogus=\"?1\"?\\s*>.*?</style>", 34), "</style>", ""));
        this.regexReplacements.add(Triple.of(Pattern.compile("<style[^>]* data-mce-bogus=\"?1\"?\\s*>.*?</style>", 34), "</style>", ""));
        // Any other <mce:xyz> element is renamed to <xyz>.
        this.regexReplacements.add(Triple.of(Pattern.compile("<mce\\:([a-z]*)([^>]*)>(.*?)<\\/mce\\:\\1>", 34), "<mce:", "<$1$2>$3</$1>"));
        // A paragraph that only wraps a script is replaced by the script itself.
        this.regexReplacements.add(Triple.of(Pattern.compile("<p>\\s*(<script.*?<\\/script>)\\s*</p>", 34), "</script>", "$1"));
        // align=middle on table cells becomes align="center".
        this.regexReplacements.add(Triple.of(Pattern.compile("(<t[dh][^>]*)\\salign=[\"']?middle[\"']?", 2), "middle", "$1 align=\"center\""));
        // Empty table cells receive an &nbsp;.
        this.regexReplacements.add(Triple.of(Pattern.compile("(<t[dh][^>]*>)\\s*(</t[dh]>)", 34), "</t", "$1&nbsp;$2"));
        // A paragraph that only wraps a comment is replaced by the comment itself.
        this.regexReplacements.add(Triple.of(Pattern.compile("<p>\\s*(<!--.*?-->)\\s*</p>", 34), "<!--", "$1"));
        // Strips repeated "mce-" prefixes from "text/javascript" (guarded by "<script").
        this.regexReplacements.add(Triple.of(Pattern.compile("(mce-)+text/javascript", 34), "<script", "text/javascript"));
        // <meta>, <title> and <link> elements found inside a paragraph are dropped.
        this.regexReplacements.add(Triple.of(Pattern.compile("<p>(.*?)<meta[^>]+>(.*?)</p>", 34), "<meta", "<p>$1$2</p>"));
        this.regexReplacements.add(Triple.of(Pattern.compile("<p>(.*?)<title>[^<]*</title>(.*?)</p>", 34), "</title>", "<p>$1$2</p>"));
        this.regexReplacements.add(Triple.of(Pattern.compile("<p>(.*?)<link[^>]+>(?:</link>)?(.*?)</p>", 34), "<link", "<p>$1$2</p>"));
        // An Mso-classed paragraph that starts with a middle-dot bullet is rewritten as <li>.
        this.regexReplacements.add(Triple.of(Pattern.compile("<p[^>]*class=\"?Mso(?:[A-Z][a-z]+)+\"?[^>]*>(?:<!--\\[if !supportLists\\]-->)?(?:<\\/?(?:span|font)[^>]*>)*(?:&#183;|\u00b7)(?:<\\/?(?:span|font)[^>]*>)*(?:&nbsp;)*\\s*(?:<\\/?(?:span|font)[^>]*>)*(?:<!--\\[endif\\]-->)?(.*?)(?:<\\/?(?:span|font)[^>]*>)*</p>", 34), "&#183;", "<li>$1</li>"));
        // ---- literal replacements applied after parsing ----
        this.postParseStraightReplacements = Lists.newArrayList();
        this.postParseStraightReplacements.add(Pair.of("<b></b>", ""));
        this.postParseStraightReplacements.add(Pair.of("<strong></strong>", ""));
        // ---- regex replacements applied after parsing ----
        this.postParseRegexReplacements = Lists.newArrayList();
        // Placeholder put inside empty anchors that carry id=/name= so that the following
        // "remove empty anchors" rule does not match them; the placeholder is removed again
        // by the rule after that.
        String keepThis = "&#65279;";
        this.postParseRegexReplacements.add(Triple.of(Pattern.compile("(<a [^>]*(?:id=|name=)[^>]+>)\\s*(</a>)\\n*"), "</a>", "$1" + keepThis + "</a>"));
        this.postParseRegexReplacements.add(Triple.of(Pattern.compile("<a( [^>]+)?>\\s*</a>\n*"), "</a>", ""));
        this.postParseRegexReplacements.add(Triple.of(Pattern.compile(keepThis), keepThis, ""));
        // Empty paragraphs and empty headings are removed.
        this.postParseRegexReplacements.add(Triple.of(Pattern.compile("<p>\\s*</p>\n*"), "</p>", ""));
        this.postParseRegexReplacements.add(Triple.of(Pattern.compile("<h([1-6])>\\s*</h\\1>\n*"), "</h", ""));
        // Indent round trip: before parsing, a style attribute consisting only of a
        // padding-left of two or more digits is renamed to tinymce_indent; after parsing it
        // is turned back into a style attribute (swallowing an immediately following style
        // attribute). NOTE(review): presumably this lets the indent survive the
        // tag/attribute filter - confirm against TagAndAttributeFilterImpl.
        this.regexReplacements.add(Triple.of(Pattern.compile("\\bstyle=(\"padding-left:\\s*\\d{2,}px;?\\s*\")", 2), "padding-left", "tinymce_indent=$1"));
        this.postParseRegexReplacements.add(Triple.of(Pattern.compile("\\btinymce_indent=(\"padding-left:\\s*\\d{2,}px;?\\s*\")(?:\\sstyle=\"[^\"]*\")?", 2), "tinymce_indent", "style=$1"));
        // On tables the restored padding-left indent is converted to margin-left.
        this.postParseRegexReplacements.add(Triple.of(Pattern.compile("<table\\sstyle=\"padding(-left:\\s*\\d{2,}px;?\\s*)\"", 2), "<table", "<table style=\"margin$1\""));
        // A trailing empty paragraph (optionally holding a single <br>) at the very end is removed.
        this.postParseRegexReplacements.add(Triple.of(Pattern.compile("\\s*<p>\\s*(<br\\s*/?>)?\\s*</p>\\s*$", 34), "</p>", ""));
        // Empty anchors with a rel="lightbox[..." attribute are removed.
        this.postParseRegexReplacements.add(Triple.of(Pattern.compile("<a [^>]+rel=\"lightbox\\[[^>]+></a>", 34), "lightbox[", ""));
    }

    /**
     * Cleans an HTML fragment.
     *
     * <p>The input is first run through the pre-parse replacements, then parsed with a
     * TagSoup {@link Parser} (using the configured schema) whose SAX events are written
     * out by a {@link CleanerWriter} backed by a freshly created
     * {@link TagAndAttributeFilter}. The writer's output is finally passed through the
     * post-parse replacements.
     *
     * @param input the markup to clean
     * @param mc    content object handed to the {@link CleanerWriter}
     * @return the cleaned markup
     * @throws IllegalStateException if the parser reports an I/O or SAX error
     */
    @Override
    public String clean(String input, MutableContent mc) {
        final String prepared = this.doPreParsingCleanup(input);
        final Parser tagSoup = new Parser();

        // Filter is created per call and configured from this cleaner's flags.
        final TagAndAttributeFilter tagFilter = this.filterProvider.newInstance();
        tagFilter.setAllowJavascriptHandlers(this.isAllowJavascriptHandlers());
        tagFilter.setAllowBlockquoteWithNoAttributes(this.isAllowBlockquoteWithNoAttributes());

        final CleanerWriter saxWriter = new CleanerWriter(tagFilter, mc);
        saxWriter.setPrettyPrint(this.prettyPrint);

        // When a custom content writer was supplied, chain it in front of the default one.
        if (this.contentWriter != null) {
            this.contentWriter.setDelegate(saxWriter.getContentWriter());
            saxWriter.setContentWriter(this.contentWriter);
        }

        try {
            tagSoup.setFeature("http://www.ccil.org/~cowan/tagsoup/features/default-attributes", false);
            tagSoup.setFeature("http://www.ccil.org/~cowan/tagsoup/features/ignorable-whitespace", true);
            tagSoup.setContentHandler((ContentHandler) saxWriter);
            tagSoup.setProperty("http://xml.org/sax/properties/lexical-handler", saxWriter);
            tagSoup.setProperty("http://www.ccil.org/~cowan/tagsoup/properties/schema", this.schema);
            tagSoup.parse(new InputSource(new StringReader(prepared)));
        } catch (IOException e) {
            throw new IllegalStateException(e);
        } catch (SAXException e) {
            throw new IllegalStateException("HTML cleanup error", e);
        }

        return this.doPostParsingCleanup(saxWriter.getOutput());
    }

    /** Matches any bracketed conditional-comment marker still left, e.g. {@code <!--[endif]-->}. */
    private static final Pattern LEFTOVER_BRACKET_COMMENT = Pattern.compile("<!--\\[(.+?)]-->");

    /**
     * Applies every textual fix-up that has to happen before the markup is parsed.
     *
     * <p>Steps, in order: escape lone {@code <} characters outside scripts, apply the
     * literal replacements, apply each guarded regex rule (at most ten passes per rule),
     * strip empty Office paragraphs and Office style blocks, and finally remove any
     * remaining {@code <!--[...]-->} markers.
     *
     * <p>The guard lookup lower-cases with {@link Locale#ROOT}. With the default locale a
     * Turkish or Azeri JVM would lower-case {@code I} to a dotless {@code ı}, so guards
     * such as {@code "middle"}, {@code "</title>"} or {@code "<link"} would never be found
     * in upper-case markup and the rule would be skipped.
     *
     * @param input raw markup; must not be {@code null}
     * @return markup ready to be handed to the parser
     */
    String doPreParsingCleanup(String input) {
        String text = this.encodeLoneTags(input);

        for (Pair<String, String> literal : this.straightReplacements) {
            text = text.replace(literal.getLeft(), literal.getRight());
        }

        for (Triple<Pattern, String, String> rule : this.regexReplacements) {
            // Cheap substring guard: skip the regex when its marker is absent.
            if (!text.toLowerCase(Locale.ROOT).contains(rule.getMiddle())) {
                continue;
            }
            final Pattern pattern = rule.getLeft();
            final String replacement = rule.getRight();
            // Re-apply while the pattern still matches, since one replacement can expose
            // another match; capped at ten passes so a self-reproducing rule cannot loop.
            for (int pass = 0; pass < 10; pass++) {
                final Matcher matcher = pattern.matcher(text);
                if (!matcher.find()) {
                    break;
                }
                text = matcher.replaceAll(replacement);
            }
        }

        text = this.doComplexOfficeTags(text);
        text = this.doOfficeStyles(text);
        return LEFTOVER_BRACKET_COMMENT.matcher(text).replaceAll("");
    }

    /**
     * Drops Office ({@code class="Mso..."}) paragraphs whose entire body is an empty
     * {@code <o:p>&nbsp;</o:p>} placeholder, optionally wrapped as described by the two
     * inner patterns. All other text, including non-matching Mso paragraphs, is copied
     * through unchanged.
     *
     * @param text markup to scan
     * @return the markup without placeholder-only Office paragraphs
     */
    private String doComplexOfficeTags(String text) {
        // Case-sensitive quick check; nothing to do unless both markers are present.
        if (text.indexOf("Mso") == -1 || text.indexOf("</o:p>") == -1) {
            return text;
        }

        final Pattern msoParagraph = Pattern.compile("<p[^>]*class=\"?Mso[a-z]+\"?[^>]*>(.*?)</p>", Pattern.CASE_INSENSITIVE);
        final Pattern emptyOfficeBody = Pattern.compile("(?:<\\?xml[^>]*>)?(?:<b style[^>]*>)?<o:p>(?:<font[^>]*>)?&nbsp;(?:</font>)?</o:p>(</b>)?", Pattern.CASE_INSENSITIVE);
        final Pattern emptyStrongSpan = Pattern.compile("<span [^>]*mce_name=\"strong\"[^>]*><o:p>(?:<font[^>]*>)?&nbsp;(?:</font>)?</o:p></span>", Pattern.CASE_INSENSITIVE);

        final StringBuilder kept = new StringBuilder();
        final Matcher paragraphs = msoParagraph.matcher(text);
        int cursor = 0;
        while (paragraphs.find()) {
            // Copy whatever sits between the previous paragraph and this one.
            kept.append(text, cursor, paragraphs.start());

            final String body = paragraphs.group(1);
            final boolean placeholderOnly = emptyOfficeBody.matcher(body).matches()
                    || emptyStrongSpan.matcher(body).matches();
            if (!placeholderOnly) {
                kept.append(paragraphs.group());
            }
            cursor = paragraphs.end();
        }
        // Tail after the last paragraph (or the whole text when nothing matched).
        kept.append(text, cursor, text.length());
        return kept.toString();
    }

    /**
     * Removes {@code <style>} / {@code <mce:style>} elements (plus trailing whitespace)
     * whose content has at least one line beginning with {@code mso-}. Style elements
     * without such a line are kept as they are.
     *
     * @param text markup to scan
     * @return the markup without Office style blocks
     */
    private String doOfficeStyles(String text) {
        final String lowered = text.toLowerCase();
        final boolean hasStyleElement = lowered.contains("<style") || lowered.contains("<mce:style");
        if (!hasStyleElement || !text.contains("mso-")) {
            return text;
        }

        final Pattern styleElement = Pattern.compile("<(?:mce\\:)?style[^>]*>(.*?)</(?:mce\\:)?style>\\s*", Pattern.CASE_INSENSITIVE | Pattern.DOTALL);
        // MULTILINE so that ^ and $ anchor at every line of the style body.
        final Pattern officeRuleLine = Pattern.compile("^\\s*mso-.*$", Pattern.MULTILINE);

        final StringBuilder kept = new StringBuilder();
        final Matcher elements = styleElement.matcher(text);
        int cursor = 0;
        while (elements.find()) {
            kept.append(text, cursor, elements.start());
            final boolean isOfficeStyle = officeRuleLine.matcher(elements.group(1)).find();
            if (!isOfficeStyle) {
                kept.append(elements.group());
            }
            cursor = elements.end();
        }
        kept.append(text, cursor, text.length());
        return kept.toString();
    }

    /**
     * Applies the post-parse literal and regex replacements to the parser output.
     *
     * <p>Each regex rule is re-applied for as long as its guard substring is present in the
     * lower-cased text and its pattern still matches, up to ten passes per rule.
     *
     * <p>The guard lookup lower-cases with {@link Locale#ROOT}. With the default locale a
     * Turkish or Azeri JVM would lower-case {@code I} to a dotless {@code ı}, so guards
     * containing {@code i} (such as {@code "tinymce_indent"} or {@code "lightbox["}) would
     * not be found in upper-case text even though their case-insensitive patterns match.
     *
     * @param output markup produced by the parser's content handler
     * @return the final cleaned markup
     */
    private String doPostParsingCleanup(String output) {
        String text = output;

        for (Pair<String, String> literal : this.postParseStraightReplacements) {
            text = text.replace(literal.getLeft(), literal.getRight());
        }

        for (Triple<Pattern, String, String> rule : this.postParseRegexReplacements) {
            final Pattern pattern = rule.getLeft();
            final String guard = rule.getMiddle();
            final String replacement = rule.getRight();
            // Capped at ten passes so a rule whose output matches itself cannot loop forever.
            for (int pass = 0; pass < 10; pass++) {
                if (!text.toLowerCase(Locale.ROOT).contains(guard)) {
                    break;
                }
                final Matcher matcher = pattern.matcher(text);
                if (!matcher.find()) {
                    break;
                }
                text = matcher.replaceAll(replacement);
            }
        }
        return text;
    }

    /**
     * Escapes stray {@code <} characters everywhere except inside
     * {@code <script>...</script>} elements, which are copied through verbatim.
     *
     * @param input raw markup
     * @return the markup with the non-script segments passed through {@code doEscaping}
     */
    String encodeLoneTags(String input) {
        final Matcher scripts = Pattern
                .compile("<script[^>]*>(.*?)</script>", Pattern.CASE_INSENSITIVE | Pattern.DOTALL)
                .matcher(input);

        final StringBuilder out = new StringBuilder();
        int cursor = 0;
        while (scripts.find()) {
            // Text leading up to the script is escaped; the script element itself is not.
            out.append(this.doEscaping(input.substring(cursor, scripts.start())));
            out.append(scripts.group());
            cursor = scripts.end();
        }
        // Remainder after the last script (or the whole input when there is none).
        out.append(this.doEscaping(input.substring(cursor)));
        return out.toString();
    }

    /**
     * A {@code <} that is not followed by a letter, {@code ?}, {@code !} or {@code /}.
     * The negative lookahead does not consume the following character, so consecutive
     * {@code <} characters are each examined and a {@code <} at the very end of the
     * input also matches.
     */
    private static final Pattern LONE_OPEN_ANGLE = Pattern.compile("<(?![a-zA-Z?!/])");

    /**
     * Replaces every {@code <} that cannot be the start of a tag, comment, declaration
     * or processing instruction with {@code &lt;}.
     *
     * @param input text to escape; must not be {@code null}
     * @return the text with lone {@code <} characters escaped
     */
    String doEscaping(String input) {
        return LONE_OPEN_ANGLE.matcher(input).replaceAll("&lt;");
    }

    /**
     * @return the flag that {@code clean} forwards to the filter's
     *         {@code setAllowJavascriptHandlers}
     */
    public boolean isAllowJavascriptHandlers() {
        return this.allowJavascriptHandlers;
    }

    /**
     * @param allowJavascriptHandlers value to forward to the filter's
     *                                {@code setAllowJavascriptHandlers} on later {@code clean} calls
     */
    public void setAllowJavascriptHandlers(boolean allowJavascriptHandlers) {
        this.allowJavascriptHandlers = allowJavascriptHandlers;
    }

    /**
     * @return the flag that {@code clean} forwards to the filter's
     *         {@code setAllowBlockquoteWithNoAttributes}
     */
    public boolean isAllowBlockquoteWithNoAttributes() {
        return this.allowBlockquoteWithNoAttributes;
    }

    /**
     * @param allowBlockquoteWithNoAttributes value to forward to the filter's
     *                                        {@code setAllowBlockquoteWithNoAttributes} on later
     *                                        {@code clean} calls
     */
    public void setAllowBlockquoteWithNoAttributes(boolean allowBlockquoteWithNoAttributes) {
        this.allowBlockquoteWithNoAttributes = allowBlockquoteWithNoAttributes;
    }

    /**
     * Replaces the provider that {@code clean} asks for a new filter on every call.
     *
     * @param filterProvider provider of {@link TagAndAttributeFilter} instances; it is
     *                       dereferenced without a null check in {@code clean}
     */
    public void setFilterProvider(ObjectProvider<TagAndAttributeFilter> filterProvider) {
        this.filterProvider = filterProvider;
    }

    /**
     * @param schema TagSoup schema passed to the parser as its schema property in {@code clean}
     */
    public void setSchema(Schema schema) {
        this.schema = schema;
    }

    /**
     * @return the flag that {@code clean} forwards to the writer's {@code setPrettyPrint}
     */
    public boolean isPrettyPrint() {
        return this.prettyPrint;
    }

    /**
     * @param prettyPrint value to forward to the writer's {@code setPrettyPrint} on later
     *                    {@code clean} calls
     */
    public void setPrettyPrint(boolean prettyPrint) {
        this.prettyPrint = prettyPrint;
    }

    /**
     * Kinds of content event. Not referenced by any code visible in this file.
     * NOTE(review): presumably used by the writer classes in this package to remember
     * what was emitted last - confirm against CleanerWriter / HtmlContentWriter.
     */
    static enum ContentType {
        none,
        elementStart,
        elementEnd,
        characters,
        whitespace;

    }
}

