/*
 * Decompiled with CFR 0.152.
 */
package org.eclipse.smila.processing.pipelets.boilerpipe;

import de.l3s.boilerpipe.BoilerpipeFilter;
import de.l3s.boilerpipe.document.TextDocument;
import de.l3s.boilerpipe.extractors.ArticleExtractor;
import de.l3s.boilerpipe.sax.BoilerpipeSAXInput;
import java.io.InputStreamReader;
import java.io.StringReader;
import java.io.UnsupportedEncodingException;
import java.lang.reflect.Method;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.eclipse.smila.blackboard.Blackboard;
import org.eclipse.smila.blackboard.BlackboardAccessException;
import org.eclipse.smila.datamodel.Any;
import org.eclipse.smila.datamodel.AnyMap;
import org.eclipse.smila.datamodel.Value;
import org.eclipse.smila.processing.ProcessingException;
import org.eclipse.smila.processing.parameters.MissingParameterException;
import org.eclipse.smila.processing.parameters.ParameterAccessor;
import org.eclipse.smila.processing.pipelets.ATransformationPipelet;
import org.eclipse.smila.processing.util.ResultCollector;
import org.xml.sax.InputSource;

/**
 * Pipelet that parses HTML input with boilerpipe, runs the configured boilerpipe filters over the
 * parsed document and hands the resulting content to {@code storeResult(...)} of the base class.
 *
 * <p>Filters are configured via the {@value #FILTER_PROPERTY} parameter. Each entry may be
 * <ul>
 * <li>a class name with an accessible no-arg constructor, e.g. {@code pkg.MyFilter},</li>
 * <li>a static field, e.g. {@code pkg.MyFilter.INSTANCE}, or</li>
 * <li>a static no-arg method, e.g. {@code pkg.MyFilter.getInstance()}.</li>
 * </ul>
 * If no filter is configured, {@link ArticleExtractor#INSTANCE} is used.
 */
public class BoilerpipePipelet
extends ATransformationPipelet {
    /** Parameter naming the record attribute that holds the encoding of the attachment. */
    public static final String ENCODING_ATTRIBUTE_PROPERTY = "encodingAttribute";
    /** Parameter holding the encoding to use when the record does not provide one. */
    public static final String ENCODING_PROPERTY = "defaultEncoding";
    /** Parameter listing the boilerpipe filters to apply. */
    public static final String FILTER_PROPERTY = "filter";
    /** Parameter holding the block limit passed to {@link BoilerpipeSAXInput}. */
    public static final String MAX_PARSER_BLOCKS_PROPERTY = "maxParserBlocks";
    /** Value used when {@value #MAX_PARSER_BLOCKS_PROPERTY} is not set. */
    public static final int MAX_PARSER_BLOCKS_DEFAULT = 20000;
    private final Log _logger = LogFactory.getLog(getClass());
    private AnyMap _configuration;
    private Collection<? extends BoilerpipeFilter> _filters;

    /**
     * Stores the configuration and resolves the configured boilerpipe filters.
     *
     * @param configuration the pipelet configuration
     * @throws ProcessingException if a configured filter cannot be resolved or is not a
     *         {@link BoilerpipeFilter}
     */
    public void configure(AnyMap configuration) throws ProcessingException {
        this._configuration = configuration;
        ParameterAccessor paramAccessor = new ParameterAccessor(configuration);
        List<String> filterNames = paramAccessor.getParameters(FILTER_PROPERTY);
        if (filterNames.isEmpty()) {
            this._logger.debug("Using default boilerpipe filter: de.l3s.boilerpipe.extractors.ArticleExtractor");
            this._filters = Collections.singleton(ArticleExtractor.INSTANCE);
            return;
        }
        List<BoilerpipeFilter> filters = new ArrayList<BoilerpipeFilter>();
        for (String filterName : filterNames) {
            BoilerpipeFilter filter;
            try {
                filter = this.resolveFilter(filterName);
            }
            catch (Exception e) {
                // covers reflection failures as well as a ClassCastException for non-filter types
                throw new ProcessingException("Could not access boilerpipe filter: " + filterName, (Throwable)e);
            }
            if (filter == null) {
                // a static field or method may legitimately yield null; such entries are skipped
                continue;
            }
            this._logger.debug("Using boilerpipe filter: " + filter.getClass());
            filters.add(filter);
        }
        this._filters = filters;
    }

    /**
     * Resolves a single filter specification. The name is first tried as a class to instantiate;
     * if no such class exists, the part before the last dot is taken as the class and the rest as
     * either a static no-arg method (when it ends with {@code "()"}) or a static field.
     *
     * @param filterName the filter specification from the configuration
     * @return the resolved filter, possibly {@code null} if the field or method yields null
     * @throws Exception on any reflection failure or if the resolved object is not a filter
     */
    private BoilerpipeFilter resolveFilter(String filterName) throws Exception {
        try {
            return (BoilerpipeFilter)Class.forName(filterName).newInstance();
        }
        catch (ClassNotFoundException e) {
            int dot = filterName.lastIndexOf('.');
            if (dot < 0) {
                throw e;
            }
            Class<?> accessor = Class.forName(filterName.substring(0, dot));
            String member = filterName.substring(dot + 1);
            if (member.endsWith("()")) {
                // static factory method; must not fall through to the field lookup below
                Method method = accessor.getMethod(member.substring(0, member.length() - 2));
                return (BoilerpipeFilter)method.invoke(null);
            }
            return (BoilerpipeFilter)accessor.getField(member).get(null);
        }
    }

    /**
     * Runs the configured filters over the input of each record. A record whose processing throws
     * is reported to the {@link ResultCollector} as failed; all others are reported as results.
     *
     * @param blackboard the blackboard holding the records
     * @param recordIds ids of the records to process
     * @return the ids returned by the result collector
     * @throws ProcessingException declared by the pipelet contract; presumably raised by the
     *         result collector depending on its error handling (not visible here)
     */
    public String[] process(Blackboard blackboard, String[] recordIds) throws ProcessingException {
        ParameterAccessor paramAccessor = new ParameterAccessor(blackboard, this._configuration);
        ResultCollector resultCollector = new ResultCollector(paramAccessor, this._logger, false);
        for (String id : recordIds) {
            paramAccessor.setCurrentRecord(id);
            this._logger.debug("Processing record: " + id);
            InputSource source = null;
            try {
                source = this.extractInputSource(blackboard, paramAccessor, id);
                if (source == null) {
                    this._logger.debug("No input found");
                } else {
                    this.extractContent(blackboard, paramAccessor, id, source);
                }
                resultCollector.addResult(id);
            }
            catch (Exception e) {
                resultCollector.addFailedResult(id, e);
            }
            finally {
                // the parser is not guaranteed to close the stream, e.g. when it stops early
                this.closeQuietly(source);
            }
        }
        return resultCollector.getResultIds();
    }

    /**
     * Parses the given source, applies all configured filters and stores the document content if at
     * least one filter reported a change.
     */
    private void extractContent(Blackboard blackboard, ParameterAccessor paramAccessor, String id, InputSource source) throws Exception {
        int maxParserBlocks = paramAccessor.getIntParameter(MAX_PARSER_BLOCKS_PROPERTY, Integer.valueOf(MAX_PARSER_BLOCKS_DEFAULT));
        BoilerpipeSAXInput saxInput = new BoilerpipeSAXInput(source, maxParserBlocks);
        TextDocument document = saxInput.getTextDocument();
        if (saxInput.hasStopped()) {
            this._logger.warn("Parsed html was too big and the parsing has been stopped somewhere in the middle");
        }
        boolean changed = false;
        for (BoilerpipeFilter filter : this._filters) {
            // every filter must run, even if an earlier one already changed the document
            if (filter.process(document)) {
                changed = true;
            }
        }
        if (changed) {
            this.storeResult(blackboard, id, document.getContent(), paramAccessor);
        } else {
            this._logger.debug("None of the configured filters produced a result");
        }
    }

    /**
     * Closes the byte and character stream of the given source, if any. Failures are only logged
     * because the record has already been processed at this point.
     */
    private void closeQuietly(InputSource source) {
        if (source == null) {
            return;
        }
        try {
            if (source.getCharacterStream() != null) {
                source.getCharacterStream().close();
            }
            if (source.getByteStream() != null) {
                source.getByteStream().close();
            }
        }
        catch (Exception e) {
            this._logger.debug("Could not close input stream", e);
        }
    }

    /**
     * Creates the parser input for a record, either from a string attribute or from an attachment.
     * For attachments the encoding is taken from the attribute named by
     * {@value #ENCODING_ATTRIBUTE_PROPERTY}, then from {@value #ENCODING_PROPERTY}; if neither
     * yields a value the raw byte stream is passed on without an explicit encoding.
     *
     * @return the input source, or {@code null} if the record has no usable input
     */
    private InputSource extractInputSource(Blackboard blackboard, ParameterAccessor paramAccessor, String id) throws MissingParameterException, BlackboardAccessException, UnsupportedEncodingException {
        String inputName = this.getInputName(paramAccessor);
        if (this.isReadFromAttribute(this.getInputType(paramAccessor))) {
            Any inputAny = (Any)blackboard.getMetadata(id).get((Object)inputName);
            if (inputAny == null || !inputAny.isValue()) {
                return null;
            }
            String value = ((Value)inputAny).asString();
            return value == null ? null : new InputSource(new StringReader(value));
        }
        if (!blackboard.hasAttachment(id, inputName)) {
            return null;
        }
        String encoding = null;
        String encodingAttribute = paramAccessor.getParameter(ENCODING_ATTRIBUTE_PROPERTY, null);
        if (encodingAttribute != null) {
            encoding = blackboard.getMetadata(id).getStringValue(encodingAttribute);
        }
        if (encoding == null) {
            encoding = paramAccessor.getParameter(ENCODING_PROPERTY, null);
        }
        if (encoding == null) {
            return new InputSource(blackboard.getAttachmentAsStream(id, inputName));
        }
        this._logger.debug("Using encoding: " + encoding);
        return new InputSource(new InputStreamReader(blackboard.getAttachmentAsStream(id, inputName), encoding));
    }
}

