/*
 * Decompiled with CFR 0.152.
 */
package org.eclipse.smila.tika;

import java.io.IOException;
import java.io.InputStream;
import java.io.StringWriter;
import java.io.Writer;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerFactoryConfigurationError;
import javax.xml.transform.sax.SAXTransformerFactory;
import javax.xml.transform.sax.TransformerHandler;
import javax.xml.transform.stream.StreamResult;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.tika.exception.TikaException;
import org.apache.tika.io.TemporaryResources;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.sax.BodyContentHandler;
import org.apache.tika.sax.WriteOutContentHandler;
import org.eclipse.smila.blackboard.Blackboard;
import org.eclipse.smila.blackboard.BlackboardAccessException;
import org.eclipse.smila.common.mimetype.MimeTypeIdentifier;
import org.eclipse.smila.common.mimetype.MimeTypeParseException;
import org.eclipse.smila.datamodel.Any;
import org.eclipse.smila.datamodel.AnyMap;
import org.eclipse.smila.datamodel.AnySeq;
import org.eclipse.smila.datamodel.DataFactory;
import org.eclipse.smila.processing.ProcessingException;
import org.eclipse.smila.processing.parameters.ParameterAccessor;
import org.eclipse.smila.processing.pipelets.ATransformationPipelet;
import org.eclipse.smila.processing.util.ResultCollector;
import org.eclipse.smila.tika.internal.PageBreakWriteOutContentHandler;
import org.eclipse.smila.tika.internal.TemporaryResourcesHelper;
import org.eclipse.smila.utils.service.ServiceUtils;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;

/**
 * Transformation pipelet that feeds a record's input stream to a Tika {@link Parser} and
 * stores the extracted text (plain or HTML) and selected Tika metadata back on the blackboard.
 *
 * <p>The Tika parser and the MIME type identifier are looked up lazily through
 * {@code ServiceUtils} on first use and cached in this instance.
 */
public class TikaPipelet
extends ATransformationPipelet {
    /** Parameter: record attribute that holds the content type; read before parsing and written with Tika's "Content-Type" afterwards. */
    public static final String PROP_ATTACHMENT_CONTENT_TYPE_ATTRIBUTE = "contentTypeAttribute";
    /** Parameter: record attribute that holds the file name; passed to Tika as "resourceName" and used to derive the extension for MIME detection. */
    public static final String PROP_FILE_NAME_ATTRIBUTE = "fileNameAttribute";
    /** Parameter: collection of maps, each describing how one Tika metadata property is mapped to a record attribute. */
    public static final String PROP_EXTRACT_PROPERTIES = "extractProperties";
    /** Parameter (boolean, default false): serialize the parsed content as HTML instead of plain body text. */
    public static final String PROP_EXPORT_AS_HTML = "exportAsHtml";
    /** Parameter (boolean, default false): split the extracted text on "&lt;pagebreak/&gt;" markers into one record per page. */
    public static final String PROP_PAGE_BREAK = "pageBreak";
    /** Parameter: attribute on each generated page record that receives the 1-based page number. */
    public static final String PROP_PAGE_NUMBER_ATTRIBUTE = "pageNumberAttribute";
    /** Parameter (boolean, default false): when false, a hyphen followed by a line break between two letters is removed from the text. */
    public static final String PROP_KEEP_HYPHENS = "keepHyphens";
    /** Parameter (int): write limit handed to the write-out content handler. */
    public static final String PROP_MAX_LENGTH = "maxLength";
    /** Default for {@link #PROP_MAX_LENGTH}; presumably means "no limit" for Tika's write-out handler - confirm against the Tika version in use. */
    public static final int DEFAULT_MAX_LENGTH = -1;
    /** Mapping entry key: name of the Tika metadata property (matched case-insensitively via lower-casing). */
    public static final String PROP_MAPPING_METADATA_NAME = "metadataName";
    /** Mapping entry key: record attribute to write to; defaults to the metadata name when absent. */
    public static final String PROP_MAPPING_TARGET_ATTRIBUTE = "targetAttribute";
    /** Mapping entry key (boolean): when true, a multi-valued metadata property is stored as a single value. */
    public static final String PROP_MAPPING_SINGLE_RESULT = "singleResult";
    /** Mapping entry key: name of a {@link StoreMode} constant; {@code add} is used when absent. */
    public static final String PROP_MAPPING_STORE_MODE = "storeMode";
    /** Logger bound to the runtime class of this pipelet. */
    private final Log _log = LogFactory.getLog(((Object)((Object)this)).getClass());
    /** Cached Tika parser service; null until the first successful lookup in getParser(). */
    private Parser _parser;
    /** Cached MIME type identifier service; null until the first successful lookup in getMimeTypeIdentifier(). */
    private MimeTypeIdentifier _identifier;

    /**
     * Configures the pipelet by delegating to the superclass; no Tika-specific setup is done here.
     *
     * @param configuration the pipelet configuration, passed on unchanged to {@code super.configure}
     * @throws ProcessingException declared for the delegated superclass call
     */
    public void configure(AnyMap configuration) throws ProcessingException {
        super.configure(configuration);
    }

    /**
     * Parses the content of every given record with Tika and collects the resulting record ids.
     *
     * <p>For each record the input stream is obtained via {@code getInputStream}. If the stream
     * is missing or reports no available bytes, the record id is passed through unchanged.
     * Otherwise the content is converted by {@code convertAndExtract}, which may return several
     * ids (one per page) when page splitting is enabled. Parsing failures are reported to the
     * {@link ResultCollector} as failed results instead of aborting the whole batch.
     *
     * <p>While records are processed, the thread context class loader is switched to this
     * pipelet's class loader (presumably so that Tika resolves its parser classes from this
     * bundle) and is always restored afterwards.
     *
     * @param blackboard blackboard holding the records
     * @param recordIds ids of the records to process
     * @return the ids collected by the result collector
     * @throws ProcessingException if processing fails in a way the result collector does not absorb
     */
    public String[] process(Blackboard blackboard, String[] recordIds) throws ProcessingException {
        ParameterAccessor paramAccessor = new ParameterAccessor(blackboard, this._config);
        ResultCollector resultCollector = new ResultCollector(paramAccessor, this._log, false);
        Thread currentThread = Thread.currentThread();
        ClassLoader tcclBackup = currentThread.getContextClassLoader();
        currentThread.setContextClassLoader(this.getClass().getClassLoader());
        try {
            for (String id : recordIds) {
                paramAccessor.setCurrentRecord(id);
                // try-with-resources closes the stream (a null resource is simply skipped) and
                // attaches close() failures as suppressed exceptions on its own.
                try (InputStream dataStream = this.getInputStream(blackboard, id, paramAccessor)) {
                    if (dataStream != null && dataStream.available() > 0) {
                        for (String resultId : this.convertAndExtract(blackboard, id, dataStream, paramAccessor)) {
                            resultCollector.addResult(resultId);
                        }
                    } else {
                        // Nothing to parse: hand the record on untouched.
                        resultCollector.addResult(id);
                    }
                }
                catch (IOException | TransformerConfigurationException | TikaException | BlackboardAccessException | MimeTypeParseException | SAXException e) {
                    resultCollector.addFailedResult(id, e);
                }
            }
        }
        finally {
            currentThread.setContextClassLoader(tcclBackup);
        }
        return resultCollector.getResultIds();
    }

    /**
     * Normalizes the {@code extractProperties} configuration into a lookup map keyed by the
     * lower-cased Tika metadata name.
     *
     * <p>Each map entry is cloned, its {@code targetAttribute} defaults to the (original-case)
     * metadata name, and its {@code metadataName} is replaced by the lower-cased name. Entries
     * that are not maps, or that lack a {@code metadataName}, are logged as errors and skipped.
     *
     * @param extractPropsAny the configured mappings, or null if none are configured
     * @return map from lower-cased metadata name to its prepared mapping entry; never null
     */
    private AnyMap readAndPrepareExtractProperties(Any extractPropsAny) {
        AnyMap result = DataFactory.DEFAULT.createAnyMap();
        if (extractPropsAny == null) {
            return result;
        }
        for (Any mappingEntry : extractPropsAny) {
            if (!mappingEntry.isMap()) {
                this._log.error((Object)"TikaPipelet configuration extractProperties must contain maps describing the mapping.");
                continue;
            }
            // Work on a copy so the pipelet configuration itself is never modified.
            AnyMap newEntry = DataFactory.DEFAULT.cloneAnyMap(mappingEntry.asMap());
            String metadataName = newEntry.getStringValue(PROP_MAPPING_METADATA_NAME);
            if (metadataName == null) {
                // Without a metadata name there is nothing to look up; skip instead of failing with an NPE.
                this._log.error((Object)"TikaPipelet configuration extractProperties contains a mapping without 'metadataName', ignoring it.");
                continue;
            }
            if (!newEntry.containsKey((Object)PROP_MAPPING_TARGET_ATTRIBUTE)) {
                newEntry.put(PROP_MAPPING_TARGET_ATTRIBUTE, metadataName);
            }
            // Lower-case with a fixed locale so lookups in mapMetadata() are case-insensitive.
            String lookupName = metadataName.toLowerCase(Locale.ENGLISH);
            newEntry.put(PROP_MAPPING_METADATA_NAME, lookupName);
            result.put(lookupName, (Any)newEntry);
        }
        return result;
    }

    /**
     * Parses one record's content with Tika and stores the extracted text and metadata.
     *
     * <p>The content type is taken from the record's content type attribute if set; otherwise it
     * is detected with the {@link MimeTypeIdentifier} from the stream and the file extension.
     * An unsupported content type only produces a warning; parsing is attempted anyway. If the
     * parser stops because the configured write limit was reached, the text extracted so far is
     * stored as the result.
     *
     * @param blackboard blackboard holding the record
     * @param id id of the record being processed
     * @param dataStream content to parse; not closed by this method
     * @param paramAccessor accessor for the pipelet parameters of the current record
     * @return the ids of the records carrying the result: the input id, or one generated id per
     *         page when page splitting produced several pages
     * @throws IOException for any failure during handler setup, parsing or storing; the original
     *         cause is attached
     * @throws BlackboardAccessException if the record metadata cannot be read before parsing
     */
    private List<String> convertAndExtract(Blackboard blackboard, String id, InputStream dataStream, ParameterAccessor paramAccessor) throws TransformerConfigurationException, TikaException, ProcessingException, SAXException, BlackboardAccessException, IOException, MimeTypeParseException {
        List<String> resultIds = new ArrayList<String>();
        resultIds.add(id);
        String contentTypeAttribute = paramAccessor.getParameter(PROP_ATTACHMENT_CONTENT_TYPE_ATTRIBUTE, null);
        String fileNameAttribute = paramAccessor.getParameter(PROP_FILE_NAME_ATTRIBUTE, null);
        String pageNumberAttribute = paramAccessor.getParameter(PROP_PAGE_NUMBER_ATTRIBUTE, null);
        boolean exportAsHtml = paramAccessor.getBooleanParameter(PROP_EXPORT_AS_HTML, Boolean.valueOf(false));
        boolean pageBreak = paramAccessor.getBooleanParameter(PROP_PAGE_BREAK, Boolean.valueOf(false));
        boolean keepHyphens = paramAccessor.getBooleanParameter(PROP_KEEP_HYPHENS, Boolean.valueOf(false));
        int maxLength = paramAccessor.getIntParameter(PROP_MAX_LENGTH, Integer.valueOf(DEFAULT_MAX_LENGTH));
        Any extractPropsAny = paramAccessor.getParameterAny(PROP_EXTRACT_PROPERTIES);
        AnyMap extractProperties = this.readAndPrepareExtractProperties(extractPropsAny);
        String inputFileName = blackboard.getMetadata(id).getStringValue(fileNameAttribute);
        String inputContentType = blackboard.getMetadata(id).getStringValue(contentTypeAttribute);
        Metadata metadata = new Metadata();
        if (inputFileName != null && !inputFileName.isEmpty()) {
            metadata.set("resourceName", inputFileName);
        }
        try {
            StringWriter writer = new StringWriter();
            // writeOutHandler enforces the write limit; contentHandler is what the parser writes to
            // (the write-out handler itself for HTML, a body-only wrapper around it otherwise).
            WriteOutContentHandler writeOutHandler;
            ContentHandler contentHandler;
            if (exportAsHtml) {
                writeOutHandler = this.createHtmlExtractWriteOutHandler(maxLength, writer);
                contentHandler = writeOutHandler;
            } else if (pageBreak) {
                writeOutHandler = new PageBreakWriteOutContentHandler(writer, maxLength);
                contentHandler = new BodyContentHandler((ContentHandler)writeOutHandler);
            } else {
                writeOutHandler = new WriteOutContentHandler((Writer)writer, maxLength);
                contentHandler = new BodyContentHandler((ContentHandler)writeOutHandler);
            }
            TemporaryResources tmp = new TemporaryResources();
            try {
                Parser p = this.getParser();
                ParseContext context = new ParseContext();
                context.set(Parser.class, p);
                TikaInputStream tis = TikaInputStream.get(dataStream, tmp);
                if (inputContentType != null && !inputContentType.isEmpty()) {
                    metadata.set("Content-Type", inputContentType);
                } else {
                    MimeTypeIdentifier d = this.getMimeTypeIdentifier();
                    metadata.set("Content-Type", d.identify((InputStream)tis, this.getExtension(inputFileName)));
                    // Detection consumed part of the stream; rewind before parsing.
                    tis.reset();
                }
                String contentType = metadata.get("Content-Type");
                boolean isContentTypeSupported = false;
                if (contentType != null) {
                    String[] contentTypeParts = StringUtils.split(contentType, '/');
                    if (contentTypeParts.length == 2) {
                        MediaType mtContentType = new MediaType(contentTypeParts[0], contentTypeParts[1]);
                        isContentTypeSupported = p.getSupportedTypes(context).contains(mtContentType);
                    }
                }
                if (!isContentTypeSupported) {
                    this._log.warn((Object)("Content type '" + contentType + "' is not supported. Document '" + inputFileName + "' may not be processed properly."));
                }
                try {
                    p.parse((InputStream)tis, contentHandler, metadata, context);
                }
                catch (SAXException e) {
                    if (!writeOutHandler.isWriteLimitReached((Throwable)e)) {
                        throw e;
                    }
                    // Write limit reached: not an error, keep the text extracted up to the limit.
                }
                this.handleMetadata(blackboard, id, contentTypeAttribute, extractProperties, metadata);
                String extractedText = this.handleExtractedText(keepHyphens, writer.toString());
                if (pageBreak) {
                    resultIds = this.splitOnPageBreaks(blackboard, id, pageNumberAttribute, extractedText, paramAccessor);
                } else {
                    this.storeResult(blackboard, id, extractedText, paramAccessor);
                }
            }
            finally {
                // Always release temporary files Tika may have created for this stream.
                TemporaryResourcesHelper.disposeQuietly(tmp);
            }
        }
        catch (NoClassDefFoundError e) {
            this._log.warn((Object)("Could not find java class needed to parse document " + inputFileName + "."), (Throwable)e);
            throw new IOException(e);
        }
        catch (Throwable t) {
            // Deliberately broad: every failure is logged and surfaced as an IOException so that
            // the caller records a failed result for this record only.
            this._log.warn((Object)("Could not parse document " + inputFileName + "."), t);
            throw new IOException(t);
        }
        this._log.debug((Object)"Parsing with Tika done.");
        return resultIds;
    }

    /**
     * Returns the part of the file name after its last dot.
     *
     * @param inputFileName file name to inspect, may be null
     * @return the non-empty suffix after the last '.', or null if the name is null, contains
     *         no dot, or ends with a dot
     */
    private String getExtension(String inputFileName) {
        if (inputFileName == null) {
            return null;
        }
        int lastDot = inputFileName.lastIndexOf('.');
        boolean hasSuffix = lastDot >= 0 && lastDot < inputFileName.length() - 1;
        return hasSuffix ? inputFileName.substring(lastDot + 1) : null;
    }

    /**
     * Builds a write-out handler that serializes the received SAX events as HTML into the writer.
     *
     * <p>The serializer is configured for HTML output without indentation, UTF-8 encoding and
     * "\n" as line separator (via the Xalan-specific output property).
     *
     * @param maxLength write limit passed to the {@link WriteOutContentHandler}
     * @param writer destination of the serialized HTML
     * @return a write-out handler wrapping the HTML serializer
     * @throws TransformerFactoryConfigurationError if no transformer factory can be created
     * @throws TransformerConfigurationException if the transformer handler cannot be created
     */
    private WriteOutContentHandler createHtmlExtractWriteOutHandler(int maxLength, Writer writer) throws TransformerFactoryConfigurationError, TransformerConfigurationException {
        TransformerHandler htmlSerializer = ((SAXTransformerFactory)SAXTransformerFactory.newInstance()).newTransformerHandler();
        // Output settings as name/value pairs, applied in this order.
        String[][] outputProperties = {
            {"method", "html"},
            {"indent", "no"},
            {"encoding", "UTF-8"},
            {"{http://xml.apache.org/xalan}line-separator", "\n"},
        };
        for (String[] property : outputProperties) {
            htmlSerializer.getTransformer().setOutputProperty(property[0], property[1]);
        }
        htmlSerializer.setResult(new StreamResult(writer));
        return new WriteOutContentHandler((ContentHandler)htmlSerializer, maxLength);
    }

    /**
     * Writes the parse metadata to the record: first the content type (if a content type
     * attribute is configured), then all configured metadata mappings.
     *
     * @param blackboard blackboard holding the record
     * @param id id of the record to update
     * @param contentTypeAttribute attribute receiving Tika's "Content-Type", or null to skip it
     * @param extractProperties prepared mappings keyed by lower-cased metadata name
     * @param metadata metadata filled by the Tika parser
     * @throws BlackboardAccessException if the record metadata cannot be accessed
     */
    private void handleMetadata(Blackboard blackboard, String id, String contentTypeAttribute, AnyMap extractProperties, Metadata metadata) throws BlackboardAccessException {
        boolean storeContentType = contentTypeAttribute != null;
        if (storeContentType) {
            AnyMap recordMetadata = blackboard.getMetadata(id);
            recordMetadata.put(contentTypeAttribute, metadata.get("Content-Type"));
        }
        this.mapMetadata(blackboard, id, extractProperties, metadata);
    }

    /**
     * Copies the Tika metadata properties that have a configured mapping into record attributes.
     *
     * <p>For every metadata name with a mapping (looked up by lower-cased name) the value is
     * built as a sequence when the property is multi-valued and {@code singleResult} is not
     * true, otherwise as a single string value. The value is then stored according to the
     * mapping's {@link StoreMode} ({@code add} when not configured):
     * <ul>
     * <li>{@code overwrite}: the target attribute is replaced;</li>
     * <li>{@code add}: sequence elements are added one by one; a single value is put if the
     * attribute is not set yet and added otherwise;</li>
     * <li>{@code leave}: nothing is stored.</li>
     * </ul>
     * NOTE(review): {@code leave} never writes, even when the target attribute does not exist
     * yet - confirm that this is intended.
     *
     * @param blackboard blackboard holding the record
     * @param id id of the record to update
     * @param extractProperties prepared mappings keyed by lower-cased metadata name
     * @param metadata metadata filled by the Tika parser
     * @throws BlackboardAccessException if the record metadata cannot be accessed
     * @throws IllegalArgumentException if a mapping names an unknown store mode
     */
    private void mapMetadata(Blackboard blackboard, String id, AnyMap extractProperties, Metadata metadata) throws BlackboardAccessException {
        for (String metaDataKey : metadata.names()) {
            AnyMap mappingEntry = extractProperties.getMap(metaDataKey.toLowerCase(Locale.ENGLISH));
            if (mappingEntry == null) {
                continue;
            }
            boolean multiResultAllowed = !mappingEntry.containsKey((Object)PROP_MAPPING_SINGLE_RESULT) || !mappingEntry.getBooleanValue(PROP_MAPPING_SINGLE_RESULT);
            StoreMode storeMode = StoreMode.add;
            if (mappingEntry.containsKey((Object)PROP_MAPPING_STORE_MODE)) {
                storeMode = StoreMode.valueOf(mappingEntry.getStringValue(PROP_MAPPING_STORE_MODE));
            }
            // Declared as Any because it holds either a sequence or a single string value.
            Any value;
            if (metadata.isMultiValued(metaDataKey) && multiResultAllowed) {
                AnySeq values = DataFactory.DEFAULT.createAnySeq();
                for (String metaDataStringValue : metadata.getValues(metaDataKey)) {
                    values.add(metaDataStringValue);
                }
                value = values;
            } else {
                value = DataFactory.DEFAULT.createStringValue(metadata.get(metaDataKey));
            }
            String targetAttribute = mappingEntry.getStringValue(PROP_MAPPING_TARGET_ATTRIBUTE);
            if (storeMode == StoreMode.overwrite) {
                blackboard.getMetadata(id).put(targetAttribute, value);
            } else if (storeMode == StoreMode.add) {
                if (value.isSeq()) {
                    for (Any v : value.asSeq()) {
                        blackboard.getMetadata(id).add(targetAttribute, v);
                    }
                } else if (value.isValue()) {
                    AnyMap recordMetadata = blackboard.getMetadata(id);
                    if (recordMetadata.get((Object)targetAttribute) == null) {
                        recordMetadata.put(targetAttribute, value);
                    } else {
                        recordMetadata.add(targetAttribute, value);
                    }
                }
            }
        }
    }

    /**
     * Post-processes the text produced by the parser.
     *
     * <p>The text is always trimmed. Unless hyphens are to be kept, a hyphen (optionally followed
     * by one space) and the line break characters after it are removed when they sit between
     * two letters, joining words that were split across lines.
     *
     * @param keepHyphens true to leave hyphenated line breaks untouched
     * @param inputText the extracted text; must not be null
     * @return the cleaned text
     */
    private String handleExtractedText(boolean keepHyphens, String inputText) {
        String trimmed = inputText.trim();
        return keepHyphens
            ? trimmed
            : trimmed.replaceAll("(?<=\\p{L})- ?(?:[\r\n\u0085\u2028\u2029]+)(?=\\p{L})", "");
    }

    /**
     * Returns the Tika parser service, looking it up through {@code ServiceUtils} on first use
     * and caching it afterwards.
     *
     * <p>A failed lookup is logged as a warning. If the lookup was interrupted, the thread's
     * interrupt status is restored so callers further up can still react to it.
     *
     * @return the parser service, never null
     * @throws ProcessingException if no parser service could be obtained
     */
    private synchronized Parser getParser() throws ProcessingException {
        if (this._parser != null) {
            return this._parser;
        }
        try {
            this._parser = (Parser)ServiceUtils.getService(Parser.class);
        }
        catch (Exception ex) {
            if (ex instanceof InterruptedException) {
                // Catching the exception cleared the flag; set it again instead of swallowing it.
                Thread.currentThread().interrupt();
            }
            this._log.warn((Object)"Error while waiting for Tika Parser service to come up.", (Throwable)ex);
        }
        if (this._parser == null) {
            throw new ProcessingException("No Tika Parser service available, giving up");
        }
        return this._parser;
    }

    /**
     * Returns the MIME type identifier service, looking it up through {@code ServiceUtils} on
     * first use and caching it afterwards.
     *
     * <p>A failed lookup is logged as a warning. If the lookup was interrupted, the thread's
     * interrupt status is restored so callers further up can still react to it.
     *
     * @return the MIME type identifier service, never null
     * @throws ProcessingException if no identifier service could be obtained
     */
    private synchronized MimeTypeIdentifier getMimeTypeIdentifier() throws ProcessingException {
        if (this._identifier != null) {
            return this._identifier;
        }
        try {
            this._identifier = (MimeTypeIdentifier)ServiceUtils.getService(MimeTypeIdentifier.class);
        }
        catch (Exception ex) {
            if (ex instanceof InterruptedException) {
                // Catching the exception cleared the flag; set it again instead of swallowing it.
                Thread.currentThread().interrupt();
            }
            this._log.warn((Object)"Error while waiting for MimeTypeIdentifier service to come up.", (Throwable)ex);
        }
        if (this._identifier == null) {
            throw new ProcessingException("No MimeTypeIdentifier service available, giving up");
        }
        return this._identifier;
    }

    /**
     * Stores the extracted text, split into one record per page where page break markers exist.
     *
     * <p>The text is split on "&lt;pagebreak/&gt;". With at most one resulting part, the whole
     * text is stored on the original record. Otherwise a new record with id
     * {@code <id>#<pageNumber>} (1-based) is created for every part, the page number is written
     * to the page number attribute if one is configured, and the part is stored on that record.
     *
     * @param blackboard blackboard holding the records
     * @param id id of the source record
     * @param pageNumberAttribute attribute receiving the page number, or null to skip it
     * @param text the complete extracted text
     * @param paramAccessor accessor for the pipelet parameters
     * @return ids of the records the text was stored on
     * @throws ProcessingException if storing a result fails
     * @throws BlackboardAccessException if a record cannot be created or accessed
     */
    private ArrayList<String> splitOnPageBreaks(Blackboard blackboard, String id, String pageNumberAttribute, String text, ParameterAccessor paramAccessor) throws ProcessingException, BlackboardAccessException {
        ArrayList<String> storedIds = new ArrayList<String>();
        String[] pageTexts = text.split("<pagebreak/>");
        if (pageTexts.length <= 1) {
            // No page breaks found: keep everything on the original record.
            this.storeResult(blackboard, id, text, paramAccessor);
            storedIds.add(id);
            return storedIds;
        }
        for (int index = 0; index < pageTexts.length; index++) {
            int pageNumber = index + 1;
            String pageRecordId = id + "#" + pageNumber;
            blackboard.getRecord(pageRecordId, Blackboard.Get.NEW);
            if (pageNumberAttribute != null) {
                blackboard.getMetadata(pageRecordId).put(pageNumberAttribute, (Number)Integer.valueOf(pageNumber));
            }
            this.storeResult(blackboard, pageRecordId, pageTexts[index], paramAccessor);
            storedIds.add(pageRecordId);
        }
        return storedIds;
    }

    /**
     * How a mapped metadata value is written to its target attribute. The constant names are
     * the values accepted for the {@code storeMode} key of a mapping entry (resolved with
     * {@code valueOf}, so they are case-sensitive).
     */
    public static enum StoreMode {
        /** No write is performed for this mapping (mapMetadata has no branch for this mode). */
        leave,
        /** The target attribute is replaced with the new value. */
        overwrite,
        /** The new value(s) are added to the target attribute; this is the default mode. */
        add;

    }
}

