/*
 * Decompiled with CFR 0.152.
 */
package org.eclipse.smila.importing.crawler.file;

import java.io.IOException;
import java.nio.file.DirectoryStream;
import java.nio.file.Files;
import java.nio.file.LinkOption;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Collection;
import java.util.LinkedList;
import java.util.Queue;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.eclipse.smila.datamodel.Record;
import org.eclipse.smila.importing.ImportingException;
import org.eclipse.smila.importing.VisitedLinksService;
import org.eclipse.smila.importing.compounds.CompoundExtractor;
import org.eclipse.smila.importing.crawler.file.FileCrawlerService;
import org.eclipse.smila.importing.crawler.file.FileCrawlingContext;
import org.eclipse.smila.importing.util.RecordOutputHandler;
import org.eclipse.smila.objectstore.ObjectStoreException;
import org.eclipse.smila.taskworker.TaskContext;
import org.eclipse.smila.taskworker.TaskLog;
import org.eclipse.smila.taskworker.Worker;
import org.eclipse.smila.taskworker.input.Inputs;
import org.eclipse.smila.taskworker.input.RecordInput;

/**
 * Worker (name {@value #NAME}) that crawls directories of a file system.
 *
 * <p>For each crawled directory the entries are listed via the {@link FileCrawlerService}. Regular files that pass
 * the file filters are converted to records and written through a {@link RecordOutputHandler} created for
 * {@value #OUTPUT_SLOT_CRAWLED_RECORDS}; non-empty sub-directories that pass the folder filters are either crawled
 * by the same task (while fewer than "min files per bulk" files have been written) or written as directory records
 * through a handler created for {@value #OUTPUT_SLOT_DIRS_TO_CRAWL}.
 *
 * <p>The three service references are injected through the {@code set...}/{@code unset...} methods; this class
 * performs no null checks on them, so they must be set before {@link #perform(TaskContext)} is called.
 */
public class FileCrawlerWorker
implements Worker {
    /** Name of this worker, returned by {@link #getName()}. */
    public static final String NAME = "fileCrawler";
    /** Input slot whose records name the directories to crawl (attribute {@code fileFolder}). */
    public static final String INPUT_SLOT_DIRS_TO_CRAWL = "directoriesToCrawl";
    /** Output slot receiving records for sub-directories that are left for follow-up crawling. */
    public static final String OUTPUT_SLOT_DIRS_TO_CRAWL = "directoriesToCrawl";
    /** Output slot receiving the records created for crawled files. */
    public static final String OUTPUT_SLOT_CRAWLED_RECORDS = "crawledRecords";
    /** Task parameter name; not read in this class (presumably evaluated by FileCrawlingContext - confirm). */
    public static final String TASK_PARAM_ROOT_FOLDER = "rootFolder";
    /** Task parameter name; not read in this class (presumably evaluated by FileCrawlingContext - confirm). */
    public static final String TASK_PARAM_MAX_FILES_PER_BULK = "maxFilesPerBulk";
    /** Task parameter name; not read in this class (presumably evaluated by FileCrawlingContext - confirm). */
    public static final String TASK_PARAM_MIN_FILES_PER_BULK = "minFilesPerBulk";
    /** Task parameter name; not read in this class (presumably evaluated by FileCrawlingContext - confirm). */
    public static final String TASK_PARAM_DIRS_PER_BULK = "directoriesPerBulk";
    /** Default value constant; not referenced in this class. */
    public static final Long MAX_FILES_PER_BULK_DEFAULT = 1000L;
    /** Default value constant; not referenced in this class. */
    public static final Long MIN_FILES_PER_BULK_DEFAULT = 100L;
    /** Default value constant; not referenced in this class. */
    public static final Long DIRS_PER_BULK_DEFAULT = 10L;
    /** Input bulk id passed to the visited-links check when crawling starts from the root folder. */
    private static final String BULK_ID_FOR_INITIAL_TASK = "initial";
    private final Log _log = LogFactory.getLog(this.getClass());
    private FileCrawlerService _fileCrawler;
    private VisitedLinksService _visitedLinks;
    private CompoundExtractor _compoundExtractor;

    /**
     * @return the worker name {@link #NAME}.
     */
    public String getName() {
        return NAME;
    }

    /**
     * Executes one crawl task. If the {@value #INPUT_SLOT_DIRS_TO_CRAWL} input holds no data objects the configured
     * root folder is crawled, otherwise the directories named by the input records are crawled.
     *
     * @param taskContext context of the task to execute.
     * @throws Exception any error raised while crawling; it is logged (with stack trace) and rethrown unchanged.
     */
    public void perform(TaskContext taskContext) throws Exception {
        try {
            FileCrawlingContext crawlContext = new FileCrawlingContext(taskContext);
            RecordOutputHandler fileBulks = new RecordOutputHandler(taskContext.getOutputs(),
                crawlContext.getMaxFilesPerBulk(), OUTPUT_SLOT_CRAWLED_RECORDS);
            Inputs inputs = taskContext.getInputs();
            if (inputs.getDataObjectCount(INPUT_SLOT_DIRS_TO_CRAWL) == 0) {
                crawlRootFolder(crawlContext, fileBulks);
            } else {
                crawlInputFolders(crawlContext, fileBulks);
            }
        } catch (Exception e) {
            // Pass the exception as the throwable argument so the stack trace is not lost.
            _log.error("Error while performing task of worker '" + NAME + "'", e);
            throw e;
        }
    }

    /**
     * Starts a crawl at the root folder of the crawl context. The visited-links entries of the data source are
     * cleared first.
     *
     * @param crawlContext crawl configuration of the current task.
     * @param fileBulks output handler for file records.
     * @throws Exception if clearing the visited links or crawling the root folder fails.
     */
    private void crawlRootFolder(FileCrawlingContext crawlContext, RecordOutputHandler fileBulks) throws Exception {
        _visitedLinks.clearSource(crawlContext.getDataSource());
        RecordOutputHandler directoryBulks = new RecordOutputHandler(crawlContext.getTaskContext().getOutputs(),
            crawlContext.getDirectoriesPerBulk(), OUTPUT_SLOT_DIRS_TO_CRAWL);
        crawl(crawlContext.getRootFolder(), crawlContext, fileBulks, directoryBulks, BULK_ID_FOR_INITIAL_TASK);
    }

    /**
     * Crawls every directory named by the records of the {@value #INPUT_SLOT_DIRS_TO_CRAWL} input until the input is
     * exhausted or the task is canceled. A failure for a single directory is written to the task log and does not
     * abort the remaining directories.
     *
     * @param crawlContext crawl configuration of the current task.
     * @param fileBulks output handler for file records.
     * @throws ObjectStoreException declared for errors while accessing the input.
     * @throws IOException declared for errors while accessing the input.
     */
    private void crawlInputFolders(FileCrawlingContext crawlContext, RecordOutputHandler fileBulks) throws ObjectStoreException, IOException {
        TaskContext taskContext = crawlContext.getTaskContext();
        RecordOutputHandler directoryBulks = new RecordOutputHandler(taskContext.getOutputs(),
            crawlContext.getDirectoriesPerBulk(), OUTPUT_SLOT_DIRS_TO_CRAWL);
        RecordInput directoryInput = taskContext.getInputs().getAsRecordInput(INPUT_SLOT_DIRS_TO_CRAWL);
        Record record = directoryInput.getRecord();
        while (record != null && !taskContext.isCanceled()) {
            String dirName = record.getMetadata().getStringValue("fileFolder");
            TaskLog taskLog = taskContext.getLog();
            try {
                if (dirName == null || dirName.trim().isEmpty()) {
                    taskLog.error("Failed to crawl directory. Attribute 'fileFolder' of record " + record.getId() + " is null or empty");
                } else {
                    crawl(dirName, crawlContext, fileBulks, directoryBulks, directoryInput.getObjectName());
                }
            } catch (Exception e) {
                // Deliberately best-effort: one broken directory must not fail the whole bulk.
                taskLog.warn("Failed to crawl directory '" + dirName + "' of record " + record.getId(), e);
            }
            record = directoryInput.getRecord();
        }
    }

    /**
     * Crawls one directory. Files of the directory are written to {@code fileBulks}. While fewer than
     * "min files per bulk" files have been written, pending sub-directories are crawled in the same way (in queue
     * order). All sub-directories still pending afterwards are written to {@code directoryBulks}.
     *
     * @param dirName path of the directory to crawl.
     * @param crawlContext crawl configuration of the current task.
     * @param fileBulks output handler for file records.
     * @param directoryBulks output handler for directory records.
     * @param inputBulkId id of the input bulk, forwarded to the visited-links check.
     * @throws IllegalArgumentException if {@code dirName} does not denote a directory.
     * @throws Exception on any error while listing directories or writing records.
     */
    private void crawl(String dirName, FileCrawlingContext crawlContext, RecordOutputHandler fileBulks, RecordOutputHandler directoryBulks, String inputBulkId) throws Exception {
        if (_log.isDebugEnabled()) {
            _log.debug("Task " + crawlContext.getTaskContext().getTask().getTaskId() + ": crawl " + dirName);
        }
        Path dir = Paths.get(dirName);
        if (!Files.isDirectory(dir)) {
            throw new IllegalArgumentException("fileFolder '" + dirName + "' is not a directory");
        }
        if (checkAndMarkVisited(dir, crawlContext, inputBulkId)) {
            _log.info("Not crawling into directory " + dir + ", because it has been visited before.");
            return;
        }
        Queue<Path> subDirectories = createFileOutputBulks(dir, crawlContext, fileBulks);
        long minFilesPerBulk = crawlContext.getMinFilesPerBulk();
        if (fileBulks.getFileCount() < minFilesPerBulk) {
            // Too few files so far: descend into sub-directories within this task instead of delegating them.
            while (!subDirectories.isEmpty() && !crawlContext.getTaskContext().isCanceled()
                && fileBulks.getFileCount() < minFilesPerBulk) {
                Path subDir = subDirectories.poll();
                if (checkAndMarkVisited(subDir, crawlContext, inputBulkId)) {
                    // Report the skipped sub-directory itself, not the parent directory.
                    _log.info("Not crawling into directory " + subDir + ", because it has been visited before.");
                    continue;
                }
                subDirectories.addAll(createFileOutputBulks(subDir, crawlContext, fileBulks));
            }
        }
        createDirectoryOutputBulks(crawlContext.getDataSource(), subDirectories, directoryBulks);
    }

    /**
     * Lists the entries of {@code dir}, writes a record for every regular file accepted by the file filters and
     * collects every non-empty sub-directory accepted by the folder filters.
     *
     * @param dir directory whose entries are processed.
     * @param crawlContext crawl configuration providing data source, filters and mapper.
     * @param fileBulks output handler for file records.
     * @return the collected sub-directories; never {@code null}.
     * @throws Exception on any error while listing the directory, creating records or writing them.
     */
    private Queue<Path> createFileOutputBulks(Path dir, FileCrawlingContext crawlContext, RecordOutputHandler fileBulks) throws Exception {
        LinkedList<Path> subDirectories = new LinkedList<>();
        Collection<Path> filesAndFolders = _fileCrawler.list(dir);
        int fileCount = 0;
        for (Path entry : filesAndFolders) {
            if (Files.isRegularFile(entry)) {
                if (crawlContext.getFilterEvaluator().applyFiltersForCrawledFile(dir, entry)) {
                    Record fileRecord = _fileCrawler.fileToRecord(entry, crawlContext.getDataSource(), false);
                    if (isCompoundRecord(fileRecord)) {
                        setIsCompound(fileRecord);
                    }
                    crawlContext.getMapper().mapNames(fileRecord, _fileCrawler.getFilePropertyNames());
                    fileBulks.writeRecord(fileRecord);
                    ++fileCount;
                }
            } else if (Files.isDirectory(entry)) {
                // Empty directories are dropped here so that no follow-up work is created for them.
                if (crawlContext.getFilterEvaluator().applyFiltersForCrawledFolder(entry, crawlContext.getRootFolder())
                    && hasEntries(entry)) {
                    subDirectories.add(entry);
                }
            } else {
                _log.warn("directory " + dir.toRealPath() + " contains object " + entry.getFileName() + " which is neither a file nor a directory or cannot be accessed.");
            }
        }
        if (_log.isInfoEnabled()) {
            _log.info("directory " + dir.toRealPath() + " contained " + fileCount + " files and " + subDirectories.size() + " directories.");
        }
        return subDirectories;
    }

    /**
     * Tells whether a directory contains at least one entry. The directory stream is always closed.
     *
     * @param directory directory to inspect.
     * @return {@code true} if the directory has at least one entry.
     * @throws IOException if the directory cannot be opened.
     */
    private static boolean hasEntries(Path directory) throws IOException {
        try (DirectoryStream<Path> entries = Files.newDirectoryStream(directory)) {
            return entries.iterator().hasNext();
        }
    }

    /**
     * Writes one directory record per given sub-directory to {@code directoryBulks}.
     *
     * @param dataSource data source id passed to the record creation.
     * @param subDirectories directories to write records for.
     * @param directoryBulks output handler for directory records.
     * @throws IOException on errors creating a record or resolving a path for the debug message.
     * @throws ObjectStoreException declared for errors while writing a record.
     */
    private void createDirectoryOutputBulks(String dataSource, Collection<Path> subDirectories, RecordOutputHandler directoryBulks) throws IOException, ObjectStoreException {
        for (Path subDir : subDirectories) {
            Record directoryRecord = _fileCrawler.dirToRecord(subDir, dataSource);
            directoryBulks.writeRecord(directoryRecord);
            if (_log.isDebugEnabled()) {
                _log.debug("added bulk for directory " + subDir.toRealPath());
            }
        }
    }

    /**
     * Checks via the {@link VisitedLinksService} whether the directory was visited before and marks it as visited.
     * The check is only done if a filter configuration exists and it enables following symbolic links; otherwise
     * the directory is always reported as not visited.
     *
     * @param directory directory to check; its real path is used as the key.
     * @param context crawl configuration providing data source and job run id.
     * @param inputBulkId id of the input bulk, forwarded to the service.
     * @return the result of the service call, or {@code false} if no check is done.
     * @throws ImportingException if the real path cannot be resolved, or as thrown by the service.
     */
    private boolean checkAndMarkVisited(Path directory, FileCrawlingContext context, String inputBulkId) throws ImportingException {
        if (context.getFilterConfig() != null && context.getFilterConfig().followSymbolicLinks()) {
            try {
                String canonicalPath = getCanonicalPath(directory);
                return _visitedLinks.checkAndMarkVisited(context.getDataSource(), canonicalPath, context.getJobRunId(), inputBulkId);
            } catch (IOException ex) {
                throw new ImportingException("Error checking " + directory + " for symbolic-link cycles", ex);
            }
        }
        return false;
    }

    /**
     * @param file path to resolve.
     * @return string form of the real path of {@code file} (symbolic links are followed).
     * @throws IOException if the real path cannot be determined.
     */
    private String getCanonicalPath(Path file) throws IOException {
        return file.toRealPath().toString();
    }

    /**
     * @param record file record carrying a {@code fileName} attribute.
     * @return whether the compound extractor reports that it can extract the file; the second argument of
     *         {@code canExtract} is passed as {@code null}.
     */
    private boolean isCompoundRecord(Record record) {
        return _compoundExtractor.canExtract(record.getMetadata().getStringValue("fileName"), null);
    }

    /**
     * Sets the {@code _isCompound} attribute of the record's metadata to {@code true}.
     *
     * @param record record to mark.
     */
    private void setIsCompound(Record record) {
        record.getMetadata().put("_isCompound", Boolean.TRUE);
    }

    /**
     * Sets the service used to list directories and to create records.
     *
     * @param fileCrawler service reference to use.
     */
    public void setFileCrawlerService(FileCrawlerService fileCrawler) {
        _fileCrawler = fileCrawler;
    }

    /**
     * Clears the file crawler service reference, but only if it is the given instance.
     *
     * @param fileCrawler service reference to remove.
     */
    public void unsetFileCrawlerService(FileCrawlerService fileCrawler) {
        if (_fileCrawler == fileCrawler) {
            _fileCrawler = null;
        }
    }

    /**
     * Sets the extractor used to detect compound files.
     *
     * @param compoundExtractor service reference to use.
     */
    public void setCompoundExtractor(CompoundExtractor compoundExtractor) {
        _compoundExtractor = compoundExtractor;
    }

    /**
     * Clears the compound extractor reference, but only if it is the given instance.
     *
     * @param compoundExtractor service reference to remove.
     */
    public void unsetCompoundExtractor(CompoundExtractor compoundExtractor) {
        if (_compoundExtractor == compoundExtractor) {
            _compoundExtractor = null;
        }
    }

    /**
     * Sets the service used to track already visited directories.
     *
     * @param visitedLinks service reference to use.
     */
    public void setVisitedLinks(VisitedLinksService visitedLinks) {
        _visitedLinks = visitedLinks;
    }

    /**
     * Clears the visited-links service reference, but only if it is the given instance.
     *
     * @param visitedLinks service reference to remove.
     */
    public void unsetVisitedLinks(VisitedLinksService visitedLinks) {
        if (_visitedLinks == visitedLinks) {
            _visitedLinks = null;
        }
    }
}

