/*
 * Decompiled with CFR 0.152.
 */
package org.eclipse.smila.importing.crawler.web.filter;

import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.Set;
import org.eclipse.smila.datamodel.Record;
import org.eclipse.smila.importing.crawler.web.LinkFilter;
import org.eclipse.smila.importing.crawler.web.WebCrawlerException;
import org.eclipse.smila.importing.crawler.web.WebCrawlingContext;
import org.eclipse.smila.importing.crawler.web.filter.FilterConfiguration;
import org.eclipse.smila.taskworker.TaskLog;

/**
 * {@link LinkFilter} implementation that removes duplicate links and checks link URLs against the
 * URL pattern matcher of the crawling context's {@link FilterConfiguration}.
 *
 * <p>Note on a missing filter configuration: {@link #filterLinks(Collection, WebCrawlingContext)}
 * lets every (non-null, not yet seen) URL pass when the context has no filter configuration, whereas
 * {@link #allowLink(String, WebCrawlingContext)} rejects every URL in that case.
 */
public class DefaultLinkFilter
implements LinkFilter {
    /** Name of the metadata value from which the URL of a link record is read. */
    private static final String URL_ATTRIBUTE = "httpUrl";

    /**
     * Returns the links that pass the filter, in iteration order of the input.
     *
     * <p>A link is kept only if its URL is non-null, has not occurred earlier in
     * {@code extractedLinks}, and matches the context's URL patterns (or the context has no filter
     * configuration at all).
     *
     * @param extractedLinks link records to filter; the URL is read from the {@code "httpUrl"}
     *        metadata value of each record
     * @param context crawling context providing the filter configuration
     * @return a new collection containing the accepted link records
     * @throws WebCrawlerException declared by the {@link LinkFilter} interface; not thrown directly
     *         by this implementation
     */
    @Override
    public Collection<Record> filterLinks(Collection<Record> extractedLinks, WebCrawlingContext context) throws WebCrawlerException {
        // URLs already encountered in this call; used to drop duplicates.
        Set<String> seenUrls = new HashSet<String>();
        Collection<Record> filteredLinks = new ArrayList<Record>(extractedLinks.size());
        for (Record link : extractedLinks) {
            String url = link.getMetadata().getStringValue(URL_ATTRIBUTE);
            if (this.isLinkAllowed(context.getFilterConfiguration(), url, seenUrls)) {
                filteredLinks.add(link);
            }
        }
        return filteredLinks;
    }

    /**
     * Checks a single URL against the URL patterns of the context's filter configuration.
     *
     * @param url URL to check; may be {@code null}
     * @param context crawling context providing the filter configuration
     * @return {@code true} only if {@code url} is non-null, the context has a filter configuration,
     *         and that configuration's URL pattern matcher matches the URL
     * @throws WebCrawlerException declared by the {@link LinkFilter} interface; not thrown directly
     *         by this implementation
     */
    @Override
    public boolean allowLink(String url, WebCrawlingContext context) throws WebCrawlerException {
        if (url == null) {
            return false;
        }
        FilterConfiguration filterConfig = context.getFilterConfiguration();
        return filterConfig != null && filterConfig.getUrlPatternMatcher().matches(url);
    }

    /**
     * Decides whether a link extracted from a page should be kept.
     *
     * @param filterConfig filter configuration, or {@code null} to accept every URL
     * @param url URL of the link; {@code null} is always rejected
     * @param seenUrls URLs accepted for checking so far; {@code url} is added to it as a side effect
     * @return {@code true} if the URL is non-null, was not seen before, and passes the URL patterns
     */
    private boolean isLinkAllowed(FilterConfiguration filterConfig, String url, Set<String> seenUrls) {
        // A link without a URL cannot be followed; reject it before it reaches the set or matcher,
        // consistent with allowLink().
        if (url == null) {
            return false;
        }
        // Set.add returns false when the URL is already present, i.e. the link is a duplicate.
        if (!seenUrls.add(url)) {
            return false;
        }
        return filterConfig == null || filterConfig.getUrlPatternMatcher().matches(url);
    }
}

