package org.datacleaner.beans;

import com.ibm.icu.text.UnicodeSet;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import javax.inject.Inject;
import javax.inject.Named;
import org.datacleaner.api.Analyzer;
import org.datacleaner.api.Concurrent;
import org.datacleaner.api.Configured;
import org.datacleaner.api.Description;
import org.datacleaner.api.ExternalDocumentation;
import org.datacleaner.api.Initialize;
import org.datacleaner.api.InputColumn;
import org.datacleaner.api.InputRow;
import org.datacleaner.api.Provided;
import org.datacleaner.result.AnnotatedRowsResult;
import org.datacleaner.result.CharacterSetDistributionResult;
import org.datacleaner.result.Crosstab;
import org.datacleaner.result.CrosstabDimension;
import org.datacleaner.result.CrosstabNavigator;
import org.datacleaner.storage.RowAnnotation;
import org.datacleaner.storage.RowAnnotationFactory;

/**
 * Analyzer that reports, per configured input column and per named character
 * set, how many rows were annotated as belonging to that character set.
 *
 * <p>The per-value inspection itself is delegated to one
 * {@link CharacterSetDistributionAnalyzerColumnDelegate} per column; this class
 * only wires the delegates up, feeds rows to them and assembles the final
 * crosstab result.</p>
 */
@ExternalDocumentation({@ExternalDocumentation.DocumentationLink(title = "Internationalization in DataCleaner", url = "https://www.youtube.com/watch?v=ApA-nhtLbhI", type = ExternalDocumentation.DocumentationType.VIDEO, version = "3.0")})
@Named("Character set distribution")
@Description("Inspects and maps text characters according to character set affinity, such as Latin, Hebrew, Cyrillic, Chinese and more.")
@Concurrent(true)
public class CharacterSetDistributionAnalyzer implements Analyzer<CharacterSetDistributionResult> {
    /** Character sets to measure, keyed by display name (sorted by name, see {@link #createUnicodeSets()}). */
    private static final Map<String, UnicodeSet> UNICODE_SETS = createUnicodeSets();

    /** One delegate per configured column; populated by {@link #init()}. */
    private final Map<InputColumn<String>, CharacterSetDistributionAnalyzerColumnDelegate> _columnDelegates =
            new HashMap<InputColumn<String>, CharacterSetDistributionAnalyzerColumnDelegate>();

    @Inject
    @Configured
    InputColumn<String>[] _columns;

    @Inject
    @Provided
    RowAnnotationFactory _annotationFactory;

    /**
     * Creates the map of Unicode sets that this analyzer measures, with their
     * display names as keys.
     *
     * <p>The map is a {@link TreeMap}, so iteration order is the natural
     * (alphabetical) order of the names, not the order of the {@code put}
     * calls below.</p>
     *
     * @return a new, sorted map from character set name to its {@link UnicodeSet}
     */
    protected static Map<String, UnicodeSet> createUnicodeSets() {
        Map<String, UnicodeSet> unicodeSets = new TreeMap<String, UnicodeSet>();
        unicodeSets.put("Latin, ASCII", new UnicodeSet("[:ASCII:]"));
        // Everything in the Latin set that is not already covered by the ASCII set.
        unicodeSets.put("Latin, non-ASCII", subUnicodeSet("[:Latin:]", "[:ASCII:]"));
        unicodeSets.put("Arabic", new UnicodeSet("[:Script=Arabic:]"));
        unicodeSets.put("Armenian", new UnicodeSet("[:Script=Armenian:]"));
        unicodeSets.put("Bengali", new UnicodeSet("[:Script=Bengali:]"));
        unicodeSets.put("Cyrillic", new UnicodeSet("[:Script=Cyrillic:]"));
        unicodeSets.put("Devanagari", new UnicodeSet("[:Script=Devanagari:]"));
        unicodeSets.put("Greek", new UnicodeSet("[:Script=Greek:]"));
        unicodeSets.put("Han", new UnicodeSet("[:Script=Han:]"));
        unicodeSets.put("Gujarati", new UnicodeSet("[:Script=Gujarati:]"));
        unicodeSets.put("Georgian", new UnicodeSet("[:Script=Georgian:]"));
        unicodeSets.put("Gurmukhi", new UnicodeSet("[:Script=Gurmukhi:]"));
        unicodeSets.put("Hangul", new UnicodeSet("[:Script=Hangul:]"));
        unicodeSets.put("Hebrew", new UnicodeSet("[:Script=Hebrew:]"));
        unicodeSets.put("Hiragana", new UnicodeSet("[:Script=Hiragana:]"));
        unicodeSets.put("Kannada", new UnicodeSet("[:Script=Kannada:]"));
        unicodeSets.put("Katakana", new UnicodeSet("[:Script=Katakana:]"));
        unicodeSets.put("Malayalam", new UnicodeSet("[:Script=Malayalam:]"));
        unicodeSets.put("Oriya", new UnicodeSet("[:Script=Oriya:]"));
        unicodeSets.put("Syriac", new UnicodeSet("[:Script=Syriac:]"));
        unicodeSets.put("Tamil", new UnicodeSet("[:Script=Tamil:]"));
        unicodeSets.put("Telugu", new UnicodeSet("[:Script=Telugu:]"));
        unicodeSets.put("Thaana", new UnicodeSet("[:Script=Thaana:]"));
        unicodeSets.put("Thai", new UnicodeSet("[:Script=Thai:]"));
        return unicodeSets;
    }

    /**
     * Builds the set difference of two Unicode set patterns.
     *
     * @param includePattern pattern of the characters to start from
     * @param excludePattern pattern of the characters to remove again
     * @return a new set holding the characters of {@code includePattern} that
     *         are not in {@code excludePattern}
     */
    private static UnicodeSet subUnicodeSet(String includePattern, String excludePattern) {
        UnicodeSet difference = new UnicodeSet();
        difference.addAll(new UnicodeSet(includePattern));
        difference.removeAll(new UnicodeSet(excludePattern));
        return difference;
    }

    /**
     * Creates one column delegate per configured column. Every delegate is
     * handed the annotation factory and the same shared character set map.
     */
    @Initialize
    public void init() {
        for (InputColumn<String> column : this._columns) {
            this._columnDelegates.put(column,
                    new CharacterSetDistributionAnalyzerColumnDelegate(this._annotationFactory, UNICODE_SETS));
        }
    }

    /**
     * Feeds the value of every configured column in the given row to that
     * column's delegate.
     *
     * @param inputRow the row to inspect
     * @param i passed through unchanged to each delegate (presumably the
     *        number of occurrences of the row -- confirm against the
     *        {@code Analyzer} contract)
     */
    public void run(InputRow inputRow, int i) {
        for (InputColumn<String> column : this._columns) {
            String value = (String) inputRow.getValue(column);
            this._columnDelegates.get(column).run(value, inputRow, i);
        }
    }

    /**
     * Assembles the result: a crosstab with a "Column" dimension (one category
     * per configured column) and a "Measures" dimension (one category per
     * character set name), whose cells hold the row count of the corresponding
     * annotation. For non-zero counts the annotated rows are attached to the
     * cell as well.
     *
     * <p>This is the result method of the {@code Analyzer} interface; the name
     * {@code getResult} is restored here after a decompiler had renamed it.</p>
     *
     * @return the character set distribution for all configured columns
     */
    public CharacterSetDistributionResult getResult() {
        Set<String> unicodeSetNames = UNICODE_SETS.keySet();

        CrosstabDimension measureDimension = new CrosstabDimension("Measures");
        for (String unicodeSetName : unicodeSetNames) {
            measureDimension.addCategory(unicodeSetName);
        }

        CrosstabDimension columnDimension = new CrosstabDimension("Column");
        Crosstab<Number> crosstab = new Crosstab<Number>(Number.class,
                new CrosstabDimension[]{columnDimension, measureDimension});

        for (InputColumn<String> column : this._columns) {
            String columnName = column.getName();
            CharacterSetDistributionAnalyzerColumnDelegate delegate = this._columnDelegates.get(column);
            columnDimension.addCategory(columnName);

            CrosstabNavigator<Number> navigator = crosstab.navigate().where(columnDimension, columnName);
            for (String unicodeSetName : unicodeSetNames) {
                RowAnnotation annotation = delegate.getAnnotation(unicodeSetName);
                int rowCount = annotation.getRowCount();
                navigator.where(measureDimension, unicodeSetName).put(Integer.valueOf(rowCount));
                if (rowCount > 0) {
                    // Only cells that actually matched rows get a drill-down result attached.
                    navigator.attach(new AnnotatedRowsResult(annotation, this._annotationFactory,
                            new InputColumn[]{column}));
                }
            }
        }
        return new CharacterSetDistributionResult(this._columns, unicodeSetNames, crosstab);
    }

    /**
     * Alias kept only so that code referring to the decompiler-generated name
     * keeps working.
     *
     * @return same as {@link #getResult()}
     * @deprecated use {@link #getResult()} instead
     */
    @Deprecated
    public CharacterSetDistributionResult m1getResult() {
        return getResult();
    }
}
