package org.datacleaner.extension.filter;

import java.nio.ByteBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;
import java.util.regex.Pattern;
import javax.inject.Named;
import org.apache.commons.lang3.StringUtils;
import org.apache.metamodel.util.HasName;
import org.datacleaner.api.Alias;
import org.datacleaner.api.Categorized;
import org.datacleaner.api.Configured;
import org.datacleaner.api.Description;
import org.datacleaner.api.Distributed;
import org.datacleaner.api.Filter;
import org.datacleaner.api.HasLabelAdvice;
import org.datacleaner.api.Initialize;
import org.datacleaner.api.InputColumn;
import org.datacleaner.api.InputRow;
import org.datacleaner.api.Validate;
import org.datacleaner.beans.filter.ValidationCategory;
import org.datacleaner.components.categories.FilterCategory;
import org.datacleaner.extension.constants.RegexPattern;

/**
 * Filter that categorizes a row as {@link ValidationCategory#INVALID} when the text of the
 * configured input column fails one of the enabled character checks, and as
 * {@link ValidationCategory#VALID} otherwise.
 *
 * <p>Three checks can be switched on independently:
 * <ul>
 *   <li>garbled text: the value cannot be encoded in the selected charset, or it matches
 *       {@code RegexPattern.GARBLED} as a whole;</li>
 *   <li>full-width characters: {@code RegexPattern.FULL_WIDTH} is found anywhere in the value;</li>
 *   <li>half-width characters: {@code RegexPattern.HALF_WIDTH} is found anywhere in the value.</li>
 * </ul>
 * The regular expressions themselves are defined in {@link RegexPattern}, outside this class.
 *
 * <p>Null and blank values are always categorized as VALID.
 *
 * <p>NOTE(review): the spelling "Specia" in the class and component name is kept as-is;
 * renaming it presumably breaks references from existing job definitions - confirm before
 * changing.
 */
@Distributed(false)
@Categorized({FilterCategory.class})
@Named("Specia character filter")
@Description("Specia character filter")
/* loaded from: input_file:org/datacleaner/extension/filter/SpeciaCharacterFilter.class */
public class SpeciaCharacterFilter implements Filter<ValidationCategory>, HasLabelAdvice {

    /** Column whose value is inspected. */
    @Configured
    @Alias({"inputColumn"})
    InputColumn<?> inputColumn;

    /** Whether the full-width check is enabled. */
    @Configured
    Boolean validFullWidth = true;

    /** Whether the half-width check is enabled. */
    @Configured
    Boolean validHalfWidth = true;

    /** Whether the garbled-text check is enabled. */
    @Configured
    Boolean validGarbled = true;

    /** Charset chosen from the predefined list; {@link CharsetEnum#NONE} defers to {@link #inputCharset}. */
    @Configured
    @Alias({"charset"})
    @Description("charset eg: UTF-8/GB2312/GBK")
    CharsetEnum charset;

    /** Optional free-text charset name, used when {@link #charset} does not name a charset. */
    @Configured(required = false)
    @Alias({"inputCharset"})
    @Description("charset eg: UTF-8/GB2312/GBK")
    String inputCharset;

    // Compiled once in init() and reused for every row.
    Pattern fullWidthPattern;
    Pattern halfWidthPattern;
    Pattern garbledPattern;

    /**
     * Predefined charset choices. {@link #NONE} carries an empty name, so it never selects a
     * charset by itself.
     */
    /* loaded from: input_file:org/datacleaner/extension/filter/SpeciaCharacterFilter$CharsetEnum.class */
    public enum CharsetEnum implements HasName {
        NONE(""),
        UTF8("UTF-8"),
        GB2312("GB2312"),
        US_ASCII("US-ASCII"),
        GBK("GBK");

        private final String _name;

        CharsetEnum(String str) {
            this._name = str;
        }

        /**
         * @return the charset name of this choice; empty for {@link #NONE}
         */
        public String getName() {
            return this._name;
        }
    }

    /**
     * Checks the configuration: none of the three switches may be null and at least one of them
     * has to be enabled.
     *
     * @throws IllegalStateException if a switch is null or all three switches are false
     */
    @Validate
    public void validate() {
        if (this.validFullWidth == null) {
            throw new IllegalStateException("Valid full width is null.");
        }
        if (this.validHalfWidth == null) {
            throw new IllegalStateException("Valid half width is null.");
        }
        if (this.validGarbled == null) {
            throw new IllegalStateException("Valid garbled is null.");
        }
        if (!(this.validFullWidth || this.validHalfWidth || this.validGarbled)) {
            throw new IllegalStateException("At least one of validFullWidth, validHalfWidth, validGarbled must be set to true.");
        }
    }

    /** Compiles the three regular expressions supplied by {@link RegexPattern}. */
    @Initialize
    public void init() {
        this.fullWidthPattern = Pattern.compile(RegexPattern.FULL_WIDTH.getPattern());
        this.halfWidthPattern = Pattern.compile(RegexPattern.HALF_WIDTH.getPattern());
        this.garbledPattern = Pattern.compile(RegexPattern.GARBLED.getPattern());
    }

    /**
     * @return {@code "<column name> is <charset name>"}, or {@code null} while no input column
     *         is configured
     */
    public String getSuggestedLabel() {
        if (this.inputColumn == null) {
            return null;
        }
        return this.inputColumn.getName() + " is " + resolveCharsetName();
    }

    /**
     * Categorizes one row. The checks run in the order garbled, full-width, half-width and the
     * first enabled check that fires makes the row INVALID.
     *
     * @param inputRow the row to inspect
     * @return {@link ValidationCategory#VALID} for null or blank values and for values that
     *         pass every enabled check, {@link ValidationCategory#INVALID} otherwise
     */
    public ValidationCategory categorize(InputRow inputRow) {
        Object value = inputRow.getValue(this.inputColumn);
        if (value == null) {
            return ValidationCategory.VALID;
        }
        String text = value.toString();
        if (StringUtils.isBlank(text)) {
            return ValidationCategory.VALID;
        }
        if (this.validGarbled && isGarbled(text)) {
            return ValidationCategory.INVALID;
        }
        if (this.validFullWidth && this.fullWidthPattern.matcher(text).find()) {
            return ValidationCategory.INVALID;
        }
        if (this.validHalfWidth && this.halfWidthPattern.matcher(text).find()) {
            return ValidationCategory.INVALID;
        }
        return ValidationCategory.VALID;
    }

    /**
     * Alias of {@link #categorize(InputRow)} under the name the decompiler generated when it
     * merged the bridge method; kept so that code referring to this name keeps working.
     *
     * @param inputRow the row to inspect
     * @return the result of {@link #categorize(InputRow)}
     * @deprecated call {@link #categorize(InputRow)} instead
     */
    @Deprecated
    public ValidationCategory m22categorize(InputRow inputRow) {
        return categorize(inputRow);
    }

    /**
     * Picks the charset name to work with: the enum choice when it carries a non-blank name,
     * otherwise the free-text {@link #inputCharset} when that is non-blank, otherwise UTF-8.
     */
    private String resolveCharsetName() {
        if (this.charset != null && !StringUtils.isBlank(this.charset.getName())) {
            return this.charset.getName();
        }
        if (!StringUtils.isBlank(this.inputCharset)) {
            // Surrounding whitespace would make Charset.forName reject an otherwise valid name.
            return this.inputCharset.trim();
        }
        return CharsetEnum.UTF8.getName();
    }

    /**
     * Tells whether the text counts as garbled: it contains characters the resolved charset
     * cannot encode, or it matches the garbled pattern as a whole.
     *
     * @throws java.nio.charset.IllegalCharsetNameException if the resolved charset name is illegal
     * @throws java.nio.charset.UnsupportedCharsetException if the resolved charset is not available
     */
    private boolean isGarbled(String str) {
        Charset target = Charset.forName(resolveCharsetName());
        // String.getBytes(Charset) replaces unmappable characters instead of failing, so an
        // encode/decode round trip can never detect them; a fresh encoder answers directly.
        // Charsets that cannot encode at all are skipped (newEncoder() would throw for them).
        if (target.canEncode() && !target.newEncoder().canEncode(str)) {
            return true;
        }
        return this.garbledPattern.matcher(str).matches();
    }
}
