package elki.datasource.parser;

import elki.data.LabelList;
import elki.data.SparseFloatVector;
import elki.data.SparseNumberVector;
import elki.data.type.SimpleTypeInformation;
import elki.data.type.VectorFieldTypeInformation;
import elki.data.type.VectorTypeInformation;
import elki.datasource.parser.NumberVectorLabelParser;
import elki.logging.Logging;
import elki.utilities.exceptions.AbortException;
import elki.utilities.io.ParseUtil;
import elki.utilities.optionhandling.OptionID;
import elki.utilities.optionhandling.parameterization.Parameterization;
import elki.utilities.optionhandling.parameters.Flag;
import elki.utilities.optionhandling.parameters.ObjectParameter;
import it.unimi.dsi.fastutil.ints.Int2DoubleMap;
import it.unimi.dsi.fastutil.ints.Int2DoubleOpenHashMap;
import it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap;
import it.unimi.dsi.fastutil.objects.ObjectIterator;
import java.util.ArrayList;

/* loaded from: input_file:elki/datasource/parser/TermFrequencyParser.class */
/**
 * Parser producing sparse vectors from lines made of alternating
 * {@code term value} token pairs.
 * <p>
 * Every distinct term is assigned the next free dimension index the first
 * time it is seen (see {@link #keymap} and {@link #numterms}). Tokens in
 * label columns, and terms that are not followed by a parsable number, are
 * collected as labels of the current line. Optionally, the values of each
 * line are divided by their sum.
 *
 * @param <V> sparse vector type produced by this parser
 */
public class TermFrequencyParser<V extends SparseNumberVector> extends NumberVectorLabelParser<V> {
    /** Class logger. */
    private static final Logging LOG = Logging.getLogger(TermFrequencyParser.class);

    /** Number of distinct terms seen so far; also the next dimension index to assign. */
    int numterms;

    /** Maps each term to its dimension index; returns -1 for unknown terms. */
    Object2IntOpenHashMap<String> keymap;

    /** Whether the values of each line are divided by their sum. */
    boolean normalize;

    /** Factory used to build the sparse vector of each line. */
    private final SparseNumberVector.Factory<V> sparsefactory;

    /** Reusable buffer of (dimension, value) pairs of the line being parsed. */
    Int2DoubleOpenHashMap values;

    /** Reusable buffer of the labels of the line being parsed. */
    ArrayList<String> labels;

    /**
     * Parameterization class.
     *
     * @param <V> sparse vector type produced by the configured parser
     */
    public static class Par<V extends SparseNumberVector> extends NumberVectorLabelParser.Par<V> {
        /** Option flag to enable normalization. */
        public static final OptionID NORMALIZE_FLAG = new OptionID("tf.normalize", "Normalize vectors to manhattan length 1 (convert term counts to term frequencies)");

        /** Value of the normalization flag; disabled unless the flag is set. */
        boolean normalize = false;

        /**
         * Configures the inherited options, then reads the normalization flag.
         *
         * @param parameterization parameterization to read the options from
         */
        @Override
        public void configure(Parameterization parameterization) {
            super.configure(parameterization);
            new Flag(NORMALIZE_FLAG).grab(parameterization, enabled -> this.normalize = enabled);
        }

        /**
         * Reads the vector factory option, restricted to sparse vector
         * factories and defaulting to {@link SparseFloatVector.Factory}.
         *
         * @param parameterization parameterization to read the option from
         */
        @Override
        protected void getFactory(Parameterization parameterization) {
            new ObjectParameter<SparseNumberVector.Factory<V>>(VECTOR_TYPE_ID, SparseNumberVector.Factory.class, SparseFloatVector.Factory.class) //
                    .grab(parameterization, chosen -> this.factory = chosen);
        }

        /**
         * Builds the parser from the configured options.
         *
         * @return new parser instance
         */
        @Override
        /* renamed from: make */
        public TermFrequencyParser<V> mo131make() {
            // getFactory() above only accepts sparse factories, so the inherited
            // factory field can be narrowed to the sparse factory type here.
            return new TermFrequencyParser<>(this.normalize, this.format, this.labelIndices, (SparseNumberVector.Factory<V>) this.factory);
        }
    }

    /**
     * Constructor using {@link CSVReaderFormat#DEFAULT_FORMAT} and no label
     * column selection.
     *
     * @param normalize whether to divide the values of each line by their sum
     * @param factory factory for the sparse vectors to produce
     */
    public TermFrequencyParser(boolean normalize, SparseNumberVector.Factory<V> factory) {
        this(normalize, CSVReaderFormat.DEFAULT_FORMAT, null, factory);
    }

    /**
     * Full constructor.
     *
     * @param normalize whether to divide the values of each line by their sum
     * @param format reader format, passed to the superclass
     * @param labelIndices label column selection, passed to the superclass
     * @param factory factory for the sparse vectors to produce
     */
    public TermFrequencyParser(boolean normalize, CSVReaderFormat format, long[] labelIndices, SparseNumberVector.Factory<V> factory) {
        super(format, labelIndices, factory);
        this.values = new Int2DoubleOpenHashMap();
        this.labels = new ArrayList<>();
        this.normalize = normalize;
        this.keymap = new Object2IntOpenHashMap<>();
        // Unknown terms must be distinguishable from dimension 0.
        this.keymap.defaultReturnValue(-1);
        this.sparsefactory = factory;
        // NOTE(review): presumably pre-sets the superclass' "already warned" marker so
        // that no dimensionality warning is emitted (the dimensionality grows as new
        // terms appear) - confirm against NumberVectorLabelParser.
        this.warnedDim = true;
    }

    /**
     * Parses the tokens of the current line into {@code curvec} and
     * {@code curlbl}.
     * <p>
     * Tokens in label columns become labels. The remaining tokens are read as
     * {@code term value} pairs. If the token following a term is not a
     * parsable number, that term becomes a label and the offending token is
     * treated as the next term; a trailing unpaired term becomes a label too.
     * If a term occurs more than once in a line, the last value is kept.
     * <p>
     * With normalization enabled, all values are divided by their sum, unless
     * that sum is zero (which would only produce NaN or infinite values) or
     * already one.
     *
     * @return always {@code true}
     */
    @Override
    protected boolean parseLineInternal() {
        // Sum of the values currently stored for this line.
        // NOTE(review): this is a signed sum; it equals the Manhattan length
        // only when all values are non-negative.
        double sum = 0.0;
        String term = null;
        int column = 0;
        for (; this.tokenizer.valid(); this.tokenizer.advance()) {
            if (isLabelColumn(column++)) {
                this.labels.add(this.tokenizer.getSubstring());
                continue;
            }
            if (term == null) {
                // First half of a pair: remember the term, its value comes next.
                term = this.tokenizer.getSubstring();
                continue;
            }
            try {
                final double value = this.tokenizer.getDouble();
                int dim = this.keymap.getInt(term);
                if (dim < 0) {
                    // Unseen term: assign the next free dimension.
                    dim = this.numterms;
                    this.keymap.put(term, dim);
                    this.numterms++;
                }
                // put() returns the value it replaced (0.0 if the dimension was not
                // set yet). Subtracting it keeps the sum consistent with the stored
                // values when a term is repeated within one line.
                final double replaced = this.values.put(dim, value);
                sum += value - replaced;
                term = null;
            } catch (NumberFormatException e) {
                if (!this.warnedPrecision && (e == ParseUtil.PRECISION_OVERFLOW || e == ParseUtil.EXPONENT_OVERFLOW)) {
                    getLogger().warning("Too many digits in what looked like a double number - treating as string: " + this.tokenizer.getSubstring());
                    this.warnedPrecision = true;
                }
                // The pending term had no numeric value: keep it as a label and
                // treat the current token as the next candidate term.
                this.labels.add(term);
                term = this.tokenizer.getSubstring();
            }
        }
        if (term != null) {
            // Trailing term without a value.
            this.labels.add(term);
        }
        this.haslabels |= !this.labels.isEmpty();
        // Skip normalization for a zero sum (division would yield NaN/Infinity)
        // and when the values already sum to one.
        if (this.normalize && sum != 0.0 && Math.abs(sum - 1.0) > Double.MIN_NORMAL) {
            ObjectIterator<Int2DoubleMap.Entry> iter = this.values.int2DoubleEntrySet().fastIterator();
            while (iter.hasNext()) {
                Int2DoubleMap.Entry entry = iter.next();
                entry.setValue(entry.getDoubleValue() / sum);
            }
        }
        this.curvec = this.sparsefactory.newNumberVector(this.values, this.numterms);
        this.curlbl = LabelList.make(this.labels);
        // The buffers are reused for the next line.
        this.values.clear();
        this.labels.clear();
        return true;
    }

    /**
     * Chooses the type information for the vector column.
     * <p>
     * Equal bounds yield a fixed-dimensionality vector field type; a lower
     * bound below the upper bound yields a variable-dimensionality vector
     * type.
     *
     * @param mindim lower dimensionality bound (presumably the minimum
     *        dimensionality observed by the superclass)
     * @param maxdim upper dimensionality bound (presumably the maximum
     *        dimensionality observed by the superclass)
     * @return type information for the produced vectors
     * @throws AbortException if {@code mindim > maxdim}, reported as no
     *         vectors having been read
     */
    @Override
    protected SimpleTypeInformation<V> getTypeInformation(int mindim, int maxdim) {
        if (mindim == maxdim) {
            return new VectorFieldTypeInformation<>(this.factory, mindim);
        }
        if (mindim < maxdim) {
            return new VectorTypeInformation<>(this.factory, this.factory.getDefaultSerializer(), mindim, maxdim);
        }
        throw new AbortException("No vectors were read from the input file - cannot determine vector data type.");
    }

    /**
     * Returns the class logger.
     *
     * @return logger of this class
     */
    @Override
    protected Logging getLogger() {
        return LOG;
    }
}
