/*
 * Decompiled with CFR 0.152.
 */
package de.lmu.ifi.dbs.elki.datasource.parser;

import de.lmu.ifi.dbs.elki.data.AbstractNumberVector;
import de.lmu.ifi.dbs.elki.data.ClassLabel;
import de.lmu.ifi.dbs.elki.data.DoubleVector;
import de.lmu.ifi.dbs.elki.data.ExternalID;
import de.lmu.ifi.dbs.elki.data.LabelList;
import de.lmu.ifi.dbs.elki.data.SimpleClassLabel;
import de.lmu.ifi.dbs.elki.data.SparseDoubleVector;
import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation;
import de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle;
import de.lmu.ifi.dbs.elki.datasource.parser.Parser;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.utilities.FormatUtil;
import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.PatternParameter;
import gnu.trove.map.TIntDoubleMap;
import gnu.trove.map.hash.TIntDoubleHashMap;
import gnu.trove.map.hash.TIntObjectHashMap;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.StreamTokenizer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class ArffParser
implements Parser {
    private static final Logging LOG = Logging.getLogger(ArffParser.class);
    public static final Matcher ARFF_HEADER_RELATION = Pattern.compile("@relation\\s+(.*)", 2).matcher("");
    public static final Matcher ARFF_HEADER_ATTRIBUTE = Pattern.compile("@attribute\\s+([^ ]+|['\"].*?['\"])\\s+(numeric|real|integer|string|double|date(\\s.*)|\\{.*\\})\\s*", 2).matcher("");
    public static final Matcher ARFF_HEADER_DATA = Pattern.compile("@data\\s*", 2).matcher("");
    public static final Matcher ARFF_COMMENT = Pattern.compile("^\\s*%.*").matcher("");
    public static final String DEFAULT_ARFF_MAGIC_EID = "(External-?ID)";
    public static final String DEFAULT_ARFF_MAGIC_CLASS = "(Class|Class-?Label)";
    public static final Matcher ARFF_NUMERIC = Pattern.compile("(numeric|real|integer|double)", 2).matcher("");
    public static final Matcher EMPTY = Pattern.compile("^\\s*$").matcher("");
    Matcher magic_eid;
    Matcher magic_class;
    ArrayList<String> labels = new ArrayList();

    public ArffParser(Pattern pattern, Pattern pattern2) {
        this.magic_eid = pattern.matcher("");
        this.magic_class = pattern2.matcher("");
    }

    public ArffParser(String string, String string2) {
        this(Pattern.compile(string, 2), Pattern.compile(string2, 2));
    }

    @Override
    public MultipleObjectsBundle parse(InputStream inputStream) {
        try {
            BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(inputStream));
            ArrayList<String> arrayList = new ArrayList<String>();
            ArrayList<String> arrayList2 = new ArrayList<String>();
            this.readHeader(bufferedReader);
            this.parseAttributeStatements(bufferedReader, arrayList, arrayList2);
            int[] nArray = new int[arrayList.size()];
            TypeInformation[] typeInformationArray = new TypeInformation[arrayList.size()];
            int[] nArray2 = new int[arrayList.size()];
            this.processColumnTypes(arrayList, arrayList2, nArray, typeInformationArray, nArray2);
            MultipleObjectsBundle multipleObjectsBundle = new MultipleObjectsBundle();
            StreamTokenizer streamTokenizer = this.makeArffTokenizer(bufferedReader);
            int n = 0;
            this.nextToken(streamTokenizer);
            while (streamTokenizer.ttype != -1) {
                if (streamTokenizer.ttype != 10) {
                    if (streamTokenizer.ttype != 123) {
                        if (n == 0) {
                            this.setupBundleHeaders(arrayList, nArray, typeInformationArray, nArray2, multipleObjectsBundle, false);
                            n = 1;
                        }
                        if (n != 1) {
                            throw new AbortException("Mixing dense and sparse vectors is currently not allowed.");
                        }
                        multipleObjectsBundle.appendSimple(this.loadDenseInstance(streamTokenizer, nArray2, typeInformationArray, multipleObjectsBundle.metaLength()));
                    } else {
                        if (n == 0) {
                            this.setupBundleHeaders(arrayList, nArray, typeInformationArray, nArray2, multipleObjectsBundle, true);
                            n = 2;
                        }
                        if (n != 2) {
                            throw new AbortException("Mixing dense and sparse vectors is currently not allowed.");
                        }
                        multipleObjectsBundle.appendSimple(this.loadSparseInstance(streamTokenizer, nArray, nArray2, typeInformationArray, multipleObjectsBundle.metaLength()));
                    }
                }
                if (streamTokenizer.ttype == -1) continue;
                this.nextToken(streamTokenizer);
            }
            return multipleObjectsBundle;
        }
        catch (IOException iOException) {
            throw new AbortException("IO error in parser", iOException);
        }
    }

    private Object[] loadSparseInstance(StreamTokenizer streamTokenizer, int[] nArray, int[] nArray2, TypeInformation[] typeInformationArray, int n) throws IOException {
        TIntObjectHashMap tIntObjectHashMap;
        block19: {
            tIntObjectHashMap = new TIntObjectHashMap();
            while (true) {
                this.nextToken(streamTokenizer);
                assert (streamTokenizer.ttype != -1 && streamTokenizer.ttype != 10);
                if (streamTokenizer.ttype == 125) {
                    this.nextToken(streamTokenizer);
                    assert (streamTokenizer.ttype == -1 || streamTokenizer.ttype == 10);
                    break block19;
                }
                if (streamTokenizer.ttype != -3) {
                    throw new AbortException("Unexpected token type encountered: " + streamTokenizer.toString() + " type: " + streamTokenizer.ttype);
                }
                int n2 = Integer.valueOf(streamTokenizer.sval);
                if (tIntObjectHashMap.containsKey(n2)) {
                    throw new AbortException("Duplicate key in sparse vector: " + streamTokenizer.toString());
                }
                this.nextToken(streamTokenizer);
                if (streamTokenizer.ttype != -3) break;
                if (TypeUtil.NUMBER_VECTOR_FIELD.equals(typeInformationArray[nArray[n2]])) {
                    tIntObjectHashMap.put(n2, (Object)FormatUtil.parseDouble(streamTokenizer.sval));
                    continue;
                }
                tIntObjectHashMap.put(n2, (Object)streamTokenizer.sval);
            }
            throw new AbortException("Unexpected token type encountered: " + streamTokenizer.toString());
        }
        Object[] objectArray = new Object[n];
        for (int i = 0; i < n; ++i) {
            Object object;
            Object object2;
            int n3 = -1;
            for (int j = 0; j < nArray.length; ++j) {
                if (nArray[j] != i || n3 >= 0) continue;
                n3 = j;
                break;
            }
            assert (n3 >= 0);
            if (TypeUtil.NUMBER_VECTOR_FIELD.equals(typeInformationArray[i])) {
                object2 = new TIntDoubleHashMap(nArray2[i]);
                object = tIntObjectHashMap.iterator();
                while (object.hasNext()) {
                    object.advance();
                    int n4 = object.key();
                    if (n4 < n3) continue;
                    if (n4 >= n3 + nArray2[i]) break;
                    double d = (Double)object.value();
                    object2.put(n4 - n3, d);
                }
                objectArray[i] = new SparseDoubleVector((TIntDoubleMap)object2, nArray2[i]);
                continue;
            }
            if (TypeUtil.LABELLIST.equals(typeInformationArray[i])) {
                this.labels.clear();
                object2 = tIntObjectHashMap.iterator();
                while (object2.hasNext()) {
                    object2.advance();
                    int n5 = object2.key();
                    if (n5 < n3) continue;
                    if (n5 >= n3 + nArray2[i]) break;
                    String string = (String)object2.value();
                    if (this.labels.size() < n5 - n3) {
                        LOG.warning("Sparse consecutive labels are currently not correctly supported.");
                    }
                    this.labels.add(string);
                }
                objectArray[i] = LabelList.make(this.labels);
                continue;
            }
            if (TypeUtil.EXTERNALID.equals(typeInformationArray[i])) {
                object2 = (String)tIntObjectHashMap.get(n3);
                if (object2 != null) {
                    objectArray[i] = new ExternalID((String)object2);
                    continue;
                }
                throw new AbortException("External ID column not set in sparse instance." + streamTokenizer.toString());
            }
            if (TypeUtil.CLASSLABEL.equals(typeInformationArray[i])) {
                object2 = tIntObjectHashMap.get(n3);
                if (object2 != null) {
                    object = new SimpleClassLabel(String.valueOf(object2));
                    objectArray[i] = object;
                    continue;
                }
                throw new AbortException("Class label column not set in sparse instance." + streamTokenizer.toString());
            }
            throw new AbortException("Unsupported type for column ->" + i + ": " + (typeInformationArray[i] != null ? typeInformationArray[i].toString() : "null"));
        }
        return objectArray;
    }

    private Object[] loadDenseInstance(StreamTokenizer streamTokenizer, int[] nArray, TypeInformation[] typeInformationArray, int n) throws IOException {
        Object[] objectArray = new Object[n];
        for (int i = 0; i < n; ++i) {
            Object object;
            if (TypeUtil.NUMBER_VECTOR_FIELD.equals(typeInformationArray[i])) {
                object = new double[nArray[i]];
                for (int j = 0; j < nArray[i]; ++j) {
                    if (streamTokenizer.ttype == 63) {
                        streamTokenizer.nval = Double.NaN;
                    } else if (streamTokenizer.ttype == -3) {
                        try {
                            object[j] = FormatUtil.parseDouble(streamTokenizer.sval);
                        }
                        catch (NumberFormatException numberFormatException) {
                            throw new AbortException("Expected number value, got: " + streamTokenizer.sval);
                        }
                    } else {
                        throw new AbortException("Expected word token, got: " + streamTokenizer.toString());
                    }
                    this.nextToken(streamTokenizer);
                }
                objectArray[i] = new DoubleVector((double[])object);
                continue;
            }
            if (TypeUtil.LABELLIST.equals(typeInformationArray[i])) {
                this.labels.clear();
                for (int j = 0; j < nArray[i]; ++j) {
                    if (streamTokenizer.ttype != -3) {
                        throw new AbortException("Expected word token, got: " + streamTokenizer.toString());
                    }
                    this.labels.add(streamTokenizer.sval);
                    this.nextToken(streamTokenizer);
                }
                objectArray[i] = LabelList.make(this.labels);
                continue;
            }
            if (TypeUtil.EXTERNALID.equals(typeInformationArray[i])) {
                if (streamTokenizer.ttype != -3) {
                    throw new AbortException("Expected word token, got: " + streamTokenizer.toString());
                }
                objectArray[i] = new ExternalID(streamTokenizer.sval);
                this.nextToken(streamTokenizer);
                continue;
            }
            if (TypeUtil.CLASSLABEL.equals(typeInformationArray[i])) {
                if (streamTokenizer.ttype != -3) {
                    throw new AbortException("Expected word token, got: " + streamTokenizer.toString());
                }
                object = new SimpleClassLabel(streamTokenizer.sval);
                objectArray[i] = object;
                this.nextToken(streamTokenizer);
                continue;
            }
            throw new AbortException("Unsupported type for column ->" + i + ": " + (typeInformationArray[i] != null ? typeInformationArray[i].toString() : "null"));
        }
        return objectArray;
    }

    private StreamTokenizer makeArffTokenizer(BufferedReader bufferedReader) {
        StreamTokenizer streamTokenizer = new StreamTokenizer(bufferedReader);
        streamTokenizer.resetSyntax();
        streamTokenizer.whitespaceChars(0, 32);
        streamTokenizer.ordinaryChars(48, 57);
        streamTokenizer.ordinaryChar(45);
        streamTokenizer.ordinaryChar(46);
        streamTokenizer.wordChars(33, 255);
        streamTokenizer.whitespaceChars(44, 44);
        streamTokenizer.commentChar(37);
        streamTokenizer.quoteChar(34);
        streamTokenizer.quoteChar(39);
        streamTokenizer.ordinaryChar(123);
        streamTokenizer.ordinaryChar(125);
        streamTokenizer.eolIsSignificant(true);
        return streamTokenizer;
    }

    private void setupBundleHeaders(ArrayList<String> arrayList, int[] nArray, TypeInformation[] typeInformationArray, int[] nArray2, MultipleObjectsBundle multipleObjectsBundle, boolean bl) {
        int n = 0;
        int n2 = 0;
        while (n < nArray.length) {
            String[] stringArray;
            int n3;
            for (n3 = n + 1; n3 < nArray.length && nArray[n3] == nArray[n]; ++n3) {
            }
            if (TypeUtil.NUMBER_VECTOR_FIELD.equals(typeInformationArray[n2])) {
                VectorFieldTypeInformation<AbstractNumberVector> vectorFieldTypeInformation;
                stringArray = new String[nArray2[n2]];
                for (int i = 0; i < nArray2[n2]; ++i) {
                    stringArray[i] = arrayList.get(n2 + i);
                }
                if (!bl) {
                    vectorFieldTypeInformation = new VectorFieldTypeInformation<DoubleVector>(DoubleVector.FACTORY, nArray2[n2], stringArray);
                    multipleObjectsBundle.appendColumn(vectorFieldTypeInformation, new ArrayList());
                } else {
                    vectorFieldTypeInformation = new VectorFieldTypeInformation<SparseDoubleVector>(SparseDoubleVector.FACTORY, nArray2[n2], stringArray);
                    multipleObjectsBundle.appendColumn(vectorFieldTypeInformation, new ArrayList());
                }
            } else if (TypeUtil.LABELLIST.equals(typeInformationArray[n2])) {
                stringArray = new StringBuilder(arrayList.get(n2));
                for (int i = 1; i < nArray2[n2]; ++i) {
                    stringArray.append(' ').append(arrayList.get(n2 + i));
                }
                multipleObjectsBundle.appendColumn(new SimpleTypeInformation<LabelList>(LabelList.class, stringArray.toString()), new ArrayList());
            } else if (TypeUtil.EXTERNALID.equals(typeInformationArray[n2])) {
                multipleObjectsBundle.appendColumn(new SimpleTypeInformation<ExternalID>(ExternalID.class, arrayList.get(n2)), new ArrayList());
            } else if (TypeUtil.CLASSLABEL.equals(typeInformationArray[n2])) {
                multipleObjectsBundle.appendColumn(new SimpleTypeInformation<ClassLabel>(ClassLabel.class, arrayList.get(n2)), new ArrayList());
            } else {
                throw new AbortException("Unsupported type for column " + n + "->" + n2 + ": " + (typeInformationArray[n2] != null ? typeInformationArray[n2].toString() : "null"));
            }
            assert (n2 == multipleObjectsBundle.metaLength() - 1);
            n = n3;
            ++n2;
        }
    }

    private void readHeader(BufferedReader bufferedReader) throws IOException {
        String string;
        do {
            if ((string = bufferedReader.readLine()) != null) continue;
            throw new AbortException(ARFF_HEADER_RELATION + " not found in file.");
        } while (ARFF_COMMENT.reset(string).matches() || EMPTY.reset(string).matches());
        if (!ARFF_HEADER_RELATION.reset(string).matches()) {
            throw new AbortException("Expected relation declaration: " + string);
        }
    }

    private void parseAttributeStatements(BufferedReader bufferedReader, ArrayList<String> arrayList, ArrayList<String> arrayList2) throws IOException {
        block6: {
            String string;
            while (true) {
                if ((string = bufferedReader.readLine()) == null) {
                    throw new AbortException(ARFF_HEADER_DATA + " not found in file.");
                }
                if (ARFF_COMMENT.reset(string).matches() || EMPTY.reset(string).matches()) continue;
                if (ARFF_HEADER_DATA.reset(string).matches()) break block6;
                Matcher matcher = ARFF_HEADER_ATTRIBUTE.reset(string);
                if (!matcher.matches()) break;
                String string2 = matcher.group(1);
                if (string2.charAt(0) == '\'' && string2.charAt(string2.length() - 1) == '\'') {
                    string2 = string2.substring(1, string2.length() - 1);
                } else if (string2.charAt(0) == '\"' && string2.charAt(string2.length() - 1) == '\"') {
                    string2 = string2.substring(1, string2.length() - 1);
                }
                String string3 = matcher.group(2);
                arrayList.add(string2);
                arrayList2.add(string3);
            }
            throw new AbortException("Unrecognized line: " + string);
        }
        assert (arrayList.size() == arrayList2.size());
    }

    private void processColumnTypes(ArrayList<String> arrayList, ArrayList<String> arrayList2, int[] nArray, TypeInformation[] typeInformationArray, int[] nArray2) {
        int n = 0;
        for (int i = 0; i < nArray.length; ++i) {
            if (this.magic_eid != null && this.magic_eid.reset(arrayList.get(i)).matches()) {
                nArray[i] = n;
                typeInformationArray[n] = TypeUtil.EXTERNALID;
                nArray2[n] = 1;
                ++n;
                continue;
            }
            if (this.magic_class != null && this.magic_class.reset(arrayList.get(i)).matches()) {
                nArray[i] = n;
                typeInformationArray[n] = TypeUtil.CLASSLABEL;
                nArray2[n] = 1;
                ++n;
                continue;
            }
            if (ARFF_NUMERIC.reset(arrayList2.get(i)).matches()) {
                if (n > 0 && TypeUtil.NUMBER_VECTOR_FIELD.equals(typeInformationArray[n - 1])) {
                    nArray[i] = n - 1;
                    int n2 = n - 1;
                    nArray2[n2] = nArray2[n2] + 1;
                    continue;
                }
                nArray[i] = n;
                typeInformationArray[n] = TypeUtil.NUMBER_VECTOR_FIELD;
                nArray2[n] = 1;
                ++n;
                continue;
            }
            if (n > 0 && TypeUtil.LABELLIST.equals(typeInformationArray[n - 1])) {
                nArray[i] = n - 1;
                int n3 = n - 1;
                nArray2[n3] = nArray2[n3] + 1;
                continue;
            }
            nArray[i] = n;
            typeInformationArray[n] = TypeUtil.LABELLIST;
            nArray2[n] = 1;
            ++n;
        }
    }

    private void nextToken(StreamTokenizer streamTokenizer) throws IOException {
        streamTokenizer.nextToken();
        if (streamTokenizer.ttype == 39 || streamTokenizer.ttype == 34) {
            streamTokenizer.ttype = -3;
        } else if (streamTokenizer.ttype == -3 && streamTokenizer.sval.equals("?")) {
            streamTokenizer.ttype = 63;
        }
        if (LOG.isDebugging()) {
            if (streamTokenizer.ttype == -2) {
                LOG.debug("token: " + streamTokenizer.nval);
            } else if (streamTokenizer.ttype == -3) {
                LOG.debug("token: " + streamTokenizer.sval);
            } else if (streamTokenizer.ttype == -1) {
                LOG.debug("token: EOF");
            } else if (streamTokenizer.ttype == 10) {
                LOG.debug("token: EOL");
            } else {
                LOG.debug("token type: " + streamTokenizer.ttype);
            }
        }
    }

    @Override
    public void cleanup() {
        if (this.magic_eid != null) {
            this.magic_eid.reset("");
        }
        if (this.magic_class != null) {
            this.magic_class.reset("");
        }
    }

    public static class Parameterizer
    extends AbstractParameterizer {
        public static final OptionID MAGIC_EID_ID = new OptionID("arff.externalid", "Pattern to recognize external ID attributes.");
        public static final OptionID MAGIC_CLASS_ID = new OptionID("arff.classlabel", "Pattern to recognize class label attributes.");
        Pattern magic_eid;
        Pattern magic_class;

        @Override
        protected void makeOptions(Parameterization parameterization) {
            PatternParameter patternParameter;
            super.makeOptions(parameterization);
            PatternParameter patternParameter2 = new PatternParameter(MAGIC_EID_ID, ArffParser.DEFAULT_ARFF_MAGIC_EID);
            if (parameterization.grab(patternParameter2)) {
                this.magic_eid = (Pattern)patternParameter2.getValue();
            }
            if (parameterization.grab(patternParameter = new PatternParameter(MAGIC_CLASS_ID, ArffParser.DEFAULT_ARFF_MAGIC_CLASS))) {
                this.magic_class = (Pattern)patternParameter.getValue();
            }
        }

        @Override
        protected ArffParser makeInstance() {
            return new ArffParser(this.magic_eid, this.magic_class);
        }
    }
}

