/*
 * Decompiled with CFR 0.152.
 */
package de.lmu.ifi.dbs.elki.algorithm.clustering.correlation;

import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
import de.lmu.ifi.dbs.elki.data.Cluster;
import de.lmu.ifi.dbs.elki.data.Clustering;
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.model.Model;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress;
import de.lmu.ifi.dbs.elki.math.MeanVariance;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
import de.lmu.ifi.dbs.elki.math.random.RandomFactory;
import de.lmu.ifi.dbs.elki.utilities.datastructures.histogram.DoubleDynamicHistogram;
import de.lmu.ifi.dbs.elki.utilities.datastructures.histogram.DoubleStaticHistogram;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;

@Reference(authors="Robert Haralick, Rave Harpaz", title="Linear manifold clustering in high dimensional spaces by stochastic search", booktitle="Pattern Recognition volume 40, Issue 10", url="http://dx.doi.org/10.1016/j.patcog.2007.01.020")
public class LMCLUS
extends AbstractAlgorithm<Clustering<Model>> {
    private static final Logging LOG = Logging.getLogger(LMCLUS.class);
    private static final double NOT_FROM_ONE_CLUSTER_PROBABILITY = 0.2;
    private static final int BINS = 50;
    private final double sensitivityThreshold;
    private final int maxLMDim;
    private final int minsize;
    private final int samplingLevel;
    private final RandomFactory rnd;

    public LMCLUS(int n, int n2, int n3, double d, RandomFactory randomFactory) {
        this.maxLMDim = n;
        this.minsize = n2;
        this.samplingLevel = n3;
        this.sensitivityThreshold = d;
        this.rnd = randomFactory;
    }

    public Clustering<Model> run(Database database, Relation<NumberVector> relation) {
        Clustering<Model> clustering = new Clustering<Model>("LMCLUS Clustering", "lmclus-clustering");
        FiniteProgress finiteProgress = LOG.isVerbose() ? new FiniteProgress("Clustered objects", relation.size(), LOG) : null;
        IndefiniteProgress indefiniteProgress = LOG.isVerbose() ? new IndefiniteProgress("Clusters found", LOG) : null;
        HashSetModifiableDBIDs hashSetModifiableDBIDs = DBIDUtil.newHashSet(relation.getDBIDs());
        Random random = this.rnd.getSingleThreadedRandom();
        int n = Math.min(this.maxLMDim, RelationUtil.dimensionality(relation));
        int n2 = 0;
        while (hashSetModifiableDBIDs.size() > this.minsize) {
            ModifiableDBIDs modifiableDBIDs = hashSetModifiableDBIDs;
            int n3 = 1;
            block1: for (int i = 1; i <= n; ++i) {
                while (true) {
                    Separation separation = this.findSeparation(relation, modifiableDBIDs, i, random);
                    if (separation.goodness <= this.sensitivityThreshold) continue block1;
                    ArrayModifiableDBIDs arrayModifiableDBIDs = DBIDUtil.newArray(modifiableDBIDs.size());
                    DBIDIter dBIDIter = modifiableDBIDs.iter();
                    while (dBIDIter.valid()) {
                        if (this.deviation(relation.get(dBIDIter).getColumnVector().minusEquals(separation.originV), separation.basis) < separation.threshold) {
                            arrayModifiableDBIDs.add(dBIDIter);
                        }
                        dBIDIter.advance();
                    }
                    if (arrayModifiableDBIDs.size() < this.minsize) continue block1;
                    modifiableDBIDs = arrayModifiableDBIDs;
                    n3 = i;
                }
            }
            if (modifiableDBIDs.size() < this.minsize || modifiableDBIDs == hashSetModifiableDBIDs) break;
            Cluster cluster = new Cluster(modifiableDBIDs);
            cluster.setName("Cluster_" + n3 + "d_" + n2);
            ++n2;
            clustering.addToplevelCluster(cluster);
            hashSetModifiableDBIDs.removeDBIDs(modifiableDBIDs);
            if (finiteProgress != null) {
                finiteProgress.setProcessed(relation.size() - hashSetModifiableDBIDs.size(), LOG);
            }
            if (indefiniteProgress == null) continue;
            indefiniteProgress.setProcessed(n2, LOG);
        }
        if (hashSetModifiableDBIDs.size() > 0) {
            clustering.addToplevelCluster(new Cluster((DBIDs)hashSetModifiableDBIDs, true));
        }
        if (finiteProgress != null) {
            finiteProgress.setProcessed(relation.size(), LOG);
            finiteProgress.ensureCompleted(LOG);
        }
        LOG.setCompleted(indefiniteProgress);
        return clustering;
    }

    private double deviation(Vector vector, Matrix matrix) {
        double d;
        double d2 = vector.squaredEuclideanLength();
        return d2 > (d = matrix.transposeTimes(vector).squaredEuclideanLength()) ? Math.sqrt(d2 - d) : 0.0;
    }

    private Separation findSeparation(Relation<NumberVector> relation, DBIDs dBIDs, int n, Random random) {
        Separation separation = new Separation();
        int n2 = (int)Math.min(Math.log(0.2) / Math.log(1.0 - Math.pow(1.0 / (double)this.samplingLevel, n)), (double)dBIDs.size());
        int n3 = 100;
        for (int i = 1; i <= n2; ++i) {
            ModifiableDBIDs modifiableDBIDs = DBIDUtil.randomSample(dBIDs, n + 1, random);
            DBIDIter dBIDIter = modifiableDBIDs.iter();
            Vector vector = relation.get(dBIDIter).getColumnVector();
            dBIDIter.advance();
            Object object = new ArrayList<Vector>(modifiableDBIDs.size() - 1);
            while (dBIDIter.valid()) {
                Vector vector2 = relation.get(dBIDIter).getColumnVector();
                object.add((Vector)vector2.minusEquals(vector));
                dBIDIter.advance();
            }
            Matrix matrix = this.generateOrthonormalBasis((List<Vector>)object);
            if (matrix == null) {
                --i;
                if (--n3 >= 0) continue;
                throw new AbortException("Too many retries in sampling, and always a linear dependant data set.");
            }
            object = new DoubleDynamicHistogram(50);
            double d = 1.0 / (double)dBIDs.size();
            Object object2 = dBIDs.iter();
            while (object2.valid()) {
                if (!modifiableDBIDs.contains((DBIDRef)object2)) {
                    Vector vector3 = relation.get((DBIDRef)object2).getColumnVector().minusEquals(vector);
                    double d2 = this.deviation(vector3, matrix);
                    ((DoubleDynamicHistogram)object).increment(d2, d);
                }
                object2.advance();
            }
            object2 = this.findAndEvaluateThreshold((DoubleDynamicHistogram)object);
            if (!(object2[1] > separation.goodness)) continue;
            separation.goodness = (double)object2[1];
            separation.threshold = (double)object2[0];
            separation.originV = vector;
            separation.basis = matrix;
        }
        return separation;
    }

    private Matrix generateOrthonormalBasis(List<Vector> list) {
        Vector vector = list.get(0);
        vector = vector.times(1.0 / vector.euclideanLength());
        Matrix matrix = new Matrix(vector.getDimensionality(), list.size());
        matrix.setCol(0, vector);
        for (int i = 1; i < list.size(); ++i) {
            Vector vector2 = list.get(i);
            Vector vector3 = vector2.copy();
            for (int j = 0; j < i; ++j) {
                Vector vector4 = matrix.getCol(j);
                double d = vector2.transposeTimes(vector4) / vector4.transposeTimes(vector4);
                if (Double.isNaN(d)) {
                    if (LOG.isDebuggingFine()) {
                        LOG.debugFine("Zero vector encountered? " + vector4);
                    }
                    return null;
                }
                vector3.minusTimesEquals(vector4, d);
            }
            double d = vector3.euclideanLength();
            if (d == 0.0) {
                if (LOG.isDebuggingFine()) {
                    LOG.debugFine("Points not independent - no orthonormalization.");
                }
                return null;
            }
            vector3.timesEquals(1.0 / d);
            matrix.setCol(i, vector3);
        }
        return matrix;
    }

    private double[] findAndEvaluateThreshold(DoubleDynamicHistogram doubleDynamicHistogram) {
        int n;
        int n2 = doubleDynamicHistogram.getNumBins();
        double[] dArray = new double[n2];
        double[] dArray2 = new double[n2];
        double[] dArray3 = new double[n2];
        double[] dArray4 = new double[n2];
        double[] dArray5 = new double[n2];
        double[] dArray6 = new double[n2];
        double[] dArray7 = new double[n2];
        MeanVariance meanVariance = new MeanVariance();
        DoubleStaticHistogram.Iter iter = doubleDynamicHistogram.iter();
        int n3 = 0;
        while (iter.valid()) {
            dArray[n3] = iter.getValue() + (n3 > 0 ? dArray[n3 - 1] : 0.0);
            meanVariance.put(n3, iter.getValue());
            dArray3[n3] = meanVariance.getMean();
            dArray5[n3] = meanVariance.getNaiveStddev();
            ++n3;
            iter.advance();
        }
        meanVariance = new MeanVariance();
        iter = doubleDynamicHistogram.iter();
        iter.seek(doubleDynamicHistogram.getNumBins() - 1);
        n3 = n2 - 1;
        while (iter.valid()) {
            dArray2[n3] = iter.getValue() + (n3 + 1 < n2 ? dArray2[n3 + 1] : 0.0);
            meanVariance.put(n3, iter.getValue());
            dArray4[n3] = meanVariance.getMean();
            dArray6[n3] = meanVariance.getNaiveStddev();
            --n3;
            iter.retract();
        }
        for (n = 0; n < n2; ++n) {
            dArray7[n] = 1.0 + 2.0 * (dArray[n] * (Math.log(dArray5[n]) - Math.log(dArray[n])) + dArray2[n] * (Math.log(dArray6[n]) - Math.log(dArray2[n])));
        }
        n = -1;
        double d = Double.NEGATIVE_INFINITY;
        double d2 = dArray7[1] - dArray7[0];
        for (int i = 1; i < dArray7.length - 1; ++i) {
            double d3 = dArray7[i + 1] - dArray7[i];
            if (d3 >= 0.0 && d2 <= 0.0) {
                double d4;
                int n4;
                double d5 = Double.POSITIVE_INFINITY;
                for (n4 = i - 1; n4 > 0; --n4) {
                    if (!(dArray7[n4 - 1] < dArray7[n4])) continue;
                    d5 = Math.min(d5, dArray7[n4]);
                    break;
                }
                for (n4 = i + 1; n4 < n2 - 2; ++n4) {
                    if (!(dArray7[n4 + 1] < dArray7[n4])) continue;
                    d5 = Math.min(d5, dArray7[n4]);
                    break;
                }
                double d6 = d5 - dArray7[i];
                double d7 = dArray3[i] - dArray4[i];
                double d8 = d7 * d7 / (dArray5[i] * dArray5[i] + dArray6[i] * dArray6[i]);
                if (Double.isNaN(d8)) {
                    d8 = -1.0;
                }
                if ((d4 = d6 * d8) > d) {
                    d = d4;
                    n = i;
                }
            }
            d2 = d3;
        }
        DoubleStaticHistogram.Iter iter2 = doubleDynamicHistogram.iter();
        iter2.seek(n);
        return new double[]{iter2.getRight(), d};
    }

    @Override
    protected Logging getLogger() {
        return LOG;
    }

    @Override
    public TypeInformation[] getInputTypeRestriction() {
        return TypeUtil.array(TypeUtil.NUMBER_VECTOR_FIELD);
    }

    public static class Parameterizer
    extends AbstractParameterizer {
        public static final OptionID MAXDIM_ID = new OptionID("lmclus.maxdim", "Maximum linear manifold dimension to search.");
        public static final OptionID MINSIZE_ID = new OptionID("lmclus.minsize", "Minimum cluster size to allow.");
        public static final OptionID SAMPLINGL_ID = new OptionID("lmclus.sampling-level", "A number used to determine how many samples are taken in each search.");
        public static final OptionID THRESHOLD_ID = new OptionID("lmclus.threshold", "Threshold to determine if a cluster was found.");
        public static final OptionID RANDOM_ID = new OptionID("lmclus.seed", "Random generator seed.");
        private int maxdim = Integer.MAX_VALUE;
        private int minsize;
        private int samplingLevel;
        private double threshold;
        private RandomFactory rnd;

        @Override
        protected void makeOptions(Parameterization parameterization) {
            RandomParameter randomParameter;
            DoubleParameter doubleParameter;
            IntParameter intParameter;
            super.makeOptions(parameterization);
            IntParameter intParameter2 = new IntParameter(MAXDIM_ID);
            intParameter2.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
            intParameter2.setOptional(true);
            if (parameterization.grab(intParameter2)) {
                this.maxdim = (Integer)intParameter2.getValue();
            }
            IntParameter intParameter3 = new IntParameter(MINSIZE_ID);
            intParameter3.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
            if (parameterization.grab(intParameter3)) {
                this.minsize = (Integer)intParameter3.getValue();
            }
            if (parameterization.grab(intParameter = new IntParameter(SAMPLINGL_ID, 100))) {
                this.samplingLevel = (Integer)intParameter.getValue();
            }
            if (parameterization.grab(doubleParameter = new DoubleParameter(THRESHOLD_ID))) {
                this.threshold = (Double)doubleParameter.getValue();
            }
            if (parameterization.grab(randomParameter = new RandomParameter(RANDOM_ID))) {
                this.rnd = (RandomFactory)randomParameter.getValue();
            }
        }

        @Override
        protected LMCLUS makeInstance() {
            return new LMCLUS(this.maxdim, this.minsize, this.samplingLevel, this.threshold, this.rnd);
        }
    }

    private static class Separation {
        double goodness = Double.NEGATIVE_INFINITY;
        double threshold = Double.NEGATIVE_INFINITY;
        Matrix basis = null;
        Vector originV = null;

        private Separation() {
        }
    }
}

