"""
The model that determines how 'good' a scatterplot image perceptually communicates 
the statistical information of the data points for different types of data analysis tasks

Author: Luana Micallef and Tino Weinkauf, August 2015
"""

import utilities
import numpy as np
import os, math, sys
import ellipse
from ellipse import Ellipse
from skimage import color
from skimage.feature import canny



# Returns the properties of the `nstd` sigma error ellipse of the covariance matrix computed from the given `points`
# Obtained from: https://github.com/tohojo/flent/blob/master/flent/error_ellipse.py
def getCovEllipsePropsForPoints(points, nstd=2):
    """Return the properties of the `nstd`-sigma covariance ellipse of `points`.

    Inputs:
      - points: numpy array with one point per row (columns are the variables)
      - nstd:   number of standard deviations the ellipse axes should span

    Returns a tuple (pos, width, height, alpha) where
      - pos    is the mean of the points (centre of the ellipse),
      - width  is the FULL length of the major axis (not the radius),
      - height is the FULL length of the minor axis (not the radius),
      - alpha  is the anticlockwise angle in degrees, in [0,180], between the
               x-axis and the major axis.
    """
    pos = points.mean(axis=0)
    cov = np.cov(points, rowvar=False)

    # order the eigenpairs by decreasing eigenvalue so that index 0 is the major axis
    eigenvalues, eigenvectors = np.linalg.eigh(cov)
    descending = eigenvalues.argsort()[::-1]
    eigenvalues = eigenvalues[descending]
    eigenvectors = eigenvectors[:, descending]

    # get the largest eigenvector to point into the upper hemisphere
    # (negating creates a new array, so the eigenvector matrix is left untouched)
    major_direction = eigenvectors[:, 0]
    if major_direction[1] < 0:
        major_direction = -major_direction

    # the cosine of the angle to the x-axis (1,0) is the x component of the unit
    # eigenvector; clamp it because round-off can push it marginally outside
    # [-1,1], which would make acos raise a domain error
    cos_alpha = np.clip(major_direction[0], -1.0, 1.0)
    alpha = np.degrees(math.acos(cos_alpha))

    # a covariance matrix has no negative eigenvalues, but round-off on
    # (nearly) collinear points can yield tiny negative ones; clamp them so the
    # square root does not turn the axis length into NaN
    eigenvalues = np.clip(eigenvalues, 0.0, None)

    # Width and height are "full" widths, not radius
    width, height = 2 * nstd * np.sqrt(eigenvalues)

    return (pos, width, height, alpha)


# Computes the covariance ellipse for a set of points
def getCovEllipseForPoints(points, ellipse_nstd):
    """Build the covariance ellipse of `points` as an Ellipse object.

    Inputs:
      - points:       the points whose covariance ellipse is wanted
      - ellipse_nstd: number of standard deviations the ellipse should span

    Returns: Ellipse constructed from the centre, the two semi-axes and the
    anticlockwise angle (degrees) of the major axis.
    """
    centre, full_width, full_height, angle_anticlockwise_deg = \
        getCovEllipsePropsForPoints(points, nstd=ellipse_nstd)

    # the properties carry full axis lengths; halve them to get the semi-axes
    return Ellipse(centre[0],
                   centre[1],
                   full_width / 2,
                   full_height / 2,
                   angle_anticlockwise_deg)


# Converts boolean pixel map (e.g., the 2D boolean map extracted by canny edge detector, 
# where each point is either True or False depending on whether or not an edge was detected in that pixel)
# to an array of only the pixels that are on
def convertBooleanPixelMapToDataPoints(pixelmap):
    """Convert a 2D on/off pixel map into an array of [x, y] points.

    Every non-zero pixel at (row, column) becomes the point
    [column, number_of_rows - row], i.e. the row index is flipped so that
    y grows upwards.

    Returns: integer numpy array of shape (number of on-pixels, 2).
    """
    on_pixels = np.nonzero(pixelmap)
    row_indices = on_pixels[0]
    column_indices = on_pixels[1]

    flipped_rows = len(pixelmap) - row_indices
    return np.stack((column_indices, flipped_rows), axis=1)



# Gets the perceived edges of points detected in the scatterplot image
# Input: image_rgba - numpy array with the RGBA values of the pixels of the scatterplot image
def getPerceivedPoints(image_rgba, cannysigma=4):
    """Detect the perceived edges in a scatterplot image.

    Inputs:
      - image_rgba: numpy array with the RGBA (or RGB) values of the pixels of
                    the scatterplot image
      - cannysigma: sigma passed to the canny edge detector (defaults to the
                    value that was previously hard-coded)
                    ... more about parameter setting here:
                    http://wintopo.com/help/html/canny-opt.htm

    Returns: (True, points) where points is the array of edge pixels as
    [x, y] points, or (False, None) if no edge pixel was detected.
    """
    # rgb2gray in recent scikit-image releases only accepts 3-channel input,
    # so drop the alpha channel explicitly; the colour channels are unchanged
    image_rgb = image_rgba
    if image_rgba.ndim == 3 and image_rgba.shape[-1] == 4:
        image_rgb = image_rgba[..., :3]

    # change to grayscale
    image_gray = color.rgb2gray(image_rgb)

    # use canny edge detector to detect edges in the grayscale image
    edges = canny(image_gray, sigma=cannysigma)

    # convert boolean edges to points
    data_edges = convertBooleanPixelMapToDataPoints(edges)
    if len(data_edges) == 0:
        return (False, None)

    return (True, data_edges)



# Returns the covariance ellipse extracted from the perceived edges of points detected in the scatterplot image
# Inputs: 
#  - image_rgba: numpy array containing the RGBA values of all the pixels of the scatterplot image
def getPerceivedCovarianceEllipse(image_rgba):
    """Extract the covariance ellipse of the edges perceived in a scatterplot image.

    Input:
      - image_rgba: numpy array containing the RGBA values of all the pixels
                    of the scatterplot image

    Returns the 5-tuple
      (wereEdgesPerceived, perceived_points, covellipse, minorOnMajorAxis, angle);
    the last three entries are None when no edges were perceived.
    """
    edges_found, edge_points = getPerceivedPoints(image_rgba)

    if edges_found:
        # 1.5 sigma ellipse around the perceived edge points
        perceived_ellipse = getCovEllipseForPoints(edge_points, 1.5)
        axis_ratio, major_axis_angle = getMeasuresFromCovEllipseProperties(perceived_ellipse)
        return edges_found, edge_points, perceived_ellipse, axis_ratio, major_axis_angle

    return edges_found, edge_points, None, None, None


# Gets actual covariance ellipse based on the data points in the csv file, which the scatterplot image should visualize
def getDataCovarianceEllipse(datapoints):
    """Compute the covariance ellipse of the actual data points.

    The y values are first rescaled by rangeX/rangeY so that both dimensions
    span the same range before the 1.5 sigma covariance ellipse is computed.

    Input:
      - datapoints: array of points where each point is of form [x,y];
                    it is not modified

    Returns: (covellipse, minorOnMajorAxis, angle)
    """
    # work on a floating point copy: with integer input the scaled y values
    # would otherwise be truncated when written back into the array
    scaledDataPoints = np.array(datapoints, dtype=float)

    X = scaledDataPoints[:, 0]
    Y = scaledDataPoints[:, 1]

    rangeX = X.max() - X.min()
    rangeY = Y.max() - Y.min()

    # when all y values are equal there is nothing to rescale, and dividing by
    # the zero range would only inject inf/NaN into the covariance matrix
    if rangeY != 0:
        scaledDataPoints[:, 1] = Y * (rangeX / rangeY)

    pca_ellipse_nstd = 1.5
    covellipse = getCovEllipseForPoints(scaledDataPoints, pca_ellipse_nstd)
    minorOnMajorAxis, angle = getMeasuresFromCovEllipseProperties(covellipse)

    return (covellipse, minorOnMajorAxis, angle)


# Computes the correlation goodness measures from the covariance ellipse 
# Returns
# - aspect ratio (minor axis / major axis) of the ellipse
# - major axis orientation of the ellipse as an anticlockwise angle from the x-axis in degrees
#   (the normalisation to a fraction of the max angle of 180degrees is currently disabled, so the raw angle is returned),
#   where an angle below 90 indicates +ve correlation and an angle above 90 -ve correlation
def getMeasuresFromCovEllipseProperties(covellipse):
    """Return (minor axis / major axis, angle) for the given covariance ellipse.

    The angle is passed through exactly as stored on the ellipse.
    """
    minor = float(covellipse.axis_minor)
    major = float(covellipse.axis_major)
    aspect_ratio = minor / major
    return aspect_ratio, covellipse.angle


# Computes the smallest angle between the major axis of the actual covariance ellipse and the major axis of the perceived covariance ellipse
# Inputs: the actual and perceived cov ellipses as Ellipse objects
# Returns: the angle between the major axes of the 2 ellipses
def getSmallestAngleBetweenMajorAxesOfActualAndPerceivedCovEllipses(actual_covellipse, perceived_covellipse):
    """Return the smallest angle between the major axes of two covariance ellipses.

    Inputs: the actual and perceived cov ellipses as Ellipse objects
    Returns: the angle between the two major axes, expressed as a fraction of
    a right angle (i.e. the angle in radians divided by pi/2)
    """
    gradients = [utilities.getGradientOfLineGivenAngleToXAxis(ell.angle)
                 for ell in (actual_covellipse, perceived_covellipse)]

    smallest_angle_rad = utilities.computeSmallestAngleInRadiansBetweenTwoLines(gradients[0], gradients[1])
    right_angle_rad = math.pi / 2
    return smallest_angle_rad / right_angle_rad
   
   
# Computes SDy/SDx (i.e., gradient of the standard deviation) of a point cloud
# Input: numpy array of points where each point is of form [x,y]
# Output: SDy/SDx
def getSDySDxRatioOfPointCloud(datapoints):
    """Return SDy/SDx, the ratio of the standard deviations of a point cloud.

    Input: numpy array of points where each point is of form [x,y]
    """
    x_values, y_values = datapoints.T
    spread_x = np.std(x_values)
    spread_y = np.std(y_values)
    return spread_y / spread_x
    
    
# Computes the point cloud direction error 
# If the direction of the actual and perceived point cloud is EQUAL (i.e., the correlation has same sign), 
# 0 is returned 
# If the direction of the actual and perceived point cloud is DIFFERENT (i.e., the correlation has a different sign), 
# a measure of how far the perceived ellipse angle is from changing its sign and thus get the required sign 
# is returned; for angles in [0,180] this measure lies in [0,0.5]
def getPointCloudDirectionError(actual_covellipse, perceived_covellipse):
    """Return the direction error between the actual and perceived point cloud.

    The direction of an ellipse is the sign of (90 - angle). When both
    ellipses have the same sign, 0 is returned. Otherwise the perceived angle's
    distance from 90 is normalised by 90 and the smaller of that value and its
    complement to 1 is returned.
    """
    actual_side = np.sign(90 - actual_covellipse.angle)
    perceived_side = np.sign(90 - perceived_covellipse.angle)
    if actual_side == perceived_side:
        return 0

    distance_from_vertical = abs(perceived_covellipse.angle - 90) / 90
    return min(distance_from_vertical, 1 - distance_from_vertical)
        
        


# Compute the cluster overlap measure between 2 clusters
# returns a value in [0,1]; if value is in [0,0.5] then 
# the covariance ellipses of the clusters do not overlap and thus the measure is based on how apart the ellipses are;
# if value is in (0.5,1] then the covariance ellipses of the clusters overlap
def getClusterOverlapMeasure(covEllipseOverlapArea, covellCluster1, covellCluster2, pointsNotInEllipse1, pointsNotInEllipse2):
    """Compute the overlap measure between two clusters.

    Inputs:
      - covEllipseOverlapArea: overlap area of the two covariance ellipses
      - covellCluster1/2:      the covariance ellipses of the two clusters
      - pointsNotInEllipse1/2: points of each cluster used for the distance
                               based measure (only read when there is no overlap)

    Returns:
      - 0.5 + (overlap area / smaller ellipse area) / 2   when the overlap area is positive
      - 0.5 - (min pairwise distance / max pairwise distance) / 2   otherwise
    """
    # overlapping covariance ellipses: base the measure on the overlap itself
    if covEllipseOverlapArea > 0:
        area_cluster1 = ellipse.getEllipseArea(covellCluster1)
        area_cluster2 = ellipse.getEllipseArea(covellCluster2)
        overlap_fraction = covEllipseOverlapArea / float(min(area_cluster1, area_cluster2))
        return 0.5 + (overlap_fraction / 2)

    # no overlap: compare the nearest and the farthest pair of points across the
    # two clusters; the caller keeps this cheap by passing only a subset of points
    pairwise_distances = utilities.distanceBetweenEachPairOfPointsInTwoArrays(pointsNotInEllipse1, pointsNotInEllipse2)
    flat_distances = pairwise_distances.flatten()
    nearest = np.amin(flat_distances)
    farthest = np.amax(flat_distances)
    separation_fraction = nearest / float(farthest)
    return 0.5 - (separation_fraction / 2)


# Find the points that lay on the outer side of the covariance ellipse
def findPointsOnOuterOfCovEllipse (covellipse, perceived_datapoints):
    """Find the points that lay on the outer side of the covariance ellipse.

    ellipse.findPointsNotInEllipse is called with the size adjustments
    1.2, 1.1, 1.0, 0.9 and 0.8 (in that order) until it returns a non-empty
    result. (Presumably the adjustment scales the ellipse - confirm against
    the ellipse module.)

    Returns: the first non-empty result, or the (empty) result obtained for
    the last adjustment if none of them yields any point.
    """
    # the adjustments are enumerated in tenths: repeatedly subtracting 0.1 from
    # 1.2 accumulates round-off (1.2 - 4*0.1 ends up just below 0.8), which
    # used to make the loop stop before the final adjustment of 0.8 was tried
    ellsizeadjust_str_tenths = 12
    ellsizeadjust_end_tenths = 8

    outer_datapoints = []
    for tenths in range(ellsizeadjust_str_tenths, ellsizeadjust_end_tenths - 1, -1):
        outer_datapoints = ellipse.findPointsNotInEllipse(covellipse, perceived_datapoints, tenths / 10.0)
        if len(outer_datapoints) != 0:
            break

    return outer_datapoints


# Computes the cluster overlap measure for pairwise clusters
def getPairwiseClusterOverlapMeasures(covellipse_foreachcluster, datapoints_foreachcluster, TriangleIndices = None):
    """Compute the cluster overlap measure for pairwise clusters.

    Inputs:
      - covellipse_foreachcluster: covariance ellipse of each cluster, indexable by cluster index
      - datapoints_foreachcluster: data points of each cluster, indexable by cluster index
      - TriangleIndices:           optional sequence of [i, j] cluster index pairs to evaluate;
                                   defaults to all pairs of the strict upper triangle

    Returns: (clusterOverlapMeasures, covEllipsesOverlapAreas) where
    clusterOverlapMeasures has the same shape as the overlap area matrix and
    holds the measure of every evaluated pair at [i, j] (zero elsewhere).
    """
    covEllipsesOverlapAreas = ellipse.getPairwiseEllipseOverlapAreas(covellipse_foreachcluster)

    clusterOverlapMeasures = np.zeros(covEllipsesOverlapAreas.shape)
    # `is None`: comparing a numpy array with `== None` is element-wise and
    # cannot be used as a condition
    if TriangleIndices is None:
        TriangleIndices = np.stack(np.triu_indices(clusterOverlapMeasures.shape[0], 1), axis=-1)

    # outer points of each cluster, computed lazily and at most once per cluster
    outerPoints_foreachcluster = {}

    for pair in TriangleIndices:
        first, second = pair[0], pair[1]
        overlap = covEllipsesOverlapAreas[first, second]
        covellCluster1 = covellipse_foreachcluster[first]
        covellCluster2 = covellipse_foreachcluster[second]

        # the outer points are only needed when the ellipses do not overlap;
        # find them here to avoid repeating this more than once per cluster
        if (overlap <= 0.0):
            if first not in outerPoints_foreachcluster:
                outerPoints_foreachcluster[first] = \
                    findPointsOnOuterOfCovEllipse (covellCluster1, datapoints_foreachcluster[first])
            if second not in outerPoints_foreachcluster:
                outerPoints_foreachcluster[second] = \
                    findPointsOnOuterOfCovEllipse (covellCluster2, datapoints_foreachcluster[second])

        clusterOverlapMeasures[first, second] = \
                getClusterOverlapMeasure(overlap,
                                         covellCluster1,
                                         covellCluster2,
                                         outerPoints_foreachcluster.get(first, []),
                                         outerPoints_foreachcluster.get(second, []))

    return  clusterOverlapMeasures, covEllipsesOverlapAreas


# Returns the overall cluster overlap measure for the plot in general given the pairwise cluster overlap measures
# i.e., the average cluster overlap measures of all pairwise clusters
# the returned value is in [0,1]
def getOverallClusterOverlapMeasure(pairwiseClusterOverlapMeasures):
    """Return the overall cluster overlap measure of the plot.

    This is the sum of all entries of the pairwise measure matrix divided by
    the number of 2-combinations of its first dimension (the number of cluster pairs).
    """
    measures_total = np.sum(pairwiseClusterOverlapMeasures.flatten())
    cluster_count = pairwiseClusterOverlapMeasures.shape[0]
    pair_count = utilities.computeNumberOfCombinations(cluster_count, 2)
    return measures_total / float(pair_count)



# Returns the data points as one list and as a list of cluster points in the order they should be rendered
# such that those clusters with a high average overlap measure are placed in the latter part of the list
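# inputs: all the datapoints, datapointsClusters to sort and their relative outliers, pairwise overlap measures, measures per cluster, 
#         actual covariance ellipses per cluster, actual SDy/SDx ratios per cluster, and (optionally) the list of cluster index pairs
# outputs: datapoints, datapointsClusters, and outliers per cluster sorted, the pairwiseClusterOverlapMeasures metric resorted to match the new order,
#          and the measures, covariance ellipses and SDy/SDx ratios per cluster relabelled to match the new order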
def getRenderingOrderOfClusters(datapoints, datapointsClusters, outliers_perCluster, pairwiseClusterOverlapMeasures, clustersMeasures, actualCovEllipses, actualSDySDxRatios, TriangleIndices = None):
    """Order the clusters for rendering by ascending average overlap measure.

    Clusters with a high average overlap measure end up in the latter part of
    the order. All per-cluster inputs are relabelled so that the cluster
    rendered first gets id 0, the next one id 1, and so on.

    Inputs:
      - datapoints:                     all data points (only returned as is when there is 1 cluster)
      - datapointsClusters:             points of each cluster, indexable by cluster id 0..n-1
      - outliers_perCluster:            outliers of each cluster, indexable by cluster id
      - pairwiseClusterOverlapMeasures: square matrix with the overlap measure of each cluster pair [i, j]
      - clustersMeasures:               measures of each cluster, indexable by cluster id
      - actualCovEllipses:              covariance ellipse of each cluster, indexable by cluster id
      - actualSDySDxRatios:             SDy/SDx ratio of each cluster, indexable by cluster id
      - TriangleIndices:                optional sequence of [i, j] cluster id pairs; defaults to all
                                        pairs of the strict upper triangle of the pairwise matrix

    Returns the 7-tuple
      (sorted datapoints, sorted clusters, sorted outliers per cluster,
       relabelled pairwise overlap measures, relabelled cluster measures,
       relabelled covariance ellipses, relabelled SDy/SDx ratios)
    where the per-cluster entries are dicts keyed by the new cluster id.
    With exactly 1 cluster the inputs are returned unchanged.
    """
    # if TriangleIndices is not computed yet
    # (`is None`: `== None` on a numpy array is element-wise and cannot be used as a condition)
    if TriangleIndices is None:
        TriangleIndices = np.stack(np.triu_indices(pairwiseClusterOverlapMeasures.shape[0], 1), axis=-1)

    # compute no. of clusters
    noOfClusters = len(datapointsClusters)

    # if 1 cluster, no need to compute ordering of clusters
    if noOfClusters == 1:
        return datapoints, datapointsClusters, outliers_perCluster, pairwiseClusterOverlapMeasures, clustersMeasures, actualCovEllipses, actualSDySDxRatios

    # sum all the overlap measures related to each cluster
    clusterOverlapSums = [0.0] * noOfClusters
    for pair in TriangleIndices:
        measure = pairwiseClusterOverlapMeasures[pair[0], pair[1]]
        clusterOverlapSums[pair[0]] += measure
        clusterOverlapSums[pair[1]] += measure

    # find the average overlap measure for each cluster (every cluster is paired with n-1 others)
    clusterAvgOverlapMeasures = [total / float(noOfClusters - 1) for total in clusterOverlapSums]

    # get the indexes of the clusters sorted in ascending order according to their average overlap measure
    renderingOrder = sorted(range(noOfClusters), key=clusterAvgOverlapMeasures.__getitem__)

    # relabel the clusters and everything attached to them
    sortedClusters = {}
    sortedOutliers_perCluster = {}
    clusterIDSwap_oldtonew = {}
    clustersMeasures_relabelled = {}
    actualCovEllipses_relabelled = {}
    actualSDySDxRatios_relabelled = {}
    for newID, oldID in enumerate(renderingOrder):
        sortedClusters[newID] = datapointsClusters[oldID]
        sortedOutliers_perCluster[newID] = outliers_perCluster[oldID]
        clusterIDSwap_oldtonew[oldID] = newID
        clustersMeasures_relabelled[newID] = clustersMeasures[oldID]
        actualCovEllipses_relabelled[newID] = actualCovEllipses[oldID]
        actualSDySDxRatios_relabelled[newID] = actualSDySDxRatios[oldID]

    # all the points in rendering order (None when there are no clusters at all)
    sortedDatapoints = None
    if renderingOrder:
        sortedDatapoints = np.concatenate([datapointsClusters[oldID] for oldID in renderingOrder], axis = 0)

    # relabel pairwiseClusterOverlapMeasures to match the new order of the clusters;
    # the new ids of a pair are sorted so that the value stays in the upper triangle
    pairwiseClusterOverlapMeasures_relabelled = np.zeros(pairwiseClusterOverlapMeasures.shape)
    for pair in TriangleIndices:
        newRow, newCol = sorted([clusterIDSwap_oldtonew[pair[0]], clusterIDSwap_oldtonew[pair[1]]])
        pairwiseClusterOverlapMeasures_relabelled[newRow, newCol] = \
                                          pairwiseClusterOverlapMeasures[pair[0], pair[1]]

    return sortedDatapoints, sortedClusters, sortedOutliers_perCluster, pairwiseClusterOverlapMeasures_relabelled, clustersMeasures_relabelled, actualCovEllipses_relabelled, actualSDySDxRatios_relabelled
    












