"""
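Post-processing script for the model-based scatterplot optimization system:
for each selected log file it loads the matching data set, reads the winning
design from the log and creates the corresponding ellipse image.
No optimization is run by this script.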

Authors: Luana Micallef, Antti Oulasvirta, Tino Weinkauf, Gregorio Palmas, August 2015
"""


import os 
from scatterplotimage import ScatterPlotter
#import clustering
import model
import utilities
import colors
import optimizers
import plottingintermdata
import sys
import numpy as np
from scipy import stats
import itertools



# Locations of the optimizer log files and of the csv data files, given
# relative to the working directory the script is started from.
#
# Alternative log location used before (kept for reference):
#   ../optimizedplots/ClusteringGeneratedPlots/Training/Hard2
#   (other folders noted: "Blobs", "MediumDots", "SmoothVar2",
#    "/Training/Hard1", "/Training/Hard2")
logsFilesDir = os.path.sep.join(["..", "optimizedplots", "eval"])

# Alternative data sub-folders noted: "studydata/clusters",
# "studydata/outliers", "studydata/corr", "wClusterIDwOutliers"
dataFilesDir = os.path.sep.join(["..", "data", "study2Data/clusters"])

# The log files drive the run. Listing the csv files directly, i.e.
# utilities.getFileNamesFromDir(dataFilesDir, "csv"), is the disabled alternative.
logsFileNames = utilities.getFileNamesFromDir(logsFilesDir, "log")



# For every selected log file: load the matching data set, derive the
# per-cluster measures and colors, look up the winning design recorded in the
# log, and create the ellipse image for that design.
for logsFileName in logsFileNames:

    # ***************************************
    #  DATA part
    # ***************************************

    # Only log files whose name contains "pbc_converted5k" are processed.
    if "pbc_converted5k" not in logsFileName:
        continue

    # The data file name is the log file name up to and including the "k" of
    # the first "k_" occurrence, with ".csv" appended.
    # str.index raises ValueError if the log file name contains no "k_".
    # (For the "clusters_" naming scheme, use pattern = "clusters_" and slice
    #  up to clusterIndexName + len(pattern) + 1 instead.)
    pattern = "k_"
    clusterIndexName = logsFileName.index(pattern)
    dataFileName = logsFileName[:clusterIndexName + len(pattern) - 1] + ".csv"

    # Load data points (and their cluster assignment) from the csv file
    dataFilePath = dataFilesDir + os.path.sep + dataFileName

    (datapoints,
     datapointsClusters,
     datapointsClusters_woOutliers,
     outliers_foreachcluster,
     corr_perCluster) = utilities.getDataPointsWithClustersFromCSVFile(
        dataFilePath, hasHeading=True, shuffle=False)

    DataBoundingBox = utilities.getBBox(datapoints)

    # One row (i, j) with i < j per unordered pair of clusters
    # (strict upper triangle of an n x n index grid).
    TriangleIndices = np.stack(np.triu_indices(len(datapointsClusters), 1), axis=-1)

    # Compute covariance ellipse and its properties for each actual data points cluster
    clustersMeasures = {}
    actualCovEllipses = {}
    actualSDySDxRatios = {}
    # Clusters are addressed by the ids 0..n-1, so index explicitly instead of
    # iterating the container itself.
    for d in range(len(datapointsClusters)):
        datapointsCluster = datapointsClusters[d]
        actual_covellipse, actual_minorOnMajorAxis, actual_angle = \
            model.getDataCovarianceEllipse(datapointsCluster)
        clustersMeasures[d] = (actual_minorOnMajorAxis, actual_angle)
        actualCovEllipses[d] = actual_covellipse
        actualSDySDxRatios[d] = model.getSDySDxRatioOfPointCloud(datapointsCluster)

    # Compute the cluster overlap measure for each pair of clusters
    actualPairwiseClusterOverlapMeasures, actualCovEllipsesOverlapAreas = \
        model.getPairwiseClusterOverlapMeasures(actualCovEllipses, datapointsClusters, TriangleIndices)

    # Compute rendering order of clusters. The data points, clusters and
    # outliers are rebound to the returned versions; the remaining results are
    # stored under "_relabelled" names.
    (datapoints,
     datapointsClusters,
     outliers_foreachcluster,
     actualPairwiseClusterOverlapMeasures_relabelled,
     clustersMeasures_relabelled,
     actualCovEllipses_relabelled,
     actualSDySDxRatios_relabelled) = model.getRenderingOrderOfClusters(
        datapoints,
        datapointsClusters,
        outliers_foreachcluster,
        actualPairwiseClusterOverlapMeasures,
        clustersMeasures,
        actualCovEllipses,
        actualSDySDxRatios,
        TriangleIndices)

    # Get distinguishable RGB colors for clusters
    # Each color is a tuple of the form (r,g,b) where each of r, g and b values is in [0,1]
    colorDeltas = colors.getDeltaEBetweenCategoricalColorBrewerColors()
    color_foreachcluster = colors.getRGBColorsForClusters(
        actualPairwiseClusterOverlapMeasures_relabelled, colorDeltas, TriangleIndices)

    # Parenthesized single-argument form: same output with the Python 2 print
    # statement and the Python 3 print function.
    print("\n******" + dataFileName + "******\n")

    # Winning design recorded in this log file
    winnerLine = utilities.GetWinnerDesignRowFromLogs(logsFilesDir + os.path.sep + logsFileName)

    design = (winnerLine["marker_size"],
              winnerLine["marker_opacity"],
              winnerLine["image_width"],
              winnerLine["image_aspect_ratio"])

    # Output folder for the ellipse images. The string (including its trailing
    # "/") is kept exactly as before because the path handling inside
    # utilities.ensureDirectory is not visible from this script.
    ellipsesDir = logsFilesDir + os.path.sep + "Ellipses/"
    utilities.ensureDirectory(ellipsesDir)

    # Swap only the file extension for ".png"; a plain str.replace would also
    # rewrite any ".log" occurring earlier in the file name.
    imageFileName = os.path.splitext(logsFileName)[0] + ".png"

    # for eval study: ellipses are drawn with ellipses_scale = 2.1
    # (omit the ellipses_scale argument to use the helper's default).
    utilities.CreateEllipseImages(DataBoundingBox,
                                  design,
                                  datapointsClusters,
                                  color_foreachcluster,
                                  ellipsesDir + "/" + imageFileName,
                                  ellipses_scale=2.1)

        
        
        
        
        