"""
The main script that runs the model-based scatterplot optimization system 

Authors: Gregorio Palmas, March 2016
"""


import os 
from scatterplotimage import ScatterPlotter
import matplotlib.pyplot as plt
#import clustering
import model
import utilities
import colors
import optimizers
import plottingintermdata
import sys
import numpy as np
from scipy import stats
import itertools


# Data csv files, optimized plot image files, and temp directories.
# All of them are relative paths. They are assembled with os.path.join so that
# only the platform's own separator ends up in the path (the optimized plots
# directory used to mix os.path.sep with a hard-coded "/").
dataFilesDir = os.path.join("..", "data", "figuresData")
optimizedplotFilesDir = os.path.join("..", "optimizedplots", "exFigures")
tempDir = "temp"
tempDataFilesDir = os.path.join(tempDir, "data")
tempImageplotFilesDir = os.path.join(tempDir, "imageplots")
# Temp csv files that live in the temp data directory
datapointsClusterFilePath_norunid = os.path.join(tempDataFilesDir, "dpcluster_temp.csv")
factorsTempFilePath_norunid = os.path.join(tempDataFilesDir, "factors_temp.csv")
measuresTempFilePath_norunid = os.path.join(tempDataFilesDir, "goodnessmeasures_temp.csv")

# Input data and output image plot file names and file paths 
#dataFileNames = ["randNorm_0.25corr_1000pnts.csv", "randNorm_0.5corr_1000pnts.csv", "randNorm_0.75corr_1000pnts.csv", "randNorm_1corr_1000pnts.csv", "randNorm_0.25corr_100pnts.csv", "randNorm_0.5corr_100pnts.csv", "randNorm_0.75corr_100pnts.csv", "randNorm_1corr_100pnts.csv"]
#dataFileNames = ["randNorm_1000pnts_1clusters_4.csv"]
#dataFileNames = [ "randNorm_100pnts_1clusters.csv","randNorm_500pnts_1clusters.csv","randNorm_1000pnts_1clusters.csv","randNorm_10000pnts_1clusters.csv", \
#                  "randNorm_1000pnts_2clusters.csv","randNorm_1000pnts_3clusters.csv","randNorm_1000pnts_4clusters.csv","randNorm_1000pnts_5clusters.csv","randNorm_10000pnts_5clusters.csv"]
#dataFileNames = ["randNorm_1000pnts_4clusters_0.csv","randNorm_1000pnts_5clusters_1.csv","randNorm_100pnts_4clusters_3.csv","randNorm_100pnts_5clusters_3.csv",\
#                "randNorm_10000pnts_5clusters_3.csv", "randNorm_10000pnts_5clusters_2.csv"]
#dataFileNames = ["randNorm_10000pnts_1clusters_7.csv"]
#dataFileNames = utilities.getFileNamesFromDir(dataFilesDir,"csv")

#dataFileNames = [ "randNorm_1000pnts_1clusters_5.csv","randNorm_10000pnts_1clusters_9.csv"]

#datafile for cluster vis example
#dataFileNames = ["randNorm_1000pnts_3clusters_clusters_ex.csv"]

# Data file for the correlation example.
# This is the only selection that is not commented out, i.e. the active one.
dataFileNames = [
    "randNorm_1000pnts_2clusters_aspect_ratioex.csv",
]

#datafile for overlap example
#dataFileNames = ["randNorm_50pnts_1clusters_6.csv"]

#datafile for overplotting example
#dataFileNames = ["randNorm_10000pnts_1clusters_2.csv"]

#datafile for outliers example
#dataFileNames = ["randNorm_1000pnts_1clusters_3.csv"]

# Discretized visual variables (design space)
#marker_sizes = np.linspace(3, 53, 21) # agreed upon on Jan 6
#marker_opacities = np.linspace(5, 255, 21) #agreed upon on Jan 6
#image_widths = [1000] #agreed upon Jan 6
#
#image_aspect_ratios = [.5]#for the cluster perc figure

#for the correlation example figure
#marker_sizes = [15.5]
#marker_opacities = [67.5]
#image_widths = [1000]
#image_aspect_ratios = [0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.4, 1.5]

#for the overlap example figure
#marker_sizes = np.linspace(3, 53, 21)
#marker_opacities = [255]
#image_widths = [1000]
#image_aspect_ratios = [1]

#for the overplot example figure and the outliers example
# (commented out like the other alternatives: these four assignments used to be
#  active, but were always overridden by the "winner design" assignments below)
#marker_sizes = np.linspace(3, 53, 21)
#marker_opacities = np.linspace(5, 255, 21)
#image_widths = [1000]
#image_aspect_ratios = [1]


#marker_sizes = np.linspace(3, 53, 5)
#marker_opacities = np.linspace(5, 255, 5)
#image_widths = [1000]
#image_aspect_ratios = np.linspace(0.5, 1.5, 5)#[0.5, 1, 1.5]
#image_aspect_ratios = [0.5, 1, 1.5]
#image_aspect_ratios = [1]



# Use the following when plotting just one design
#marker_sizes=[10]
#marker_opacities=[155]
#image_widths = [1000]
#image_aspect_ratios=[1]

#winner design for image 2 clusters aspect ratio
# ACTIVE design space: exactly one value per visual variable, i.e. one design.
marker_sizes=[15.5]
marker_opacities=[67.0]
image_widths = [1000]
image_aspect_ratios=[0.5]


### Design space used for creating the weight sets of the correlation task ###

#marker_sizes=[5, 20, 40]
#marker_opacities=[4, 50, 100, 255]
#image_widths = [1000]
#image_aspect_ratios=np.linspace(0.5, 1.5, 5)#[0.5, 1.0, 1.5]


#all the weight sets had total_angle_error, total_axis_error and totalSDySDxRatio_error set to 0 for the correlation task

"""
    Classic scatter plot with almost black, almost fully opaque, small dots.
    We prefer darker images (lightnessfactor).
"""
# Names of the goodness measures, in the order in which their weights appear in
# every weight vector defined below (index 0 = total_angle_error, ...,
# index 11 = OutliersPerceivability).
WeightFactorNames = (
    "total_angle_error",
    "total_axis_error",
    "totalSDySDxRatio_error",
    "total_direction_error",
    "total_perceived_bbox_error",
    "overlapFactor",
    "overplottingFactor",
    "lightnessFactor",
    "meanFactor",
    "contrastFactor",
    "ClusterPerceivability",
    "OutliersPerceivability",
)


def _makeWeights(**factorWeights):
    """
    Build a weight vector with one entry per name in WeightFactorNames.

    Each keyword argument names a goodness measure and gives its weight; every
    measure that is not mentioned gets the weight 0.

    Returns a 1-d numpy array with len(WeightFactorNames) entries.
    Raises ValueError for a name that is not listed in WeightFactorNames, so
    that a misspelled measure cannot silently end up with weight 0.
    """
    unknownNames = sorted(set(factorWeights) - set(WeightFactorNames))
    if unknownNames:
        raise ValueError("Unknown goodness measure(s): " + ", ".join(unknownNames))
    return np.array([factorWeights.get(name, 0) for name in WeightFactorNames])


# Every task comes as a pair of weight sets: the "...Min" set uses positive
# weights and the "...Max" set uses the same weights with the sign flipped.
# All measures that are not named in a call have weight 0.

# Cluster perceivability only
Weights_ClustersPercMin = _makeWeights(ClusterPerceivability=1)
Weights_ClustersPercMax = _makeWeights(ClusterPerceivability=-1)

# Outliers perceivability only
Weights_OutliersPercMin = _makeWeights(OutliersPerceivability=1)
Weights_OutliersPercMax = _makeWeights(OutliersPerceivability=-1)

# Correlation: angle error and (twice as heavy) axis error
Weights_CorrMin = _makeWeights(total_angle_error=1, total_axis_error=2)
Weights_CorrMax = _makeWeights(total_angle_error=-1, total_axis_error=-2)

# Angle error only
Weights_AngleMin = _makeWeights(total_angle_error=1)
Weights_AngleMax = _makeWeights(total_angle_error=-1)

# Axis error only
Weights_AxisMin = _makeWeights(total_axis_error=2)
Weights_AxisMax = _makeWeights(total_axis_error=-2)

# Overlap factor only
# (Weights_OverlapMin used to be defined twice with identical values; the
#  redundant second definition has been dropped.)
Weights_OverlapMin = _makeWeights(overlapFactor=1)
Weights_OverlapMax = _makeWeights(overlapFactor=-1)

# Overplotting factor only
Weights_OverplotMin = _makeWeights(overplottingFactor=1)
Weights_OverplotMax = _makeWeights(overplottingFactor=-1)


# Weight sets that are actually evaluated, and their names. The two lists are
# parallel: WeightsArrayNames[i] is the name of WeightsArray[i].
# Active choice: a single set in which every measure has the weight 1.
WeightsArray = [np.ones_like(Weights_ClustersPercMin)]
WeightsArrayNames = ["AllOnes"]


#### Weight sets for the cluster visibility examples ####
#WeightsArray = [np.divide(Weights_ClustersPercMin, np.linalg.norm(Weights_ClustersPercMin)),
#                np.divide(Weights_ClustersPercMax, np.linalg.norm(Weights_ClustersPercMax))]
#WeightsArrayNames = ["Weights_ClustersPercMin","Weights_ClustersPercMax"]

#### Weight sets for the correlation examples####
#WeightsArray = [np.divide(Weights_CorrMin, np.linalg.norm(Weights_CorrMin)),\
#                np.divide(Weights_CorrMax, np.linalg.norm(Weights_CorrMax)),\
#                np.divide(Weights_AxisMax, np.linalg.norm(Weights_AxisMax)),\
#                np.divide(Weights_AxisMin, np.linalg.norm(Weights_AxisMin)),\
#                np.divide(Weights_AngleMin, np.linalg.norm(Weights_AngleMin)),
#                np.divide(Weights_AngleMax, np.linalg.norm(Weights_AngleMax))]
#WeightsArrayNames = ["Weights_CorrMin","Weights_CorrMax","Weights_AxisMax","Weights_AxisMin","Weights_AngleMin","Weights_AngleMax"]


#WeightsArray= [np.divide(Weights_OverlapMin, np.linalg.norm(Weights_OverlapMin)),
#               np.divide(Weights_OverlapMax, np.linalg.norm(Weights_OverlapMax))]
#WeightsArrayNames = ["Weights_OverlapMin","Weights_OverlapMax"]

#WeightsArray= [np.divide(Weights_OverplotMin, np.linalg.norm(Weights_OverplotMin)),
#               np.divide(Weights_OverplotMax, np.linalg.norm(Weights_OverplotMax))]
#WeightsArrayNames = ["Weights_OverplotMin","Weights_OverplotMax"]

#WeightsArray= [np.divide(Weights_OutliersPercMin, np.linalg.norm(Weights_OutliersPercMin)),
#               np.divide(Weights_OutliersPercMax, np.linalg.norm(Weights_OutliersPercMax))]
#WeightsArrayNames = ["Weights_OutliersPercMin","Weights_OutliersPercMax"]


# Color deltas obtained from the colors module.
# (decided to use this with colorbrewer for 5 sets on 9th March 2016)
colorDeltas = colors.getDeltaEBetweenCategoricalColorBrewerColors()

# Info about visual variables and designs to be evaluated:
# the largest marker size, and the number of designs in the full cartesian
# product of the four discretized visual variables.
max_marker_size = max(marker_sizes)
max_number_of_designs = (
    len(marker_sizes)
    * len(marker_opacities)
    * len(image_widths)
    * len(image_aspect_ratios)
)

# Settings
# Available search strategies (position 0: exhaustive, 1: random, 2: none)
searchTypes = ["exhaustive", "random", "none"]
# ... select "none" when plotting one design
selectedSearch = searchTypes[searchTypes.index("none")]
plotIntermData = False  # for TESTING
bSavePlotWithAxes = False  # kept constant

# For RANDOM SEARCH only: number of repetitions for the same data set points
number_of_repetitions = 4

# For EXHAUSTIVE SEARCH only (or No search): all possible designs as a cartesian
# product of the discretized visual variables, each design being a tuple
# (marker_size, marker_opacity, image_width, image_aspect_ratio).
# For the random search the list is not built and allDesigns stays None.
if selectedSearch in (searchTypes[0], searchTypes[2]):
    allDesigns = list(
        itertools.product(
            marker_sizes,
            marker_opacities,
            image_widths,
            image_aspect_ratios,
        )
    )
else:
    allDesigns = None


# Details to be logged for each optimized design that is found.
# Columns that are common to all search types, in log order:
_logColumns = [
    "start_datetime",
    "end_datetime",
    "max_marker_size",
    "marker_size",
    "marker_opacity",
    "image_width",
    "image_aspect_ratio",
    "axis_actual_foreachcluster",
    "angle_actual_foreachcluster",
    "SDySDxRatio_actual_foreachcluster",
    "axis_perceived_foreachcluster",
    "angle_perceived_foreachcluster",
    "SDySDxRatio_perceived_foreachcluster",
    "axis_error_foreachcluster",
    "angle_error_foreachcluster",
    "unit_error_foreachcluster",
    "SDySDxRatio_actualperceived_diff_foreachcluster",
    "SDySDxRatio_error_foreachcluster",
    "direction_error_foreachcluster",
    "perceived_bbox_error_foreachcluster",
    "overlapFactor_foreachcluster",
    "overplottingFactor_foreachcluster",
    "lightnessFactor_foreachcluster",
    "meanFactor_foreachcluster",
    "contrastFactor_foreachcluster",
    "overlapFactor",
    "overplottingFactor",
    "lightnessFactor",
    "meanFactor",
    "contrastFactor",
    "ClusterPerceivability",
    "OutliersPerceivability",
    "objective_score",
    "noOfClusters",
]
# The exhaustive and the random search put their own bookkeeping columns in
# front of the common ones; without a search nothing is added.
if selectedSearch == searchTypes[0]:
    _logColumns = [
        "n_incumbent",
        "n_iters_overall",
        "maxNoOfDesigns",
    ] + _logColumns
elif selectedSearch == searchTypes[1]:
    _logColumns = [
        "n_incumbent",
        "n_iters_overall",
        "n_iters_without_improvement",
        "max_iters_without_improvement",
    ] + _logColumns
# One pipe-separated header line, terminated by a newline
log_headers = "|".join(_logColumns) + "\n"


# Maximum number of clusters that we assume a set of data points will have
#maxClusters = 4
    

# Get a scatter plotter
SP = ScatterPlotter(max_marker_size)

# Every weight set needs a name (the name becomes the output sub directory).
# Fail right away instead of running into an IndexError after part of the
# work has already been done.
if len(WeightsArrayNames) < len(WeightsArray):
    raise ValueError("WeightsArrayNames must provide a name for every entry of WeightsArray")

# Find an optimized scatterplot for each set of data points in the dataFileNames files
for dataFileName in dataFileNames:


    # ***************************************
    #  DATA part
    # ***************************************


    #if "clusters_3" not in dataFileName and "clusters_6" not in dataFileName:
    #    continue

    # Load data points from csv file and cluster the data points
    dataFilePath = os.path.join(dataFilesDir, dataFileName)

    datapoints, datapointsClusters, datapointsClusters_woOutliers, \
    outliers_foreachcluster, corr_perCluster = \
            utilities.getDataPointsWithClustersFromCSVFile(dataFilePath, hasHeading=True, shuffle=False)

    DataBoundingBox = utilities.getBBox(datapoints)
    #datapointsClusters = {0:datapoints}
    #datapointsClusters = clustering.clusterDataPoints(datapoints, maxK=maxClusters)  # use this to detect the number of clusters in the data points

    # One row (i, j) with i < j for every pair of clusters
    TriangleIndices = np.stack(np.triu_indices(len(datapointsClusters), 1), axis=-1)


    # Compute covariance ellipse and its properties for each actual data points cluster.
    # The clusters are addressed by their index 0 .. len(datapointsClusters)-1.
    clustersMeasures = {}
    actualCovEllipses = {}
    #actualCorrelations = {}
    actualSDySDxRatios = {}
    for d in range(len(datapointsClusters)):
        datapointsCluster = datapointsClusters[d]
        actual_covellipse, actual_minorOnMajorAxis, actual_angle = model.getDataCovarianceEllipse(datapointsCluster)
        clustersMeasures[d] = (actual_minorOnMajorAxis, actual_angle)
        actualCovEllipses[d] = actual_covellipse
        #actualCorrelations[d] = stats.pearsonr(datapointsCluster[:,0], datapointsCluster[:,1])[0]
        actualSDySDxRatios[d] = model.getSDySDxRatioOfPointCloud(datapointsCluster)

    # Compute the cluster overlap measure for each pair of clusters
    actualPairwiseClusterOverlapMeasures, actualCovEllipsesOverlapAreas = model.getPairwiseClusterOverlapMeasures(actualCovEllipses, datapointsClusters, TriangleIndices)

    # Compute rendering order of clusters, Comment for cluster pic example
    #datapoints, \
    #datapointsClusters, \
    #outliers_foreachcluster, \
    #actualPairwiseClusterOverlapMeasures_relabelled, \
    #clustersMeasures_relabelled, \
    #actualCovEllipses_relabelled, \
    #actualSDySDxRatios_relabelled = model.getRenderingOrderOfClusters(datapoints, \
    #                                                                  datapointsClusters, \
    #                                                                  outliers_foreachcluster, \
    #                                                                  actualPairwiseClusterOverlapMeasures, \
    #                                                                  clustersMeasures, \
    #                                                                  actualCovEllipses, \
    #                                                                  actualSDySDxRatios, \
    #                                                                  TriangleIndices)

    # The rendering order computation above is disabled, so the "relabelled"
    # names simply refer to the measures in their original cluster order.
    actualPairwiseClusterOverlapMeasures_relabelled = actualPairwiseClusterOverlapMeasures
    clustersMeasures_relabelled = clustersMeasures
    actualCovEllipses_relabelled = actualCovEllipses
    actualSDySDxRatios_relabelled = actualSDySDxRatios

    # Get distinguishable RGB colors for clusters
    # Each color is a tuple of the form (r,g,b) where each of r, g and b values is in [0,1]
    color_foreachcluster = colors.getRGBColorsForClusters(actualPairwiseClusterOverlapMeasures_relabelled, colorDeltas, TriangleIndices)

    # Plotting the actual data points and corresponding cluster covariance ellipses for TESTING only.
    # This must be placed here, just after the order of the cluster rendering is finalized,
    # otherwise the colors would be inconsistent with the ones of the perceived clusters.
    if plotIntermData:
        plottingintermdata.plotActualDataPointsAndClusterCovEllipses(datapointsClusters, actualCovEllipses_relabelled, dataFileName)  # for TESTING only


    # Parenthesized so that the line is valid (and prints the same) under both
    # Python 2 and Python 3.
    print("\n******" + dataFileName + "******\n")


    # ***************************************
    #  DESIGN part
    # ***************************************

    # WeightsArray and WeightsArrayNames are parallel lists
    for Weights, WeightsName in zip(WeightsArray, WeightsArrayNames):

        # Output image plot file names and file paths
        scatterplotImageFileName = None #dataFileName.replace(".csv","_"+runid+".png")
        scatterplotImageFilePath = None #tempImageplotFilesDir + os.path.sep + scatterplotImageFileName
        if plotIntermData:
            scatterplotImageFileName = dataFileName.replace(".csv", ".png")  # for TESTING only
            scatterplotImageFilePath = tempImageplotFilesDir + os.path.sep + scatterplotImageFileName  # for TESTING only

        # All outputs of one weight set go to <optimizedplotFilesDir>/<WeightsName>/
        # and are named after the data file.
        winningplotFilePath = os.path.join(optimizedplotFilesDir, WeightsName, dataFileName.replace(".csv", ".png"))
        logFilePath = winningplotFilePath.replace(".png", "_log.log")
        warningsFilePath = winningplotFilePath.replace(".png", "_warnings.txt")


        if selectedSearch == searchTypes[0]:
            # Optimization loop for EXHAUSTIVE SEARCH
            optimizers.exhaustiveSearch_PP(
                allDesigns,
                max_marker_size,
                max_number_of_designs,
                DataBoundingBox,
                datapointsClusters,
                color_foreachcluster,
                datapoints,
                outliers_foreachcluster,
                clustersMeasures_relabelled,
                actualCovEllipses_relabelled,
                actualSDySDxRatios_relabelled,
                SP,
                bSavePlotWithAxes,
                dataFileName,
                scatterplotImageFilePath,
                winningplotFilePath,
                warningsFilePath,
                logFilePath,
                log_headers,
                plotIntermData,
                TriangleIndices,
                Weights,
                False,
            )

        elif selectedSearch == searchTypes[1]:
            # Optimization loop for RANDOM SEARCH
            optimizers.randomSearch(
                number_of_repetitions,
                marker_sizes,
                marker_opacities,
                image_widths,
                image_aspect_ratios,
                max_marker_size,
                max_number_of_designs,
                DataBoundingBox,
                datapointsClusters,
                color_foreachcluster,
                datapoints,
                outliers_foreachcluster,
                clustersMeasures_relabelled,
                actualCovEllipses_relabelled,
                actualSDySDxRatios_relabelled,
                SP,
                bSavePlotWithAxes,
                dataFileName,
                scatterplotImageFilePath,
                winningplotFilePath,
                warningsFilePath,
                logFilePath,
                log_headers,
                plotIntermData,
                TriangleIndices,
                Weights,
                True,
            )

        else:
            # No search: save the perceived points of every design in allDesigns.
            # NOTE(review): all designs are written to the same file path, so
            # this mode presumably expects exactly one configured design --
            # confirm how SavePerceivedPoints treats an already existing file.
            perceivedPointsFilePath = winningplotFilePath.replace(".png", "_perceived.csv")
            for design in allDesigns:

                #utilities.GetPerceivedPointsWithEllipses(design, DataBoundingBox, datapointsClusters, color_foreachcluster, winningplotFilePath)

                utilities.SavePerceivedPoints(design, DataBoundingBox, datapointsClusters, color_foreachcluster, perceivedPointsFilePath)

                #optimizers.PlotWinner(design, None, winningplotFilePath, "", datapointsClusters, datapoints, color_foreachcluster, SP, DataBoundingBox)

                #optimizers.PlotWinner(design, None, winningplotFilePath, "", datapointsClusters, datapoints, color_foreachcluster, SP, DataBoundingBox)


        
        
        
        
        