
import os 
from scatterplotimage import ScatterPlotter
import matplotlib
import matplotlib.pyplot as plt

#import clustering
import model
import utilities
import colors
import optimizers
import plottingintermdata
import sys
import numpy as np
from scipy import stats
import itertools


### Design from Ronald Rensink ###
# See http://stackoverflow.com/questions/139655/convert-pixels-to-points and
# https://www.w3.org/TR/CSS21/syndata.html#x39 for converting points to pixels.
# The conversion is points = 72 / pic.dpi * pixels; in our case pic.dpi is 72, so 1 point = 1 pixel.

# Directory holding the input CSV files. Other data sets that can be swapped in
# for the last component: "study2Data/outliers", "study2Data/corr",
# "study2Data/clusters".
dataFilesDir = os.path.sep.join(["..", "data", "study2Data/corr"])
dataFileNames = utilities.getFileNamesFromDir(dataFilesDir, "csv")

# Rendering parameters. Each is a one-element list; the plotting loop below
# reads only element 0 of each.
marker_sizes = [2]
marker_opacities = [255]
image_widths = [300]
image_aspect_ratios = [1.0]

# Single plotter instance shared by every file processed below.
SP = ScatterPlotter(marker_sizes[0])

# Render one scatterplot image per CSV file listed in dataFileNames and save it
# next to the CSV file, with the extension replaced by ".png".
for dataFileName in dataFileNames:

    dataFilePath = os.path.join(dataFilesDir, dataFileName)

    # This read is only used for the column names, which become the axis labels.
    data = np.genfromtxt(dataFilePath, dtype=None, names=True, delimiter=",")
    csvHeader = data.dtype.names

    datapoints, datapointsClusters, datapointsClusters_woOutliers, \
    outliers_foreachcluster, corr_perCluster = \
            utilities.getDataPointsWithClustersFromCSVFile(dataFilePath, hasHeading=True, shuffle=False)

    DataBoundingBox = utilities.getBBox(datapoints)
    #datapointsClusters = {0:datapoints}
    #datapointsClusters = clustering.clusterDataPoints(datapoints, maxK=maxClusters)  # use this to detect the number of clusters in the data points

    # NOTE(review): clusters are indexed by 0..n-1 rather than iterated directly,
    # because datapointsClusters may be a dict keyed by int (see the
    # commented-out line above) -- confirm before switching to enumerate().
    numClusters = len(datapointsClusters)

    # Index pairs (i, j) with i < j: one row per unordered pair of clusters.
    TriangleIndices = np.stack(np.triu_indices(numClusters, 1), axis=-1)

    # Per-cluster measures derived from model.getDataCovarianceEllipse.
    # clustersMeasures and actualSDySDxRatios are not read again in this
    # script; they are still computed because the helpers are opaque from here.
    clustersMeasures = {}
    actualCovEllipses = {}
    #actualCorrelations = {}
    actualSDySDxRatios = {}
    for d in range(numClusters):
        datapointsCluster = datapointsClusters[d]
        actual_covellipse, actual_minorOnMajorAxis, actual_angle = model.getDataCovarianceEllipse(datapointsCluster)
        clustersMeasures[d] = (actual_minorOnMajorAxis, actual_angle)
        actualCovEllipses[d] = actual_covellipse
        #actualCorrelations[d] = stats.pearsonr(datapointsCluster[:,0], datapointsCluster[:,1])[0]
        actualSDySDxRatios[d] = model.getSDySDxRatioOfPointCloud(datapointsCluster)

    # Pick one color per cluster from the pairwise overlap measures and the
    # color-difference table.
    colorDeltas = colors.getDeltaEBetweenCategoricalColorBrewerColors()
    actualPairwiseClusterOverlapMeasures, actualCovEllipsesOverlapAreas = model.getPairwiseClusterOverlapMeasures(actualCovEllipses, datapointsClusters, TriangleIndices)
    color_foreachcluster = colors.getRGBColorsForClusters(actualPairwiseClusterOverlapMeasures, colorDeltas, TriangleIndices)

    fig, scatter_plot = plt.subplots()
    #fig.tight_layout()
    # Fully transparent figure background (alpha = 0) and no grid lines.
    fig.patch.set_facecolor((1, 1, 1, 0))
    scatter_plot.grid(False)

    #fig.tight_layout()
    # Leave room at the bottom and left for the axis labels.
    fig.subplots_adjust(bottom=0.15, left=0.15)

    # Files whose name contains "poverty_converted" get smaller axis labels.
    fontSizeLabels = 10 if "poverty_converted" in dataFileName else 16

    # The first two CSV column names label the x and y axes.
    scatter_plot.set_xlabel(csvHeader[0], fontsize=fontSizeLabels)
    scatter_plot.set_ylabel(csvHeader[1], fontsize=fontSizeLabels)

    # Draw every cluster into the same axes, each in its own color.
    for d in range(numClusters):
        SP.FillPlot(fig, scatter_plot, datapointsClusters[d],
                    DataBoundingBox,
                    marker_sizes[0],
                    marker_opacities[0],
                    image_widths[0],
                    image_aspect_ratios[0],
                    color_foreachcluster[d])

    fontSize = 6

    # Set the major tick label size through tick_params instead of the
    # per-tick `tick.label` alias: that alias is deprecated and no longer
    # exists in recent Matplotlib releases, and tick_params also applies to
    # ticks that are only created when the figure is drawn.
    # The current axes are used (as before) because FillPlot is opaque from here.
    axes = plt.gca()
    axes.tick_params(axis="both", which="major", labelsize=fontSize)

    # Swap only the file extension; str.replace would rewrite every ".csv"
    # occurrence anywhere in the path and miss a differently-cased extension.
    plt.savefig(os.path.splitext(dataFilePath)[0] + ".png")

    # Release the figure so memory does not grow with the number of files.
    plt.close(fig)

