"""
Miscellaneous utility functions

Author: Luana Micallef, September 2015
"""

import numpy as np
import csv
import os, glob
from scipy.spatial import distance
from math import factorial
from math import atan
import scatterplotimage
import model
import matplotlib.pyplot as plt
import matplotlib
import random
from ellipse import Ellipse
from PIL import Image
import skimage
import os
import ImageMetrics
from randomdatapoints import savePntsWithClusterIdToCVSfile
import time
import re
import itertools


def ReadCSV(dataFilePath, xIndex, yIndex, hasHeading=False, dtype=None):
    """Load two columns of a comma-separated file as one cluster of points.

    The columns with indexes xIndex and yIndex are read with np.genfromtxt;
    the first line of the file is skipped when hasHeading is true.  The
    return value has the same layout as that of
    getDataPointsWithClustersFromCSVFile for data made of a single cluster
    (id 0) without outliers:
      (1) the points read from the file,
      (2) {0: points},
      (3) {0: points} (the very same array object as in (2)),
      (4) {0: empty numpy array} as the outliers of the cluster,
      (5) [0] as the correlation of the cluster.
    """
    header_rows = 1 if hasHeading else 0

    points = np.genfromtxt(dataFilePath,
                           delimiter=",",
                           usecols=(xIndex, yIndex),
                           skip_header=header_rows,
                           dtype=dtype)

    points_by_cluster = {0: points}
    points_by_cluster_without_outliers = {0: points}
    outliers_by_cluster = {0: np.array([])}
    correlations = [0]

    return (points, points_by_cluster, points_by_cluster_without_outliers,
            outliers_by_cluster, correlations)

# load data points from csv file
def getDataPointsFromCSVFile(dataFilePath, hasHeading=False, dtype=None):
    """Read a whole comma-separated file with np.genfromtxt.

    dataFilePath: path of the csv file
    hasHeading:   when true the first line of the file is skipped
    dtype:        passed unchanged to np.genfromtxt

    Returns whatever np.genfromtxt returns for the file.
    """
    rows_to_skip = 1 if hasHeading else 0
    return np.genfromtxt(dataFilePath, delimiter=",",
                         skip_header=rows_to_skip, dtype=dtype)


# load data points with cluster info from csv file and return 
# (1) a numpy array with all the data points irrespective of their cluster i.e., [[x1,y1],[x2,y2],...] 
# (2) a dictionary mapping each cluster id to a numpy array with the data points of that cluster i.e., {0: [[x1,y1],[x2,y2],...], 1: [[x3,y3],[x4,y4],...]} 
# (3) a dictionary like (2) but holding only the points that are NOT outliers i.e., {0: [[x1,y1],[x2,y2],...], 1: [[x3,y3],[x4,y4],...]} 
# (4) a dictionary mapping each cluster id to a numpy array with the outliers of that cluster 
# (5) a numpy array with the correlation for each cluster
def getDataPointsWithClustersFromCSVFile(dataFilePath, hasHeading=False, dtype=None, shuffle=False):
    """Load points with cluster information from a csv file.

    Every row of the file must hold five values: x, y, cluster id (k),
    outlier flag (o, 1 for an outlier) and correlation of the cluster (r).

    dataFilePath: path of the csv file
    hasHeading:   when true the first line of the file is skipped
    dtype:        passed to np.genfromtxt through getDataPointsFromCSVFile
    shuffle:      when true the rows are shuffled (np.random.shuffle) before
                  they are processed

    Returns a tuple of
      (1) numpy array with all the points, irrespective of cluster,
      (2) dict: cluster id -> numpy array with the points of the cluster,
      (3) dict: cluster id -> numpy array with the non-outlier points of the
          cluster (empty array when the cluster has only outliers),
      (4) dict: cluster id -> numpy array with the outliers of the cluster
          (empty array when the cluster has none),
      (5) numpy array with the correlation recorded on the first row seen of
          each cluster, in order of first appearance.
    """
    # load all data from file
    datapoints_xykor = getDataPointsFromCSVFile(dataFilePath, hasHeading, dtype)

    # np.genfromtxt drops one dimension when the file holds a single data row;
    # restore the "sequence of rows" shape so that the loop below can unpack it.
    # NOTE(review): a one-column file with exactly five rows cannot be told
    # apart from a single five-value row here - such a file is not a valid
    # input for this function anyway.
    if datapoints_xykor.ndim == 0:
        datapoints_xykor = datapoints_xykor.reshape(1)
    elif (datapoints_xykor.ndim == 1 and datapoints_xykor.dtype.names is None
          and datapoints_xykor.size > 0):
        datapoints_xykor = datapoints_xykor.reshape(1, -1)

    # shuffle the data points if requested
    if shuffle:
        np.random.shuffle(datapoints_xykor)

    datapoints_woClusterID = []
    datapoints_wClusterID = {}
    datapoints_wClusterID_woOutliers = {}
    outliers_perCluster = {}
    corr_perCluster = []

    # save the details about the points
    for x, y, k, o, r in datapoints_xykor:

        # first time this cluster id is seen: record its correlation and
        # create its (initially empty) lists, so that every cluster ends up
        # with an entry in all the dictionaries
        if k not in datapoints_wClusterID:
            corr_perCluster.append(r)
            datapoints_wClusterID[k] = []
            datapoints_wClusterID_woOutliers[k] = []
            outliers_perCluster[k] = []

        # add point to the list of all points irrespective of cluster
        # (append instead of list concatenation, which copied the list for
        # every single point)
        datapoints_woClusterID.append([x, y])

        # add the point to the list of the corresponding cluster
        datapoints_wClusterID[k].append([x, y])

        # file the point as outlier or as regular point of its cluster
        if o == 1:
            outliers_perCluster[k].append([x, y])
        else:
            datapoints_wClusterID_woOutliers[k].append([x, y])

    # create numpy arrays
    corr_perCluster = np.array(corr_perCluster)
    datapoints_woClusterID = np.array(datapoints_woClusterID)
    for k in datapoints_wClusterID:
        datapoints_wClusterID[k] = np.array(datapoints_wClusterID[k])
        datapoints_wClusterID_woOutliers[k] = np.array(datapoints_wClusterID_woOutliers[k])
        outliers_perCluster[k] = np.array(outliers_perCluster[k])

    return datapoints_woClusterID, datapoints_wClusterID, datapoints_wClusterID_woOutliers, \
           outliers_perCluster, corr_perCluster
    

def GetDataWithoutOutliers(DataClusters, DataOutliers, Colors):
    """Merge the points of all clusters after removing their outliers.

    DataClusters: for each cluster index d (0 .. len-1) an array of points
                  [[x1,y1],[x2,y2],...]
    DataOutliers: for each cluster index d the outlier points of that cluster
                  (may be empty); each outlier must be a point of the cluster
    Colors:       for each cluster index d the colour (3 values) of the cluster

    Returns (FinalData, FinalColor): the remaining points of all clusters
    stacked in cluster order, and an array with one colour row (3 values) per
    remaining point.

    Raises IndexError when an outlier is not among the points of its cluster.
    """
    FinalData = np.array([])
    FinalColor = np.array([])

    for d in range(0, len(DataClusters)):

        cluster = np.asarray(DataClusters[d])

        # row index of every outlier inside the cluster; a row matches only
        # when ALL its coordinates are equal to those of the outlier
        outliersIndices = []
        for outlier in DataOutliers[d]:
            matching_rows = np.flatnonzero(np.all(cluster == outlier, axis=1))
            if matching_rows.size == 0:
                raise IndexError("outlier %s not found in cluster %s" % (outlier, d))
            outliersIndices.append(int(matching_rows[0]))

        # np.delete needs integer indices
        cluster_no_outliers = np.delete(cluster, np.array(outliersIndices, dtype=int), axis=0)
        cluster_color_points = np.empty((len(cluster_no_outliers), 3))
        cluster_color_points[:] = Colors[d]

        if len(FinalData) == 0:
            FinalData = cluster_no_outliers
            FinalColor = cluster_color_points
        else:
            FinalData = np.concatenate((FinalData, cluster_no_outliers), axis=0)
            FinalColor = np.concatenate((FinalColor, cluster_color_points), axis=0)

    return (FinalData, FinalColor)


def MergeDataPoints(DataPoints, DataColors):
    """Stack several groups of points and build the matching colour rows.

    DataPoints: for each group index d (0 .. len-1) a sequence of points
    DataColors: for each group index d the colour (3 values) of the group

    Groups without points are ignored.  Returns (points, colours) where
    colours has one row of 3 values per point; both are empty arrays when
    no group has points.  When only one group has points, that group is
    returned as is (not copied).
    """
    merged_points = None
    merged_colors = None

    for group_no in range(0, len(DataPoints)):
        group = DataPoints[group_no]
        if not len(group) > 0:
            continue

        # one colour row per point of the group
        group_colors = np.empty((len(group), 3))
        group_colors[:] = DataColors[group_no]

        if merged_points is None:
            merged_points, merged_colors = group, group_colors
        else:
            merged_points = np.concatenate((merged_points, group), axis=0)
            merged_colors = np.concatenate((merged_colors, group_colors), axis=0)

    if merged_points is None:
        return (np.array([]), np.array([]))
    return (merged_points, merged_colors)


def GetColorArray(DataClusters, ClusterColors):
    """Build one colour row (3 values) for every point of every cluster.

    DataClusters:  for each cluster index d (0 .. len-1) the points of the cluster
    ClusterColors: for each cluster index d the colour of the cluster

    Returns the colour rows of all clusters stacked in cluster order, or an
    empty array when there are no clusters.
    """
    color_blocks = []

    for cluster_no in range(0, len(DataClusters)):
        block = np.empty((len(DataClusters[cluster_no]), 3))
        block[:] = ClusterColors[cluster_no]
        color_blocks.append(block)

    if not color_blocks:
        return np.array([])
    return np.concatenate(color_blocks, axis=0)

# creates a directory in case it is not there yet
def ensureDirectory(fileOrDirectoryPath):
    """Create the parent directory of a path that does not exist yet.

    Nothing happens when the path itself already exists, when it has no
    directory component or when its parent directory is already there.
    Note that the directory created is os.path.dirname(path): for a directory
    path without a trailing separator that is the PARENT of the directory.
    """
    if os.path.exists(fileOrDirectoryPath):
        return

    parent = os.path.dirname(fileOrDirectoryPath)
    if not parent or os.path.exists(parent):
        return

    os.makedirs(parent)

# write data points (x,y) to csvfile
def writeDataPointsToCSVfile(datapoints, filepath):
    """Write points to a csv file: a header row 'x,y' followed by one row
    per element of datapoints.  The parent directory of filepath is created
    when missing and an existing file is overwritten.
    """
    ensureDirectory(filepath)
    with open(filepath, "w") as csv_file:
        out = csv.writer(csv_file)
        out.writerow(['x', 'y'])
        out.writerows(datapoints)
    
    
# write 2D array to csvfile 
def write2DArrayToCSVfile(array, filepath):
    """Write every row of array as one line of a csv file (no header).
    The parent directory of filepath is created when missing and an existing
    file is overwritten.
    """
    ensureDirectory(filepath)
    with open(filepath, "w") as csv_file:
        csv.writer(csv_file).writerows(array)


# write text to file
def writeToFile(filepath, text, append=False):
    """Write text to a file, creating the parent directory when missing.

    filepath: path of the file to write
    text:     string to write
    append:   when true the text is added at the end of the file, otherwise
              the file is overwritten
    """
    ensureDirectory(filepath)
    mode = 'a' if append else 'w'
    # the with statement closes the file even when the write fails
    with open(filepath, mode) as f:
        f.write(text)
    return


# load file content to list
def readFileToList(filepath):
    """Return the lines of a text file as a list of strings without their
    line endings."""
    with open(filepath) as handle:
        content = handle.read()
    return content.splitlines()


# Get filenames with extension from directory
def getFileNamesFromDir(dirpath, ext):
    """Return the names (without directory) of the files in dirpath that
    match the pattern '*.<ext>'.

    The function temporarily changes the working directory of the process to
    dirpath; the previous working directory is always restored, even when the
    search fails.
    """
    cwd = os.getcwd()
    os.chdir(dirpath)
    try:
        return glob.glob("*." + ext)
    finally:
        os.chdir(cwd)


# get min and max x and y coordinates for a set of points of the form [[x1,y1],[x2,y2],...]
def getBBox(datapoints):
    """Return (minx, maxx, miny, maxy) for points of the form
    [[x1,y1],[x2,y2],...]."""
    (x_low, y_low), (x_high, y_high) = np.min(datapoints, axis=0), np.max(datapoints, axis=0)
    return x_low, x_high, y_low, y_high



# returns the cartesian product of multiple int arrays
# source: http://stackoverflow.com/questions/1208118/using-numpy-to-build-an-array-of-all-combinations-of-two-arrays
# fast but converts all values in all arrays to int
def computeCartesianProductOfIntArrays(arrays, datatype='int,float'):
    """Return the cartesian product of several arrays, one combination per row.

    arrays:   sequence of 1D sequences
    datatype: dtype of the index grid created with np.indices; the values of
              the input arrays are written into that grid, so they are
              converted to this dtype
              NOTE(review): the default 'int,float' is a structured dtype and
              the grid is also used to index the input arrays - confirm that
              the default works; an integer dtype such as int does.

    Row order: the last array varies fastest.
    """
    columns = [np.asarray(values) for values in arrays]
    dims = tuple(len(column) for column in columns)

    # grid[r, c] = index into columns[c] used by combination r
    grid = np.indices(dims, dtype=datatype).reshape(len(columns), -1).T

    # replace every index by the value it refers to
    for col_no, column in enumerate(columns):
        grid[:, col_no] = column[grid[:, col_no]]

    return grid
    
    
# returns the cartesian product of two numpy arrays   
def computeCartesianProductOfTwoNumpyArrays(array1, array2):
    """Return the cartesian product of two 1D numpy arrays as an array of
    pairs [[a1,b1],[a1,b2],...,[an,bm]] (array2 varies fastest).

    Each element of array1 has to be repeated once per element of array2 and
    array2 has to be tiled once per element of array1, so that both columns
    have array1.size * array2.size entries even when the sizes differ.
    """
    first_column = np.repeat(array1, array2.size)
    second_column = np.tile(array2, array1.size)
    # [0] is needed otherwise the array would be of the form [[[x1,y1],[x2,y2],...]]
    return np.dstack((first_column, second_column))[0]



# return the number of combinations 
# inputs: n=number of items available; k=number of items to be picked up 
def computeNumberOfCombinations(n,k):
    """Return the number of ways of picking k items out of n (n choose k).

    n: number of items available
    k: number of items to be picked up

    The result is an exact integer: floor division is used because the
    denominator always divides the numerator, whereas true division would
    produce an inexact float (or overflow) for large n.
    """
    numerator = factorial(n)
    denominator = factorial(k) * factorial(n - k)
    return numerator // denominator


# Computes distance between each pair of the two collections of inputs.
# inputs: 2 numpy arrays of data points of the form [[x1 y1],[x2 y2],..]
# output: distance matrix (if array1 has N points and array2 M points, the matrix is N x M)
# (details at http://docs.scipy.org/doc/scipy-0.16.1/reference/generated/scipy.spatial.distance.cdist.html)
def distanceBetweenEachPairOfPointsInTwoArrays(array1, array2):
    """Return the matrix of euclidean distances between every point of
    array1 (rows) and every point of array2 (columns)."""
    pairwise_distances = distance.cdist(array1, array2, metric='euclidean')
    return pairwise_distances


# Compute gradient of a line given the angle the line makes with the x-axis
# see e.g., http://www.intmath.com/plane-analytic-geometry/1b-gradient-slope-line.php
def getGradientOfLineGivenAngleToXAxis(angle):
    """Return the gradient (slope) of a line that makes the given angle, in
    radians, with the positive x-axis.

    The gradient is the tangent of the angle (m = tan(angle)); the arctangent
    is the inverse operation, i.e. it gives the angle for a gradient.
    """
    from math import tan  # local import: the module only imports atan
    return tan(angle)

# Compute smallest angle in radians between 2 lines given only the gradient of the 2 lines
# see e.g., http://planetmath.org/anglebetweentwolines
def computeSmallestAngleInRadiansBetweenTwoLines(m1, m2):
    """Return the smallest angle, in radians, between two lines given their
    gradients m1 and m2, using tan(angle) = |(m1 - m2) / (1 + m1*m2)|.

    The result lies in [0, pi/2]; perpendicular lines (m1*m2 == -1) give
    pi/2 instead of a division by zero.
    """
    from math import pi  # local import: the module only imports atan

    denominator = 1 + (m1 * m2)
    if denominator == 0:
        # perpendicular lines: the tangent of the angle is infinite
        return pi / 2

    # float() avoids integer division when both gradients are integers
    return atan(abs(float(m1 - m2) / denominator))

"""

Which of these images matches the scatterplot on the left? 
Choices: 
image with covariance ellipses corresponding to the scatterplot, 
image with covariance ellipses corresponding to the scatterplot but with 1 cluster removed, 
image with covariance ellipses corresponding to the scatterplot but with 1 cluster replaced


"""
def _perturbedCopyOfEllipse(ellipse):
    """Return a new Ellipse with the centre of the given one, an angle drawn
    from a normal distribution N(90, 45) and both axes multiplied by factors
    drawn from N(1, 0.5)."""
    modified = Ellipse(ellipse.xc, ellipse.yc, ellipse.axis_major, ellipse.axis_minor, ellipse.angle)
    modified.angle = np.random.normal(90, 45)
    modified.axis_minor *= np.random.normal(1, 0.5)
    modified.axis_major *= np.random.normal(1, 0.5)
    return modified


def CreateEllipseImages(DataBoundingBox, Design, ClusterPoints, ClusterColor, OutputPath, DrawPoints = False, ellipses_scale = 2.8):
    """Save three images of the covariance ellipses of the clusters, plus a log.

    Files written (names derived from OutputPath by replacing ".png"):
      OutputPath     - one ellipse per cluster
      ...rem.png     - same, with the ellipse of one random cluster removed
      ...mod.png     - same, with the ellipse of one random cluster replaced by
                       a randomly rotated and rescaled one
      ...modlog.log  - which ellipses were removed / modified and how

    DataBoundingBox, Design: passed on to SaveClustersEllipses and
                    CheckModifiedEllipse
    ClusterPoints:  for each cluster index d (0 .. len-1) the points of the
                    cluster; at least one cluster is required
    ClusterColor:   for each cluster index the colour of the cluster
    DrawPoints:     passed on to SaveClustersEllipses
    ellipses_scale: factor applied to the ellipse axes when they are drawn

    Returns True.
    """
    Ellipses = []

    for d in range(len(ClusterPoints)):
        ell = model.getCovEllipseForPoints(ClusterPoints[d], 1.5)
        # keep very thin ellipses visible: minor axis at least 1% of the major one
        ell.axis_minor = max(ell.axis_minor, 0.01 * ell.axis_major)
        Ellipses.append(ell)

    # Originals
    SaveClustersEllipses(DataBoundingBox, Design, ClusterPoints, ClusterColor, OutputPath, Ellipses, DrawPoints, ellipses_scale)

    # One ellipse removed
    NewEllipses = list(Ellipses)
    id_toremove = random.randint(0, len(NewEllipses)-1)
    del NewEllipses[id_toremove]
    NewColors = np.delete(ClusterColor, id_toremove, 0)

    # NOTE(review): ClusterPoints is passed unchanged, while SaveClustersEllipses
    # pairs ellipse i with ClusterPoints[i]; after the removal the points of the
    # clusters that follow id_toremove no longer line up with their ellipse and
    # colour. Confirm whether that matters when DrawPoints is True.
    SaveClustersEllipses(DataBoundingBox, Design, ClusterPoints, NewColors, OutputPath.replace(".png", "rem.png") , NewEllipses, DrawPoints, ellipses_scale)

    # One ellipse modified
    NewEllipses = list(Ellipses)
    id_tomodify = random.randint(0, len(NewEllipses)-1)

    max_tries = 100
    curr_try = 0
    diff_threshold = 0.5

    # draw random variations of the chosen ellipse until one is accepted by
    # CheckModifiedEllipse or the retries run out (the last one is kept then)
    NewEllipses[id_tomodify] = _perturbedCopyOfEllipse(Ellipses[id_tomodify])

    while not CheckModifiedEllipse(NewEllipses[id_tomodify], Ellipses[id_tomodify], ClusterPoints[id_tomodify], DataBoundingBox, Design, diff_threshold, ellipses_scale) \
          and curr_try < max_tries:
        NewEllipses[id_tomodify] = _perturbedCopyOfEllipse(Ellipses[id_tomodify])
        curr_try += 1

    SaveClustersEllipses(DataBoundingBox, Design, ClusterPoints, ClusterColor, OutputPath.replace(".png", "mod.png") , NewEllipses, DrawPoints, ellipses_scale)

    # the with statement closes the log even when a write fails
    with open(OutputPath.replace(".png", "modlog.log"), "w") as mod_logs:
        mod_logs.write("removed ellipse: " +str(id_toremove)+"\n")
        mod_logs.write("modified ellipse: " +str(id_tomodify)+"\n")
        mod_logs.write("old mayor axis: " +str(Ellipses[id_tomodify].axis_major)+"\n")
        mod_logs.write("new mayor axis: " +str(NewEllipses[id_tomodify].axis_major)+"\n")
        mod_logs.write("old minor axis: " +str(Ellipses[id_tomodify].axis_minor)+"\n")
        mod_logs.write("new minor axis: " +str(NewEllipses[id_tomodify].axis_minor)+"\n")
        mod_logs.write("old angle axis: " +str(Ellipses[id_tomodify].angle)+"\n")
        mod_logs.write("new angle: " +str(NewEllipses[id_tomodify].angle)+"\n")

    return True

def CheckModifiedEllipse(NewEllipse, OldEllipse, ClusterPoints, DataBoundingBox, Design, DiffThreshold, ellipses_scale = 3.1):
    """Tell whether a modified ellipse is acceptable.

    The new ellipse is rejected when EllipseInsideGraph returns False for it.
    Otherwise both ellipses are rendered with GetEllipseBuffer and the
    function returns True when 1 - SSIM(old image, new image), computed over
    the combination of the two alpha masks, is at least DiffThreshold.
    """
    def _rgbAndAlphaMask(image_buffer):
        # split an RGBA buffer into its colour channels scaled to [0, 1] and
        # the mask prepared from its alpha channel
        rgb = image_buffer[..., :3].astype(np.float64) / 255.
        alpha = image_buffer[...,  3].astype(np.float64) / 255.
        return rgb, ImageMetrics.PrepareAlphaMask(alpha)

    inside = EllipseInsideGraph(NewEllipse, ClusterPoints, DataBoundingBox, Design, ellipses_scale = ellipses_scale)
    if not inside:
        return False

    new_rgb, new_mask = _rgbAndAlphaMask(
        GetEllipseBuffer(NewEllipse, ClusterPoints, DataBoundingBox, Design, EllipsesScaleFact = ellipses_scale))
    old_rgb, old_mask = _rgbAndAlphaMask(
        GetEllipseBuffer(OldEllipse, ClusterPoints, DataBoundingBox, Design, EllipsesScaleFact = ellipses_scale))

    # element-wise maximum of the two masks
    combined_mask = np.amax((new_mask, old_mask), axis = 0)

    similarity = ImageMetrics.compare_ssim(old_rgb, new_rgb, combined_mask,
                                           multichannel = True,
                                           gaussian_weights = True, sigma = 1.5,
                                           use_sample_covariance = False,
                                           full = False)

    return (1 - similarity) >= DiffThreshold

def EllipseInsideGraph(EllipseToDraw, ClusterPoints, DataBoundingBox, Design, SimThreshold = 0.99, ellipses_scale = 3.1):
    """Compare the border region of a plot with and without an ellipse.

    The ellipse is rendered with GetEllipseBuffer (axes on) and a second image
    of the same plot is rendered without the ellipse.  The two images are
    compared with ImageMetrics.compare_ssim, restricted by a mask that is 1 in
    bands of OffsetMask pixels along the four borders of the image and 0
    elsewhere.  Returns True when the similarity is at least SimThreshold
    (presumably meaning that the ellipse does not reach the border region).
    """

    # image with the ellipse; True = keep the axes on, as in the reference image below
    EllipseImgBuffer = GetEllipseBuffer(EllipseToDraw, ClusterPoints, DataBoundingBox, Design, True, ellipses_scale)
    EllipBufferImage = EllipseImgBuffer[..., :3].astype(np.float64) / 255.

    # reference image: same plot without the ellipse
    SP = scatterplotimage.ScatterPlotter(Design[0])
    fig, scatter_plot = plt.subplots();
    fig.tight_layout()
    background = fig.patch
    background.set_facecolor((1,1,1,0))
    scatter_plot.grid(False)

    # the literal 0 takes the position where other callers pass Design[1]
    # (presumably the marker opacity, i.e. the points are not visible - TODO confirm)
    SP.FillPlot(fig, scatter_plot, ClusterPoints, DataBoundingBox, Design[0], 0, Design[2], Design[3])

    scatter_plot.axis('on')

    WhiteImageBuffer = SP.fig2data(fig)

    plt.close(fig)
    
    # drop the alpha channel and scale the colour channels to [0, 1]
    WhiteImage = WhiteImageBuffer[..., :3].astype(np.float64) / 255.

    # mask with one value per pixel, initially all 0
    WhiteImageAlphaMask = np.empty((WhiteImage.shape[0], WhiteImage.shape[1]))
    WhiteImageAlphaMask.fill(int(0))

    # width in pixels of the border bands that are compared
    OffsetMask = 80

    # row / column indexes of the bands: the first OffsetMask ones and the
    # OffsetMask ones that precede the last one (the last row / column itself
    # is not included because the ranges stop at len - 1)
    range1 = np.concatenate((range(OffsetMask), range(len(WhiteImageAlphaMask) -1 - OffsetMask, len(WhiteImageAlphaMask) -1)))
    range2 = np.concatenate((range(OffsetMask), range(len(WhiteImageAlphaMask[0]) -1 - OffsetMask, len(WhiteImageAlphaMask[0]) -1)))
    WhiteImageAlphaMask[range1, :] = 1
    WhiteImageAlphaMask[:, range2] = 1

    #Testing
    #Image.fromarray(skimage.img_as_ubyte(WhiteImage)).save('out_white.png')
    #Image.fromarray(skimage.img_as_ubyte(EllipBufferImage)).save('out_ellipse.png')

    similarity = ImageMetrics.compare_ssim(WhiteImage, EllipBufferImage, WhiteImageAlphaMask,
                 multichannel = True,
                 gaussian_weights = True, sigma = 1.5, use_sample_covariance = False,
                 full = False)

    return similarity >= SimThreshold


def GetEllipseBuffer(EllipseToDraw, ClusterPoints, DataBoundingBox, Design, AxesOn = False, EllipsesScaleFact = 3.1):
    """Render one ellipse on top of a plot and return the image buffer.

    The plot is set up with ScatterPlotter.FillPlot (the literal 0 takes the
    position where other callers pass Design[1]), the ellipse is added in
    black with its axes multiplied by EllipsesScaleFact, and the figure is
    converted with ScatterPlotter.fig2data and closed.

    AxesOn: when true the plot axes are shown, otherwise they are hidden.
    """
    plotter = scatterplotimage.ScatterPlotter(Design[0])

    fig, scatter_plot = plt.subplots()
    fig.tight_layout()
    fig.patch.set_facecolor((1,1,1,0))
    scatter_plot.grid(False)

    plotter.FillPlot(fig, scatter_plot, ClusterPoints, DataBoundingBox, Design[0], 0, Design[2], Design[3])

    outline = matplotlib.patches.Ellipse(
        xy=[EllipseToDraw.xc, EllipseToDraw.yc],
        width=EllipseToDraw.axis_major*EllipsesScaleFact,
        height=EllipseToDraw.axis_minor*EllipsesScaleFact,
        alpha=1,
        angle=EllipseToDraw.angle,
        color = 'black',
        linewidth = 5)

    # the ellipse goes on the current axes of pyplot
    plt.gca().add_artist(outline)

    # axes hidden unless requested
    scatter_plot.axis('off')
    if AxesOn:
        scatter_plot.axis('on')

    rendered = plotter.fig2data(fig)
    plt.close(fig)

    return rendered

def SaveClustersEllipses(DataBoundingBox, Design, ClusterPoints, ClusterColor, OutputPath, Ellipses, DrawPoints = False, EllipsesScaleFact = 3.1):
    """Draw the given ellipses (and the cluster points) and save the image.

    For every ellipse i the points ClusterPoints[i] are plotted with colour
    ClusterColor[i] through ScatterPlotter.FillPlot and the ellipse is added
    as a black outline whose axes are multiplied by EllipsesScaleFact.  The
    figure is saved to OutputPath and closed.

    DrawPoints: when False, element 1 of the design is set to 0 before
                plotting (presumably the marker opacity, which would hide the
                points - TODO confirm against ScatterPlotter.FillPlot).
    """

    SP = scatterplotimage.ScatterPlotter(Design[0])

    # work on a copy so that the caller's Design is not modified below
    scatterPlotDesign = np.array(Design)


    fig, scatter_plot = plt.subplots();
    scatter_plot.axis('on')

    fig.tight_layout()
    background = fig.patch
    background.set_facecolor((1,1,1,0))
    scatter_plot.grid(False)

    if not DrawPoints:
        scatterPlotDesign[1] = 0

    # ellipse i is paired with ClusterPoints[i] and ClusterColor[i]
    for i, ellipse in enumerate(Ellipses):


        SP.FillPlot(fig, scatter_plot, ClusterPoints[i], DataBoundingBox, scatterPlotDesign[0], scatterPlotDesign[1], scatterPlotDesign[2], scatterPlotDesign[3], ClusterColor[i])

        # unfilled black outline
        ellip = matplotlib.patches.Ellipse(xy=[ellipse.xc, ellipse.yc], width=ellipse.axis_major*EllipsesScaleFact, height=ellipse.axis_minor*EllipsesScaleFact, alpha=1,  angle=ellipse.angle, facecolor = 'none', edgecolor = 'black', linewidth = 5)
        
        # added to the current axes of pyplot
        ax = plt.gca()
        ax.add_artist(ellip)

    # saves the current figure of pyplot
    plt.savefig(OutputPath)

    plt.close(fig)


def MakeListOfCorrError(dirPath):
    """Summarise the winning designs of all the logs in a directory.

    Every entry of dirPath whose name contains ".log" is read with
    GetWinnerDesignRowFromLogs; for each log that has a winner row a line
    "<name with .log replaced by .png>,<aspect ratio>,<angle error>,<axis error>"
    is written to "corr_errors_list.csv" inside dirPath (the file is
    overwritten and starts with a header line).
    """
    # listed before the output file is created
    listFiles = os.listdir(dirPath)

    outputFileName = "corr_errors_list.csv"

    # the with statement closes the output even when reading a log fails
    with open(dirPath + "/" + outputFileName, "w") as outputFile:
        outputFile.write("FileName,AspectRatio,AngleError,AxisError\n")

        for fileName in listFiles:

            if ".log" not in fileName:
                continue

            winnerLine = GetWinnerDesignRowFromLogs(dirPath + "/" + fileName)

            # logs without a winner row are skipped
            if len(winnerLine) == 0:
                continue

            angle_error = winnerLine["angle_error_foreachcluster"][0]
            axis_error = winnerLine["axis_error_foreachcluster"][0]
            aspect_ratio = winnerLine["image_aspect_ratio"][0]

            outputFile.write(fileName.replace(".log",".png")+ "," +str(aspect_ratio)+ "," +str(angle_error)+ "," +str(axis_error)+"\n")



def GetWinnerDesignRowFromLogs(logPath):
    """Return the rows of a log file whose "description" column is
    "winner_design".

    The log is a "|"-separated file whose first line holds the column names;
    its last line is ignored.  The result is a structured numpy array (empty
    when the log has no winner row) whose fields are the column names.
    """
    # parenthesised form: valid both as Python 2 statement and Python 3 call
    print("processing " + logPath)

    data = np.genfromtxt(logPath, delimiter = "|", skip_header = 0, skip_footer = 1, dtype=None, names = True)

    winnerLineIdx = np.where(data["description"] == "winner_design")[0]
    winnerLine = data[winnerLineIdx]

    return winnerLine

def CreateEllipsesFromClusterLogs(ClusterPoints, ClusterColors, LogFile, DataBBox, OutputFolder):
    """Create the ellipse images for the winning design recorded in a log.

    The winner row of LogFile provides the design [marker_size,
    marker_opacity, image_width, image_aspect_ratio], which is handed to
    CreateEllipseImages together with the clusters.

    NOTE(review): OutputFolder is passed as the OutputPath argument of
    CreateEllipseImages, which derives its file names by replacing ".png" -
    confirm that callers pass a path ending in ".png".
    """
    winner = GetWinnerDesignRowFromLogs(LogFile)
    ensureDirectory(OutputFolder)

    design_columns = ("marker_size", "marker_opacity", "image_width", "image_aspect_ratio")
    winner_design = [winner[column] for column in design_columns]

    CreateEllipseImages(DataBBox, winner_design, ClusterPoints, ClusterColors, OutputFolder)


def SavePerceivedPoints(design, DataBoundingBox, ClusterPoints, ClusterColors, outputPath):
    """Save the perceived points of every cluster to a csv file.

    Each cluster is plotted on its own (axes off) with the given design, the
    image is passed to model.getPerceivedPoints and the points obtained are
    collected.  The rows saved with savePntsWithClusterIdToCVSfile are the
    columns of the perceived points followed by the cluster index, an outlier
    flag of 0 and a correlation of 0.  ClusterColors is not used.
    """
    fig, scatter_plot = plt.subplots()
    fig.tight_layout()
    fig.patch.set_facecolor((1,1,1,0))
    scatter_plot.grid(False)

    plotter = scatterplotimage.ScatterPlotter(design[0])

    perceived_per_cluster = []
    ids_per_cluster = []

    for cluster_no in range(len(ClusterPoints)):

        # start from clean axes for every cluster
        plt.cla()
        plotter.FillPlot(fig, scatter_plot, ClusterPoints[cluster_no], DataBoundingBox, design[0], design[1], design[2], design[3])
        scatter_plot.axis('off')

        cluster_image = plotter.fig2data(fig)
        (_, perceived) = model.getPerceivedPoints(cluster_image)

        perceived_per_cluster.append(perceived)
        # one cluster id (as float) per perceived point
        ids_per_cluster.append(np.full(len(perceived), float(cluster_no)))

    plt.close(fig)

    all_points = np.concatenate(perceived_per_cluster)
    all_ids = np.concatenate(ids_per_cluster)

    outlier_flags = np.zeros(len(all_ids))
    correlations = np.zeros(len(all_ids))

    table = np.column_stack((all_points, all_ids, outlier_flags, correlations))

    savePntsWithClusterIdToCVSfile(table, outputPath, append=False)


def GetPerceivedPointsWithEllipses(design, DataBoundingBox, ClusterPoints, ClusterColors, outputPath):
    """Save an image of the perceived points of the clusters with their
    covariance ellipses.

    Each cluster is plotted on its own (axes off) with the given design and
    the image is passed to model.getPerceivedPoints; a covariance ellipse is
    computed for the perceived points of each cluster.  The perceived points
    and the ellipses are then drawn with SaveClustersEllipses, using the
    bounding box of all the perceived points, and saved to outputPath.
    """
    # getting the perceived points

    fig, scatter_plot = plt.subplots()
    fig.tight_layout()
    background = fig.patch
    background.set_facecolor((1,1,1,0))

    scatter_plot.grid(False)

    # ScatterPlotter lives in the scatterplotimage module (the bare name is
    # not imported by this file)
    SP = scatterplotimage.ScatterPlotter(design[0])

    ClustersPercPoints = []
    ClustersEllipses = []

    for d in range(len(ClusterPoints)):

        # start from clean axes for every cluster
        plt.cla()

        SP.FillPlot(fig, scatter_plot, ClusterPoints[d], DataBoundingBox, design[0], design[1], design[2], design[3])
        scatter_plot.axis('off')

        ClusterBuffer = SP.fig2data(fig)

        (PercEdges, PercPoints) = model.getPerceivedPoints(ClusterBuffer)

        ClustersPercPoints.append(PercPoints)
        ClustersEllipses.append(model.getCovEllipseForPoints(PercPoints, 1.5))

    plt.close(fig)

    # getBBox is defined in this very module, so it is called directly
    PercBoundingBox = getBBox(np.concatenate(ClustersPercPoints))

    SaveClustersEllipses(PercBoundingBox, design, ClustersPercPoints, ClusterColors, outputPath, ClustersEllipses, True, 2.1)


# Returns current date and time in the form of yyyymmdd_hhmmss  e.g., '20161013_211737'
def getCurrentDateTime():
    """Return the current local date and time as 'yyyymmdd_hhmmss'."""
    now = time.localtime()
    return time.strftime("%Y%m%d_%H%M%S", now)


# Return all the indexes of substring in string 
# e.g., if string="abce_123_asd_456_log", substring_tofind="_", output is [4, 8, 12, 16]
def getAllIndexesOfSubStringInString(string, substring_tofind):
    """Return the start indexes of all the (non-overlapping) occurrences of
    substring_tofind in string.

    The substring is searched literally: re.escape neutralises every
    character that is special for re, while leaving letters and digits
    untouched (a plain backslash prefix would turn e.g. "d" into the digit
    class \\d and would protect only the first character).
    """
    literal_pattern = re.escape(substring_tofind)
    return [m.start() for m in re.finditer(literal_pattern, string)]


# Retrieves the winning designs from the optimizer's log files in a directory with path logsMainDirPath for the
# different weight sets used (weightSetsDirNames) and saves the designs in a file with path outputFilePath
def getWinningDesigns(logsMainDirPath, weightSetsDirNames, outputFilePath): 
    """Collect the winning designs of the optimizer logs into one file.

    logsMainDirPath:    directory with one sub-directory per weight set
    weightSetsDirNames: names of those sub-directories
    outputFilePath:     file to write (overwritten)

    For every "*.log.txt" file of every weight set one "|"-separated record
    is written: the data name (the log file name up to its fourth "_"), the
    weight set and columns 5 to 9 of the second-to-last line of the log.
    The header, built from the first line of the first log, is written only
    when at least one log is found.  Records are separated by a newline and
    the file does not end with one.
    """
    logFileExt = "log.txt"

    headerWritten = False

    # the with statement closes the output even when a log cannot be processed
    with open(outputFilePath, "w") as outputFile:

        for weightSetDirName in weightSetsDirNames:

            logsDirPath = logsMainDirPath + os.path.sep + weightSetDirName
            logFileNames = getFileNamesFromDir(logsDirPath, logFileExt)

            for logFileName in logFileNames:
                logFileLines = readFileToList(logsDirPath + os.path.sep + logFileName)

                if not headerWritten:
                    designHeaders = "|".join(logFileLines[0].split("|")[5:10])
                    outputFile.write("DataFileName|WeigthSet|"+designHeaders+"\n")
                    headerWritten = True
                else:
                    # separator written before every record but the first:
                    # unlike a position lookup with list.index this stays
                    # correct with repeated names or when the last weight set
                    # has no logs
                    outputFile.write('\n')

                # the winner is logged on the second-to-last line
                winnerLog = logFileLines[-2]
                winnerLogVars = winnerLog.split("|")

                sepIndexesInLogFileName = getAllIndexesOfSubStringInString(logFileName, "_")
                dataName = logFileName[:sepIndexesInLogFileName[3]]
                winningDesign = "|".join(winnerLogVars[5:10])
                outputFile.write(dataName + "|" + weightSetDirName + "|" + winningDesign)

    return
    
    

# Retrieves the designs from the plot file names of all the pngs found in plotsMainDirPath and saves the
# designs in a file with path outputFilePath (this file format is the same as that generated by getWinningDesigns)
def getDesignsForPlotFileNames(plotsMainDirPath, outputFilePath): 
    """Extract the designs encoded in the names of the png files of a directory.

    For every "*.png" file of plotsMainDirPath one record is written to
    outputFilePath (overwritten): the file name without ".png", an empty
    weight set column and the part of the name after its last "_" with every
    "-" replaced by "|".  The header line is written only when at least one
    png is found; records are separated by a newline and the file does not
    end with one.

    NOTE(review): the header lists five design columns (starting with
    max_marker_size); confirm that the plot file names encode five values.
    """
    plotFileNames = getFileNamesFromDir(plotsMainDirPath, "png")

    # the with statement closes the output even when a name cannot be parsed
    with open(outputFilePath, "w") as outputFile:

        for position, pfn in enumerate(plotFileNames):

            if position == 0:
                designHeaders = "DataFileName|WeigthSet|max_marker_size|marker_size|marker_opacity|image_width|image_aspect_ratio"
                outputFile.write(designHeaders+"\n")
            else:
                # separator before every record but the first
                outputFile.write('\n')

            # the design is the part of the name after the last "_"
            indxNameDesignSep = pfn.rindex("_")
            design = pfn[indxNameDesignSep+1:].replace(".png","").replace("-","|")

            outputFile.write(pfn.replace(".png","") + "||" + design)

    return



# Generates plots for winning designs with black markers, no axes and no background
def generatePlotsForWinningDesigns (winningdesignsDirPath, winningdesignsFileName, dataDirPath, outputDirName):
    """Generate one plot per line of a winning-designs file.

    The file (its first line is a header) holds "|"-separated records:
    data file name, weight set, max marker size, marker size, marker opacity,
    image width and image aspect ratio.  For each record the data file
    "<dataDirPath>/<data file name>.csv" is plotted to
    "<winningdesignsDirPath>/<outputDirName>/<data file name>_<weight set>.png"
    with black markers, no axes and a transparent background.
    """
    designsFilePath = winningdesignsDirPath + os.path.sep + winningdesignsFileName
    designLines = readFileToList(designsFilePath)[1:]  # the first line is the header

    plotsDirPath = winningdesignsDirPath + os.path.sep + outputDirName
    if not os.path.exists(plotsDirPath):
        os.makedirs(plotsDirPath)

    for designLine in designLines:
        fields = designLine.split("|")

        dataFileName, weightSet = fields[0], fields[1]
        # fields 2..6 are the numeric design variables, in this order
        max_marker_size, marker_size, marker_opacity, image_width, image_aspect_ratio = \
            [float(fields[position]) for position in range(2, 7)]

        csvPath = dataDirPath + os.path.sep + dataFileName + ".csv"
        pngPath = plotsDirPath + os.path.sep + dataFileName + "_" + weightSet + ".png"

        scatterplotimage.GeneratePlotWithDesignForData(
            csvPath, pngPath,
            max_marker_size, marker_size, marker_opacity,
            image_width, image_aspect_ratio,
            marker_rgb_color=(0,0,0), showAxes=False, transparentBackground=True)
    return
        

# Generates plots for the given set of designs and datasets
def generatePlotsWithDesignsForDatasets (dataDirPath, outputDirName, marker_sizes, marker_opacities, image_widths, image_aspect_ratios):
    """Plot every csv file of a directory with every combination of designs.

    For each "*.csv" file of dataDirPath and each combination of marker size,
    marker opacity, image width and image aspect ratio a plot named
    "<data name>_<size>-<opacity>-<width>-<aspect ratio>.png" is generated in
    "<dataDirPath>/<outputDirName>" with black markers, no axes and a
    transparent background.  The largest marker size is passed as the maximum
    marker size of every plot.
    """
    plotsDirPath = dataDirPath + os.path.sep + outputDirName
    if not os.path.exists(plotsDirPath):
        os.makedirs(plotsDirPath)

    largestMarkerSize = max(marker_sizes)
    csvFileNames = getFileNamesFromDir(dataDirPath, "csv")
    designCombinations = list(itertools.product(marker_sizes, marker_opacities, image_widths, image_aspect_ratios))
    designDelimiter = "-"

    for csvFileName in csvFileNames:
        csvPath = dataDirPath + os.path.sep + csvFileName
        plotNamePrefix = plotsDirPath + os.path.sep + csvFileName.replace(".csv","") + "_"

        for size, opacity, width, aspect in designCombinations:
            designLabel = designDelimiter.join([str(size), str(opacity), str(width), str(aspect)])
            pngPath = plotNamePrefix + designLabel + ".png"

            scatterplotimage.GeneratePlotWithDesignForData(
                csvPath, pngPath,
                largestMarkerSize, size, opacity,
                width, aspect,
                marker_rgb_color=(0,0,0), showAxes=False, transparentBackground=True)



# Load the pixels of the image and returns a list of e.g., RGBA values of all pixels
# in the form e.g., [(0,0,0,0), (255,255,255,255), ... (0,0,0,0)] for RGBA
def getPixelsFromImage (imageFilePath):
    """Return the pixels of an image file as a flat list with one entry per
    pixel, e.g. [(0,0,0,0), (255,255,255,255), ...] for an RGBA image."""
    # the with statement releases the file handle kept open by Image.open
    with Image.open(imageFilePath) as im:
        pixelsList = list(im.getdata())

    return pixelsList



# Return (width, height) of the image
def getImageWidthHeight (imageFilePath):
    """Return the size of an image file as the tuple (width, height)."""
    # the with statement releases the file handle kept open by Image.open
    with Image.open(imageFilePath) as im:
        width, height = im.size

    return (width, height)



# Replace the character with index char_index in the string text with new_char
# e.g., text="abc", new_char="X", char_index=1, replaceCharAtIndex -> "aXc"
def replaceCharAtIndex (text, new_char, char_index):
    """Return text with the character at char_index replaced by new_char,
    e.g. text="abc", new_char="X", char_index=1 -> "aXc".

    Negative indexes count from the end of the text, as in normal indexing
    (-1 is the last character).  An index past the end of the text appends
    new_char.  Raises IndexError for a negative index before the start of the
    text.
    """
    if char_index < 0:
        # translate to the equivalent non-negative index: the slices below
        # would otherwise duplicate part of the text
        char_index += len(text)
        if char_index < 0:
            raise IndexError("char_index out of range")
    return text[:char_index] + new_char + text[char_index+1:]














