# Libraries
import os
import pandas as pd
import re
import datetime as dt
import sys
import csv
import time
import glob as glob
import math

def filename_parser(filename):
    '''
    Build a data-type label from the metadata keywords found in a file name.

    :param filename: Name (or path) of a file within a county folder
    :return: every keyword occurrence found in the name, joined with '_' in the
             fixed order of the pattern table below (e.g. 'rgprec_voters'),
             or False when the name contains none of the keywords
    '''
    # Order matters: hits are reported in the sequence the patterns are listed,
    # not in the order they occur inside the file name.
    patterns = (
        r'srprec',
        r'rgprec',
        r'rrprec',
        r'svprec',
        r'absentees',
        r'mailballot',
        r'registration',
        r'poll_voters',
        r'(?<=\d_)voters',  # "voters" only directly after a digit and an underscore
        r'nonvoters',
        r'sov_data',
        r'to_city',
        r'sr_blk_map',
        r'rg_blk_map',
        r'rg_2011blk',
        r'sr_2011blk',
    )

    # re.findall returns one entry per occurrence, so a keyword that appears
    # twice in the name is also repeated in the label.
    hits = []
    for pattern in patterns:
        hits.extend(re.findall(pattern, filename))

    if hits:
        # The joined label is what callers use as a grouping key.
        return '_'.join(hits)
    return False


def parse_election_year(filename):
    '''
    Extract the election code from a county file name.

    :param filename: file name or '/'-separated path, e.g. 'c001_g18_sov_data.csv'
    :return: the second '_'-separated token of the last path component, e.g. 'g18'
    '''
    # Everything after the final '/' (the whole string when there is none).
    base_name = filename.rpartition('/')[2]
    tokens = base_name.split('_')
    return tokens[1]

def parse_county_code(filename):
    '''
    Return the county code embedded in a county file name.

    :param filename: file name or '/'-separated path, e.g. 'c001_g18_sov_data.csv'
    :return: the first '_'-separated token of the last path component with its
             leading character removed, e.g. '001'
    '''
    base_name = filename.rpartition('/')[2]
    leading_token, _, _ = base_name.partition('_')
    # Drop the single prefix character in front of the code.
    return leading_token[1:]

def parse_county_number(filename):
    '''
    Convert the county code in a file name into a county number.

    :param filename: file name or '/'-separated path, e.g. 'c037_g18_sov_data.csv'
    :return: (code + 1) // 2 as a string, where code is the integer value of
             the first '_'-separated token without its leading character;
             e.g. 'c037_...' gives '19'
    '''
    base_name = filename.rpartition('/')[2]
    digits = base_name.partition('_')[0][1:].lstrip("0")
    # Integer division maps codes 1, 2 -> 1; 3, 4 -> 2; and so on.
    return str((int(digits) + 1) // 2)



def file_type_parser(filename):
    '''
    Identify the precinct or block-map type named in a file name.

    :param filename: file name or path to inspect
    :return: the first of 'srprec', 'rgprec', 'rrprec', 'svprec', 'sr_blk',
             'rg_blk', 'rg_2011blk', 'sr_2011blk' that occurs in filename
             (checked in that order), or False when none is present
    '''
    # Checked in priority order; the first keyword contained in the name wins.
    # 'rg_2011blk' is matched without a leading space so that names such as
    # 'c001_g12_rg_2011blk_map.csv' are recognised.
    type_keywords = (
        'srprec',
        'rgprec',
        'rrprec',
        'svprec',
        'sr_blk',
        'rg_blk',
        'rg_2011blk',
        'sr_2011blk',
    )
    for keyword in type_keywords:
        if keyword in filename:
            return keyword
    return False

def get_files():
    '''
    Group the CSV files of the input directory by data type.

    Reads the command line: the first argument is the directory that holds the
    CSV files (the current directory when omitted); the optional second
    argument is a data-type label, as produced by filename_parser, that
    restricts the result to that one type.

    :return: a dictionary: keys - file type as returned by filename_parser
             (e.g. 'registration', 'srprec_absentees', or False for names
             without a known keyword); values - list of the paths of the files
             of that type. When a data type is requested the dictionary holds
             only that type, and is empty if no file matches it.
    '''
    args = sys.argv[1:]
    # Same fallback as the script entry point, instead of an IndexError.
    path = args[0] if args else '.'
    data_type_to_merge = args[1] if len(args) > 1 else ''

    # os.path.join supplies the separator when the directory is given without
    # a trailing slash; sorting makes the processing order reproducible.
    files_to_work_on = sorted(glob.glob(os.path.join(path, '*.csv')))

    dictionary_of_files_types = dict()
    for file_ in files_to_work_on:
        file_type = filename_parser(file_)
        dictionary_of_files_types.setdefault(file_type, []).append(file_)

    if data_type_to_merge:
        if data_type_to_merge in dictionary_of_files_types:
            return {data_type_to_merge: dictionary_of_files_types[data_type_to_merge]}
        print(f'no files of type {data_type_to_merge!r} found in {path!r}')
        return dict()
    return dictionary_of_files_types

def block_key(df, fips):
    '''
    Build the block key of every row.

    :param df: dataframe with string columns 'tract' and 'block'
    :param fips: fips code (string) placed in front of every key
    :return: a Series of strings: fips, then the tract left-padded with zeros
             to 6 characters, then the block left-padded with zeros to 4
    '''
    padded_tract = df['tract'].str.zfill(6)
    padded_block = df['block'].str.zfill(4)
    return pd.Series(fips + padded_tract + padded_block)

def sprec_key(df, fips, key):
    '''
    Build the quoted precinct key of every row.

    :param df: dataframe holding the precinct column
    :param fips: fips code placed in front of every key
    :param key: name of the precinct column to read
    :return: a list with one string per row, formatted as '"<fips><value>"';
             rows whose value is missing (None, NaN, a blank string or the
             text 'NULL') get '"<fips>NULL"'
    '''
    prefix = str(fips)
    key_list = []
    for key_value in df[key]:
        # Missing values are tested first: strings must not bypass the blank
        # check, and NaN / None must not be compared against a boolean.
        if isinstance(key_value, str):
            missing = not key_value.strip() or key_value == 'NULL'
        else:
            missing = bool(pd.isna(key_value))

        suffix = 'NULL' if missing else str(key_value)
        # The double quotes are part of the stored value.
        key_list.append(f'"{prefix}{suffix}"')
    return key_list



def add_new_columns(filename):
    '''
    Load one county CSV file and add the county, fips and key columns.

    Every column is read as text. Depending on the type reported by
    file_type_parser the frame additionally receives a precinct key column
    (precinct files) or ELECTION, TYPE, precinct key and BLOCK_KEY columns
    (block-map files). All column names are upper-cased before returning.

    :param filename: path of the CSV file; a bare file name is looked up in
                     the module-level input directory ``path``
    :return: the dataframe with the added columns
    '''
    # Paths that already carry a directory (e.g. glob results) are used as
    # given; joining them onto ``path`` again would duplicate the directory
    # for relative paths.
    csv_path = filename if os.path.dirname(filename) else os.path.join(path, filename)
    df = pd.read_csv(csv_path, dtype=str)
    df.columns = [column.lower() for column in df.columns]

    file_type_key = file_type_parser(filename)
    precinctType = ['srprec', 'rrprec', 'svprec', 'rgprec']
    # "06" + county code -- presumably the state FIPS prefix; confirm.
    fips = "06" + parse_county_code(filename)

    try:
        county_number = parse_county_number(filename)
        # The double quotes are part of the stored values.
        df.insert(loc=0, column='county', value=f'"{county_number}"')
        df.insert(loc=1, column='fips', value=f'"{fips}"')
    except ValueError as ex:
        # Best effort: an unparsable county code or an already existing
        # column is reported and the remaining columns are still added.
        print(f'{filename}: {ex}')

    if file_type_key in precinctType:
        df.insert(loc=2, column=f"{file_type_key}_key", value=sprec_key(df, fips, file_type_key))
    elif file_type_key in ('sr_blk', 'rg_blk', 'sr_2011blk', 'rg_2011blk'):
        prec_type = file_type_key.split('_')[0]  # sr | rg
        prec_code = f"{prec_type}prec"
        electionType = parse_election_year(filename).lower()

        # The precinct column is left exactly as read (already text because
        # of dtype=str). Formatting the whole Series into an f-string would
        # write the Series' printed representation into every row.
        df.insert(loc=2, column='ELECTION', value=[electionType] * len(df))
        df.insert(loc=3, column='TYPE', value=[file_type_key] * len(df))
        df.insert(loc=4, column=f"{prec_code}_key", value=sprec_key(df, fips, prec_code))
        df.insert(loc=5, column='BLOCK_KEY', value=block_key(df, fips))

    df.columns = [column.upper() for column in df.columns]
    return df



def add_new_columns_e(filename):
    '''
    Variant of add_new_columns that stores numeric precinct codes.

    Every column is read as text. The fips column is stored without quotes.
    For block-map files the precinct column is converted to numbers when all
    of its values are numeric, and the ELECTION and TYPE values are wrapped
    in double quotes. All column names are upper-cased before returning.

    :param filename: path of the CSV file; a bare file name is looked up in
                     the module-level input directory ``path``
    :return: the dataframe with the added columns
    '''
    # Paths that already carry a directory (e.g. glob results) are used as
    # given; joining them onto ``path`` again would duplicate the directory
    # for relative paths.
    csv_path = filename if os.path.dirname(filename) else os.path.join(path, filename)
    df = pd.read_csv(csv_path, dtype=str)
    df.columns = [column.lower() for column in df.columns]

    # Only the four precinct types belong here; the 2011 block maps are
    # handled by the block-map branch below.
    precinctType = ['srprec', 'rrprec', 'svprec', 'rgprec']
    file_typekey = file_type_parser(filename)
    # "06" + county code -- presumably the state FIPS prefix; confirm.
    fips = "06" + parse_county_code(filename)

    try:
        county_number = parse_county_number(filename)
        # The double quotes are part of the stored county values.
        df.insert(loc=0, column='county', value=f'"{county_number}"')
        df.insert(loc=1, column='fips', value=fips)
    except ValueError as ex:
        # Best effort: an unparsable county code or an already existing
        # column is reported and the remaining columns are still added.
        print(f'{filename}: {ex}')

    if file_typekey in precinctType:
        df.insert(loc=2, column=f"{file_typekey}_key", value=sprec_key(df, fips, key=file_typekey))
    elif file_typekey in ('sr_blk', 'rg_blk', 'sr_2011blk', 'rg_2011blk'):
        prec_type = file_typekey.split('_')[0]  # sr | rg
        prec_code = f"{prec_type}prec"
        election_type = parse_election_year(filename).lower()

        try:
            df[prec_code] = pd.to_numeric(df[prec_code], downcast='integer')
        except (ValueError, TypeError):
            # Non-numeric precinct codes stay as text, which is what the
            # deprecated errors='ignore' option used to do.
            pass
        df.insert(loc=2, column='ELECTION', value=f'"{election_type}"')
        df.insert(loc=3, column='TYPE', value=f'"{file_typekey}"')
        df.insert(loc=4, column=f"{prec_code}_key", value=sprec_key(df, fips, prec_code))
        df.insert(loc=5, column='BLOCK_KEY', value=block_key(df, fips))

    df.columns = [column.upper() for column in df.columns]
    return df

def merge_county_files():
    '''
    Merge the county files of every data type into one state-level file.

    get_files supplies a dictionary: key -> file type, value -> paths of the
    county files of that type. The files of each type are loaded through
    add_new_columns, concatenated, and handed to save_files together with the
    first file name of the group.
    '''
    county_files = get_files()
    print(county_files)
    for file_Type, filenames in county_files.items():
        if not file_Type or not filenames:
            # filename_parser reports False for names without a known
            # keyword; save_files calls file_type.split and cannot name an
            # output for such a group, so it is skipped instead of aborting
            # the whole run.
            print(f'skipping files with unrecognized type: {filenames}')
            continue

        print(file_Type, 'filetype')
        file_type_dfs = [add_new_columns(county_file) for county_file in filenames]
        state_df = pd.concat(file_type_dfs, ignore_index=True, sort=False)
        # The first file of the group supplies the election code for the name.
        save_files(state_df, file_Type, filenames[0])

def save_files(stateLevel, file_type, file_name, output_dir='/home/admin/stateFiles'):
    '''
    Write a merged state-level dataframe to a CSV file named after its type.

    The output name is derived from the election code of file_name and the
    '_'-separated tokens of file_type:

    * three tokens including 'blk': state_<election>_<t0>_<t1>_<t2>
    * three tokens otherwise:       state_<election>_<t1>_<t2>_by_<election>_<t0>
    * two tokens:                   state_<election>_<t1>_by_<election>_<t0>
    * one token:                    state_<election lower-cased>_by_<election>_<t0>

    Any other token count is reported and nothing is written.

    :param stateLevel: dataframe to write
    :param file_type: data-type label, e.g. 'srprec_sov_data'
    :param file_name: a county file name from which the election code is parsed
    :param output_dir: directory that receives the CSV file
    '''
    election_type = parse_election_year(file_name)
    keysList = file_type.split('_')
    print(len(keysList), ' length of keylist')

    if len(keysList) == 3:
        if 'blk' in keysList:
            stem = f'state_{election_type}_{keysList[0]}_{keysList[1]}_{keysList[2]}'
        else:
            stem = f'state_{election_type}_{keysList[1]}_{keysList[2]}_by_{election_type}_{keysList[0]}'
    elif len(keysList) == 2:
        stem = f'state_{election_type}_{keysList[1]}_by_{election_type}_{keysList[0]}'
    elif len(keysList) == 1:
        stem = f'state_{election_type.lower()}_by_{election_type}_{keysList[0]}'
    else:
        # No naming rule exists for this label; say so rather than dropping
        # the merged data without a trace.
        print(f'no output name rule for file type {file_type!r}; nothing saved')
        return

    stateString = os.path.join(output_dir, stem)
    print(stateString + ' state string ')
    # QUOTE_NONE: values are written verbatim, without csv-level quoting.
    stateLevel.to_csv(f'{stateString}.csv', index=False, quoting=csv.QUOTE_NONE)


if __name__ == '__main__':
    # Wall-clock start of the run; recorded here but not read again in this block.
    start_time = time.time()

    # ``path`` is a module-level name: add_new_columns reads it to locate the
    # input files. It is the first command-line argument, or the current
    # directory when no argument is given.
    args = sys.argv[1:]
    if args:
        path = args[0]
    else:
        path = '.'

    merge_county_files()

