# Libraries
import os
import pandas as pd
import re
import datetime as dt
import sys
import csv
import time
import glob as glob

def filenameParser(filename):
    '''
    Build a metadata label from the keyword tokens found in a file name.

    :param filename: Name (or path) of a file within a county folder
    :return: The matched tokens joined with underscores, in the fixed order
             of the patterns below (e.g. 'rgprec_voters'), or False when no
             token is present
    '''
    # Patterns are tried in this order, which is also the order of the tokens
    # in the returned label.
    token_patterns = (
        r'srprec',
        r'rgprec',
        r'rrprec',
        r'svprec',
        r'absentees',
        r'mailballot',
        r'registration',
        r'poll_voters',
        # A bare 'voters' only counts when it directly follows "<digit>_",
        # so the 'voters' inside 'poll_voters' is not matched a second time.
        r'(?<=\d_)voters',
        r'nonvoters',
        r'sov_data',
        r'to_city',
        r'sr_blk_map',
        r'rg_blk_map',
    )

    # Every occurrence of every token is collected, so a token that appears
    # twice in the name also appears twice in the label.
    matched_tokens = []
    for pattern in token_patterns:
        matched_tokens.extend(re.findall(pattern, filename))

    # The label is used as a grouping key by the caller; False marks "no keyword".
    if matched_tokens:
        return '_'.join(matched_tokens)
    return False


def parse_election_year(filename):
    '''
    Extract the election code from a county file name.

    The base name is expected to look like "<county>_<election>_...", for
    example "c001_g18_sov_data_by_g18_srprec.csv" gives "g18".

    :param filename: File name or path; forward slashes and backslashes are
                     both treated as directory separators
    :return: The second underscore-separated token of the base name, with its
             case unchanged
    :raises IndexError: if the base name contains no underscore
    '''
    # Strip the directories first so underscores in folder names cannot shift
    # the token positions. Splitting on both separators keeps this correct
    # for Windows-style paths as well.
    base_name = re.split(r'[\\/]', filename)[-1]
    return base_name.split('_')[1]


def parse_county_code(filename):
    '''
    Extract the county code from a county file name.

    The base name is expected to look like "c<code>_<election>_...", for
    example "c001_g18_sov_data_by_g18_srprec.csv" gives "001".

    :param filename: File name or path; forward slashes and backslashes are
                     both treated as directory separators
    :return: The first underscore-separated token of the base name with its
             first character removed
    '''
    # Strip the directories first; otherwise a backslash-separated path would
    # leak folder names into the returned code.
    base_name = re.split(r'[\\/]', str(filename))[-1]
    return base_name.split('_')[0][1:]



def fileType_parser(filename):
    '''
    Identify the precinct or block-map type named in a file name.

    :param filename: File name or path to inspect
    :return: The first of 'srprec', 'rgprec', 'rrprec', 'svprec', 'sr_blk',
             'rg_blk' (checked in that order) that occurs in filename, or
             False when none of them does
    '''
    # Order matters: when a name contains several of these tokens, the one
    # listed first wins regardless of where it sits in the name.
    for token in ('srprec', 'rgprec', 'rrprec', 'svprec', 'sr_blk', 'rg_blk'):
        hit = re.search(token, filename)
        if hit:
            return hit.group(0)
    return False





def get_files():
    '''
    Collect the county CSV files under the input folder, grouped by file type.

    The input folder is the first command-line argument; the current
    directory is used when no argument is given. Files are matched with the
    pattern "<folder>/c*/*.csv". The number of matched files and the set of
    file types found are printed.

    :return: A dictionary mapping each filenameParser() result (a keyword
             string, or False for names with no recognised keyword) to the
             list of matching file paths
    '''
    args = sys.argv[1:]
    # Fall back to the current directory, the same default the __main__ block
    # uses, instead of failing with IndexError when no argument is passed.
    path = args[0] if args else '.'
    files_to_work_on = glob.glob(path + '/c*/*.csv')
    print(len(files_to_work_on))

    dictionary_of_files_types = dict()
    for file_ in files_to_work_on:
        # setdefault creates the list the first time a file type is seen.
        dictionary_of_files_types.setdefault(filenameParser(file_), []).append(file_)

    print(set(dictionary_of_files_types))
    return dictionary_of_files_types


























def blockKey(df, fips):
    '''
    Prefix every tract value with the FIPS code.

    :param df: dataframe with a 'tract' column
    :param fips: fips code string placed in front of each tract
    :return: a list of strings, one per row: fips followed by the tract value
             converted to text and left-padded with zeros to 6 characters
    '''
    # NOTE(review): only the tract is appended here; the 'block' column is not
    # part of the result even though the name suggests a block key - confirm.
    padded_tracts = pd.Series(df['tract'].astype(str)).str.zfill(6)
    return [fips + tract for tract in padded_tracts]
    

def _blockKey(df, fips):
    '''
    Build block keys by concatenating the FIPS code, tract and block.

    :param df: dataframe with 'tract' and 'block' columns
    :param fips: fips code
    :return: a pandas Series with the same index as df; each value is the
             fips code, the tract left-padded with zeros to 6 characters, and
             the block, joined as one string
    '''
    # zfill returns a new Series, so the padded result has to be kept.
    tracts = df['tract'].astype(str).str.zfill(6)
    # Adding the plain string applies it to every row. Wrapping it in a
    # one-element Series would align on the index and leave NaN everywhere
    # except row 0.
    return str(fips) + tracts + df['block'].astype(str)


# def sprecKey(df, fips, key):

    # keyList= []
    #
    # for m, n in zip([fips] * len(df), df[key]):
    #     keyList.append(str(f'"{m + str(n)}"'))
    #
    # return keyList





def sprecKey(df, fips, key):
    '''
    Build quoted precinct keys from one column of a dataframe.

    :param df: dataframe holding the precinct column
    :param fips: fips code; converted to text before use
    :param key: name of the column whose values are appended to the fips code
    :return: a list with one string per row: the fips code followed by the
             row's value, wrapped in literal double-quote characters
    '''
    prefix = str(fips)
    # str() leaves strings unchanged, so one expression covers both string
    # and non-string column values.
    return [
        '"' + prefix + str(df[key].iloc[row]) + '"'
        for row in range(len(df))
    ]





def newColumns(filename):
    '''
    Read one county CSV and add county, FIPS and key columns to it.

    All columns are read as text and their names are lower-cased while the
    new columns are built. Depending on the file type found in the name:

    * precinct files (srprec, rrprec, svprec, rgprec) get a "<type>_key"
      column;
    * block-map files (sr_blk, rg_blk) get ELECTION, TYPE, "<prec>_key" and
      BLOCK_KEY columns, and the precinct column is converted to numbers
      when every value parses;
    * any other file only gets the county and fips columns.

    :param filename: path of the CSV file to read; it is opened as given
    :return: the dataframe with every column name upper-cased
    '''
    # The path is used as given rather than joined onto a module-level
    # folder: joining would repeat a relative folder that the path already
    # starts with, and would require a global that only exists when the file
    # runs as a script.
    df = pd.read_csv(filename, dtype=str)
    df.columns = map(str.lower, df.columns)

    # Parse the name outside the try block so these names are always bound
    # for the branches below.
    precinctType = ['srprec', 'rrprec', 'svprec', 'rgprec']
    fileTypekey = fileType_parser(filename)
    county_code = parse_county_code(filename)
    fips = "06" + county_code

    try:
        # int() drops the leading zeros of the county code; both values are
        # wrapped in literal double quotes.
        df.insert(loc=0, column='county', value=[f'"{int(county_code)}"'] * len(df))
        df.insert(loc=1, column='fips', value=[f'"{fips}"'] * len(df))
    except ValueError as ex:
        # Raised for a non-numeric county code or when the column already
        # exists. Processing continues without the column, as before.
        print(f'{filename}: {ex}')

    if fileTypekey in precinctType:
        df.insert(loc=2, column=f"{fileTypekey}_key", value=sprecKey(df, fips, key=fileTypekey))

    elif fileTypekey == 'sr_blk' or fileTypekey == 'rg_blk':
        prec_type = fileTypekey.split('_')[0]  # sr | rg
        prec_code = f"{prec_type}prec"
        electionType = parse_election_year(filename).lower()

        # Convert the precinct codes to integers when every value parses and
        # keep the original text otherwise. errors='ignore' is deprecated in
        # pandas, so the failure is caught explicitly.
        try:
            df[prec_code] = pd.to_numeric(df[prec_code], downcast='integer')
        except (ValueError, TypeError):
            pass

        df.insert(loc=2, column='ELECTION', value=[electionType] * len(df))
        df.insert(loc=3, column='TYPE', value=[fileTypekey] * len(df))
        df.insert(loc=4, column=f"{prec_code}_key", value=sprecKey(df, fips, prec_code))
        df.insert(loc=5, column='BLOCK_KEY', value=blockKey(df, fips))

    df.columns = map(str.upper, df.columns)
    return df

def unique_precinct_code():
    '''
    Merge the county files of each file type into one state-level file.

    The input files are grouped with get_files(). For each file type, every
    file is passed through newColumns(), the results are concatenated, and
    the combined dataframe is written with saveFiles(). The first file of
    each group is handed to saveFiles() as the name to derive the election
    code from.

    :return: None
    '''
    county_files = get_files()

    for file_Type, filenames in county_files.items():
        if not file_Type:
            # Names with no recognised keyword are grouped under False, which
            # saveFiles() cannot split into name parts. Skip them instead of
            # aborting the whole run.
            print(f'Skipping {len(filenames)} file(s) with no recognised file type')
            continue

        file_name = filenames[0]
        print(file_name)
        filetype_dfs = map(newColumns, filenames)
        state_df = pd.concat(filetype_dfs, ignore_index=True)
        print(file_Type)
        saveFiles(state_df, file_Type, file_name)

def saveFiles(stateLevel, file_type, file_name, output_dir='/home/admin/stateFiles'):
    '''
    Write a state-level dataframe to a CSV named after its file type.

    The file type is split on underscores and the output name depends on the
    number of parts:

    * 3 parts containing 'blk': state_<election>_<p0>_<p1>_<p2>
    * 3 parts otherwise:        state_<election>_<p1>_<p2>_by_<election>_<p0>
    * 2 parts:                  state_<election>_<p1>_by_<election>_<p0>
    * 1 part:                   state_<election lower-cased>_by_<election>_<p0>

    Any other number of parts has no naming rule; a message is printed and
    nothing is written.

    :param stateLevel: dataframe to write
    :param file_type: keyword string describing the file type, e.g. 'srprec_sov_data'
    :param file_name: a source file name from which the election code is parsed
    :param output_dir: directory the CSV is written to
    :return: None
    '''
    electionType = parse_election_year(file_name)
    keysList = file_type.split('_')
    print(file_type + ' filetype in saves files')

    if len(keysList) == 3:
        if 'blk' in keysList:
            stateString = f'state_{electionType}_{keysList[0]}_{keysList[1]}_{keysList[2]}'
        else:
            stateString = f'state_{electionType}_{keysList[1]}_{keysList[2]}_by_{electionType}_{keysList[0]}'
    elif len(keysList) == 2:
        stateString = f'state_{electionType}_{keysList[1]}_by_{electionType}_{keysList[0]}'
    elif len(keysList) == 1:
        stateString = f'state_{electionType.lower()}_by_{electionType}_{keysList[0]}'
    else:
        # Report the skip so that missing output is visible in the log.
        print(f'{file_type}: no output name rule for {len(keysList)} keyword parts; nothing written')
        return

    # QUOTE_NONE keeps the double quotes already embedded in the key columns
    # from being quoted again.
    stateLevel.to_csv(os.path.join(output_dir, f'{stateString}.csv'), index=False, quoting=csv.QUOTE_NONE)




#
# def saveFiles(stateLevel, electionType):
#
#     for key, value in stateLevel.items():
#         keysList = key.split('_')
#         electionType = electionType.lower()
#
#         if len(keysList) == 3:
#             if 'blk' in keysList:
#                 stateString = f'state_{electionType}_{keysList[0]}_{keysList[1]}_{keysList[2]}'
#                 value.to_csv(f'{stateString}.csv', index=False, quoting=csv.QUOTE_NONE)
#
#             else:
#                 stateString = f'state_{electionType}_{keysList[1]}_{keysList[2]}_by_{electionType}_{keysList[0]}'
#                 value.to_csv(f'{stateString}.csv', index=False, quoting=csv.QUOTE_NONE)
#
#
#         elif len(keysList) == 2:
#             stateString = f'state_{electionType}_{keysList[1]}_by_{electionType}_{keysList[0]}'
#             stateString = f'state_{electionType}_{keysList[1]}_by_{electionType}_{keysList[0]}'
#             value.to_csv(f'{stateString}.csv', index=False, quoting=csv.QUOTE_NONE)
#
#
#         elif len(keysList) == 1:
#             stateString = f'state_{electionType}_by_{electionType}_{keysList[4]}'
#             value.to_csv(f'{stateString}.csv', index=False, quoting=csv.QUOTE_NONE)






if __name__ == '__main__':

    ########################################################################################################
    # User defined parameter.                                                                              #
    # Set key_folder to a file-type key to merge only that type, or leave it as None to merge all types.   #
    # NOTE(review): nothing visible in this file reads key_folder - confirm it is still needed.            #
    ########################################################################################################
    key_folder = None

    # NOTE(review): start_time is recorded but never reported in the visible code.
    start_time = time.time()

    # Input folder: first command-line argument, or the current directory when none is given.
    args = sys.argv[1:]
    if args:
        path = args[0]
    else:
        path = '.'
    ## Test code:
    # path = 'C:/Users/hamsh/Documents/SWDB/Technical_Documentation/SWDB_Code/Technical_Documentation/P18/'

    unique_precinct_code()
