# coding: utf-8

import argparse
import codecs
import asflight


# monosyllabic prepositions + conjunctions a, i
proclitics_ver1 = [
    u"a", u"i", u"bez", u"či", u"dle", u"do", u"k", u"ke", u"kol", u"ku",
    u"na", u"nad", u"o", u"ob", u"od", u"po", u"pod", u"pro", u"před", u"přes", u"při",
    u"s", u"se", u"skrz", u"u", u"v", u"ve", u"vně", u"z", u"za", u"ze", u"zpod" ]

# version #1 + monosyllabic relative pronouns
# (derived from version #1 so that the two lists cannot drift apart; a new list is created)
proclitics_ver2 = proclitics_ver1 + [
    u"kdo", u"co", u"čí", u"jenž", u"jež", u"již", u"jíž", u"jichž", u"jimž",
    u"němž", u"níž", u"nichž", u"jímž", u"kom", u"kým", u"čem", u"čím", u"čích" ]

# list of enclitic pronouns
enclitics = [ u"se", u"si" ]


# unit keys: phone, pwordBoundPos, pphrsBoundPos, prosodeme, word


def _mark_proclitics( unit_list, proclitics ):
    """Join every proclitic with the word that follows it (forward pass).

    Units whose 'word' is set and is not '.' are treated as word starts
    (presumably the first phone of each word -- confirm against the ASF data).
    A word start opens a new prosodic word ('F') unless the preceding word is
    listed in `proclitics`; in that case the boundary between the two words is
    erased by writing '-' on both sides of it.
    """

    unit_prev = None
    join_next = False   # True when the most recent word was a proclitic

    for unit in unit_list:

        if unit['pwordBoundPos'] == 'P':
            # a 'P' unit closes the prosodic word in front of it and is never joined
            if ( unit_prev is not None ) and ( unit_prev['pwordBoundPos'] != 'P' ):
                unit_prev['pwordBoundPos'] = 'L'
            join_next = False

        elif ( unit['word'] is not None ) and ( unit['word'] != '.' ):

            if not join_next:
                # regular word: close the previous prosodic word, open a new one
                if ( unit_prev is not None ) and ( unit_prev['pwordBoundPos'] != 'P' ):
                    unit_prev['pwordBoundPos'] = 'L'
                unit['pwordBoundPos'] = 'F'

            else:
                # word after a proclitic: erase the boundary between them; an 'F'
                # on the previous unit is kept (the proclitic is a single unit
                # which still opens the prosodic word)
                if ( unit_prev is not None ) and ( unit_prev['pwordBoundPos'] not in ( 'P', 'F' ) ):
                    unit_prev['pwordBoundPos'] = '-'
                unit['pwordBoundPos'] = '-'

            join_next = ( unit['word'].lower() in proclitics )

        unit_prev = unit


def _mark_enclitics( unit_list ):
    """Join a phrase-final enclitic with the word in front of it (backward pass).

    Only the last real word (not None, not '.') found when walking back from a
    unit with pphrsBoundPos 'L' is checked against `enclitics`.
    """

    join_prev = False    # True right after an enclitic word start was handled
    phrase_end = False   # True until the last word of the current phrase is found

    for unit in reversed( unit_list ):

        if unit['pphrsBoundPos'] == 'L':
            phrase_end = True

        if join_prev:
            # this is the unit directly in front of the enclitic
            # NOTE(review): it is overwritten whatever it currently holds (even 'P' or 'F') -- confirm
            unit['pwordBoundPos'] = '-'
            join_prev = False

        if phrase_end and ( unit['word'] is not None ) and ( unit['word'] != "." ):
            join_prev = ( unit['word'].lower() in enclitics )
            if join_prev:
                unit['pwordBoundPos'] = '-'
            phrase_end = False


def _fix_prosodemes( unit_list ):
    """Re-align prosodemes with the new prosodic words (backward pass).

    The prosodeme of every unit with pphrsBoundPos 'L' is copied backwards up
    to and including the nearest unit with pwordBoundPos 'F'. Every other unit
    whose prosodeme is neither '0' nor 'X.X' is reset to '0'.
    """

    prosodeme = None   # value being propagated, None outside a phrase-final prosodic word

    for unit in reversed( unit_list ):

        if unit['pphrsBoundPos'] == 'L':
            prosodeme = unit['prosodeme']

        if prosodeme is not None:
            unit['prosodeme'] = prosodeme

            if unit['pwordBoundPos'] == 'F':
                # start of the phrase-final prosodic word reached
                prosodeme = None

        elif ( unit['prosodeme'] != '0' ) and ( unit['prosodeme'] != 'X.X' ):
            unit['prosodeme'] = '0'


def set_pwords( unit_list, ver=1 ):
    """Recompute prosodic-word boundaries of one utterance in place.

    Proclitics are joined with the following word, a phrase-final enclitic is
    joined with the preceding word and the prosodemes are then re-aligned with
    the resulting prosodic words.

    Arguments:
        unit_list -- sequence of dict-like units; the keys 'word',
                     'pwordBoundPos', 'pphrsBoundPos' and 'prosodeme' are read
                     and 'pwordBoundPos' / 'prosodeme' are modified
        ver       -- proclitic list to use: 1 (proclitics_ver1) or
                     2 (proclitics_ver2)

    Returns None. Raises ValueError for an unknown `ver`; it used to surface
    as an UnboundLocalError in the middle of processing, or not at all.
    """

    # looked up at call time so that run-time changes of the lists are honoured
    proclitics_by_ver = { 1: proclitics_ver1, 2: proclitics_ver2 }

    if ver not in proclitics_by_ver:
        raise ValueError( "unsupported proclitic list version: %r (expected 1 or 2)" % ( ver, ) )

    # ----- process proclitics
    _mark_proclitics( unit_list, proclitics_by_ver[ ver ] )

    # ----- process enclitic
    _mark_enclitics( unit_list )

    # ----- correction of prosodemes
    _fix_prosodemes( unit_list )


# ----------


def main():
    """Command-line entry point.

    Loads the input ASF file through asflight.AsfLight, runs set_pwords() on
    every utterance it yields and writes the result to the output ASF file.
    The same code page is used for reading and writing.
    """

    parser = argparse.ArgumentParser( description="Modify prosodic words in ASF file." )

    parser.add_argument( type=str, metavar="ASF_IN", dest="asfIn", help="input ASF file" )
    parser.add_argument( type=str, metavar="ASF_OUT", dest="asfOut", help="output ASF file" )
    parser.add_argument( "-c", "--code-page", type=str, metavar="CODEPAGE", dest="codePage",
                         help="encoding of all files, default value: %(default)s", default='utf-8' )
    # exposes the `ver` argument of set_pwords(); the default keeps the previous behaviour
    parser.add_argument( "-p", "--proclitics-ver", type=int, choices=[ 1, 2 ], metavar="VER", dest="procliticsVer",
                         help="proclitic list version (1: prepositions + conjunctions a, i; "
                              "2: version 1 + relative pronouns), default value: %(default)s", default=1 )

    args = parser.parse_args()

    asf = asflight.AsfLight( args.asfIn, args.codePage )

    # units are modified in place, so the container can be written back directly
    for utt_name in asf:
        set_pwords( asf[ utt_name ], ver=args.procliticsVer )

    asf.write( args.asfOut, args.codePage )


# ----------

# execute main() only when the file is run as a script, not when it is imported
if __name__ == "__main__":
    main()
