#!/usr/bin/env python
# -*- coding: utf8 -*-
import sys
import codecs
import re
from xml.etree.ElementTree import ElementTree
from xml.etree.ElementTree import fromstring
### NAMESPACES: ################   NOT USED RIGHT NOW BUT THE MECHANISM IS (PARTLY) AVAILABLE
# Namespace prefix names for the various id spaces.  give_namespace()
# currently ignores these (prefixing is switched off there).
ns_saldo_pos = 'saldo_pos'
ns_saldo_sense = 'saldo_sense'
ns_saldo_lemgram = 'saldo_lemgram'
ns_saldo_paradigm = 'saldo_pdgm'
ns_simple_semtype = 'simple_semtype'
ns_kelly_id = 'kelly_id'
ns_lwt_id = 'lwt_id'
ns_simple_class = 'simple_class'
ns_simple_domain = 'simple_domain'
################################
class LMF:
    """Container for one LMF lexical resource: entries plus semantic predicates.

    NOTE(review): most string literals in __str__ are empty -- they look
    like XML tags stripped by a tag/entity-removing pass (e.g.
    ``' ' % self.lang`` raises TypeError because no ``%s`` is left in the
    literal).  Restore the markup from version control before serialising.
    """
    def __init__(self, lang):
        self.lang = lang  # language code, e.g. 'swe'
        self.lexical_entries = []
        self._lexical_entries_set = set()  # "pos.writtenform" keys, speed hack (see add_lexical_entry)
        self._le_senses = set()  # (sense_id, LexicalEntry) pairs, see search_for_le_with_sense()
        self.useNamespace = False  # toggles one alternative header line in __str__
        self.semantic_predicates = []
    def add_lexical_entry(self, lexical_entry):
        self.lexical_entries.append(lexical_entry)
        self._lexical_entries_set.add(".".join([lexical_entry._pos, lexical_entry._wf]))   # An ugly hack to speed things up a little (replace with something better in the future)
        
    def add_semantic_predicate(self, semantic_predicate):
        self.semantic_predicates.append(semantic_predicate)
    def __str__(self):
        # Serialisation order: header lines, entries, predicates, footer lines.
        return "\n".join([
                '',
                '',
                '',
                '',
                ' ',
                '',
                '' if not self.useNamespace else '',
                ' ' % self.lang,
                "\n".join([str(e) for e in self.lexical_entries]),
                "\n".join([str(s) for s in self.semantic_predicates]),
                '',
                ''])
class LexicalEntry:    
    """One lexical entry: lemma, word forms, senses, features, SALDO links.

    NOTE(review): the id-bearing literal in __str__ is empty -- the XML
    markup appears to have been stripped (``'' % x`` raises TypeError at
    runtime).  Restore the tag text from version control.
    """
    def __init__(self):
        self.features = []
        self.lemma = None  # a Lemma instance, assigned by the builder functions
        self.wordforms = []
        self.senses = []
        self.saldolinks = []
        self._pos = ""  # consumed by LMF._lexical_entries_set speed hack
        self._wf = ""   # consumed by LMF._lexical_entries_set speed hack
        self.idattr = ""  # optional id attribute for the entry
        
    def add_sense(self, sense):
        self.senses.append(sense)
    def add_feature(self, feature):
        self.features.append(feature)
    def add_feature_unique(self, feature):
        # Skip features whose (att, val) pair is already present.
        for existing_feature in self.features:
            if(existing_feature.att == feature.att and existing_feature.val == feature.val):
                return
        self.add_feature(feature)
    
    def add_wordform(self, wordform):
        self.wordforms.append(wordform)
    def add_saldoLink(self, saldoLink):
        self.saldolinks.append(saldoLink)
        
    def __str__(self):
        # Children render in a fixed order: features, lemma, word forms,
        # senses, SALDO links.
        le_string = ''
        if(self.idattr):
            le_string = '' % (self.idattr)
        return "\n".join([
            le_string,
            '\n'.join([str(f) for f in self.features]),
            str(self.lemma),
            '\n'.join([str(w) for w in self.wordforms]),
            '\n'.join([str(s) for s in self.senses]),
            '\n'.join([str(f) for f in self.saldolinks]),
            ''])
class SaldoLink:
    """A link from a lexical entry to a SALDO sense id.

    NOTE(review): the format literal in __str__ is empty -- the XML markup
    appears stripped (``'' % x`` raises TypeError); restore from VCS.
    """
    def __init__(self, saldo_id):
        self.saldo_id = saldo_id
        
    def __str__(self):
        return '' % (self.saldo_id)
"""                    
class Lemma:
    def __init__(self):
        self.features = [] # now including writtenForm and partOfSpeech!
    
    def add_feature(self, feature):
        self.features.append(feature)
    
    def add_feature_unique(self, feature):
        for existing_feature in self.features:
            if(existing_feature.att == feature.att and existing_feature.val == feature.val):
                return
        self.add_feature(feature)
    
    def __str__(self):
        if self.features:
            return "\n".join(['\n',
                              '\n'.join([str(f) for f in self.features]),
                              '\n'])
        else:
            return ''
"""
class Lemma:
    """The lemma node of a LexicalEntry; serialises its FormRepresentations."""
    def __init__(self):
        self.form_representations = []
        # Kept for API compatibility; writtenForm/partOfSpeech now live on
        # the FormRepresentation level and plain features are not rendered.
        self.features = []

    def add_feature(self, feature):
        self.features.append(feature)

    def add_feature_unique(self, feature):
        """Append *feature* unless an identical (att, val) pair already exists."""
        already_there = any(f.att == feature.att and f.val == feature.val
                            for f in self.features)
        if not already_there:
            self.features.append(feature)

    def add_form_representation(self, form_representation):
        self.form_representations.append(form_representation)

    def __str__(self):
        # Empty lemmas collapse to the empty string; otherwise only the
        # form representations are rendered between the delimiters.
        if not (self.features or self.form_representations):
            return ''
        rendered = [str(fr) for fr in self.form_representations]
        return "\n".join(['', "\n".join(rendered), ''])
class WordForm:
    """One inflected form of an entry; renders its representations then features."""
    def __init__(self):
        self.features = []
        self.form_representations = []

    def add_feature(self, feature):
        self.features.append(feature)

    def add_form_representation(self, form_representation):
        self.form_representations.append(form_representation)

    def __str__(self):
        parts = ['']
        parts.append("\n".join(str(fr) for fr in self.form_representations))
        parts.append("\n".join(str(ft) for ft in self.features))
        parts.append('')
        return "\n".join(parts)
            
class FormRepresentation:
    """A bundle of features describing one written form of a lemma."""
    def __init__(self):
        self.features = []

    def add_feature(self, feature):
        self.features.append(feature)

    def add_feature_unique(self, feature):
        """Add *feature* only when no feature with the same (att, val) exists."""
        duplicate = any(f.att == feature.att and f.val == feature.val
                        for f in self.features)
        if not duplicate:
            self.features.append(feature)

    def __str__(self):
        if not self.features:
            return ''
        body = "\n".join(str(f) for f in self.features)
        return "\n".join(['', body, ''])
class Feature:
    """An attribute/value pair; the value is XML-escaped via escape().

    NOTE(review): the format literal in __str__ is empty -- the XML markup
    appears stripped (``'' % (a, b)`` raises TypeError); restore from VCS.
    """
    def __init__(self, att, val):
        self.att = att
        self.val = val
        
    def __str__(self):
        return '' % (self.att, escape(self.val))
class Sense:
    """One sense of an entry: relations, examples, features, predicative reps.

    NOTE(review): the two format literals in __str__ are empty -- the XML
    markup appears stripped (``'' % x`` raises TypeError); restore from VCS.
    """
    def __init__(self, sense):
        self.sense =  sense  # sense identifier string
        self.relations = []
        self.predicative_representations = []
        self.sense_examples = []
        self.features = []
        
    def add_sense_relation(self, sense_relation):
        self.relations.append(sense_relation)
    
    def add_predicative_representation(self, predicative_representation):
        self.predicative_representations.append(predicative_representation)
    
    def add_sense_example(self, sense_example):
        self.sense_examples.append(sense_example)
    def add_feature(self, feature):
        self.features.append(feature)
    def __str__(self):
        # A childless sense collapses to the short form; otherwise children
        # render in a fixed order: features, predicative representations,
        # relations, examples.
        if not self.relations and not self.predicative_representations and not self.sense_examples and not self.features:
            return '' % (self.sense)
        else:
            return "\n".join(['' % (self.sense),
                            '\n'.join([str(f) for f in self.features]),
                            "\n".join([str(pre) for pre in self.predicative_representations]),
                            "\n".join([str(rel) for rel in self.relations]),
                            "\n".join([str(ex) for ex in self.sense_examples]),
                            ''
                            ])
class SenseRelation:
    """A typed relation from one sense to a target sense id.

    NOTE(review): the format literals in __str__ are empty -- the XML
    markup appears stripped (``'' % x`` raises TypeError); restore from VCS.
    """
    def __init__(self, target, relation_types):
        self.target = target  # IDREF of the related sense
        self.relation_types = relation_types  # list of labels, e.g. ['primary']
        self.features = []
        
    def add_feature(self, feature):
        self.features.append(feature)
        
    def __str__(self):
        return "\n".join(['' % (self.target),
                            '\n'.join(['' % t for t in self.relation_types]),
                            '\n'.join([str(f) for f in self.features]),
                            ''
                        ]) 
class SenseExample:
    """A usage example attached to a sense.

    NOTE(review): the format literal in __str__ is empty -- the XML markup
    appears stripped (``'' % x`` raises TypeError); restore from VCS.
    """
    def __init__(self, example):
        self.example = example
        
    def __str__(self):
        return "\n".join([
                        '',
                        '' % (self.example),
                        ''
                        ])
        
            
class SemanticPredicate:
    """A semantic predicate (frame) with typed arguments and features.

    NOTE(review): the format literals in __str__ are empty -- the XML
    markup appears stripped (``'' % x`` raises TypeError); restore from VCS.
    """
    def __init__(self, id, domain, semantic_types):
        self.id = id
        #self.domain = domain
        self.semantic_types = semantic_types
        self.semantic_arguments = []
        self.features = []
        # The domain is stored as an ordinary feature rather than a field.
        if domain != None and domain != "":
            self.add_feature(Feature("domain", domain))
    
    def add_semantic_argument(self, argument):
        self.semantic_arguments.append(argument)
    
    def add_feature(self, feature):
        self.features.append(feature)
    
    def generateFeatures(self, att, vals):
        # One feature per value, whitespace-stripped.
        for val in vals:
            self.add_feature(Feature(att, val.strip()))
        
    def __str__(self):
        # NOTE(review): 'extras' is built here but never used below.
        extras = ""
        for st in self.semantic_types:
            extras += '' 
        return "\n".join([
                        '' % (self.id),
                        "\n".join(['\n' % (st) for st in self.semantic_types]),
                        "\n".join([str(fe) for fe in self.features]),
                        "\n".join([str(sa) for sa in self.semantic_arguments]),
                        ''
                        ])
class SemanticArgument:
    """A (semantic role, core type) argument of a SemanticPredicate.

    NOTE(review): the format literal in __str__ is empty -- the XML markup
    appears stripped (``'' % (a, b)`` raises TypeError); restore from VCS.
    """
    def __init__(self, semantic_role, core_type):
        self.semantic_role = semantic_role
        self.core_type = core_type
    
    def __str__(self):
        return '' % (self.semantic_role, self.core_type)
class PredicativeRepresentation:
    """Points a sense at a SemanticPredicate by idref.

    NOTE(review): the format literal in __str__ is empty -- the XML markup
    appears stripped (the idref was evidently rendered twice); restore from VCS.
    """
    def __init__(self, idref):
        self.idref = idref
        
    def __str__(self):
        return '' % (self.idref, self.idref)
# HELPER FUNCTIONS -------------------------------------------------------------------------------------
def escape(s):
    """Return *s* with the five XML-special characters entity-escaped.

    '&' must be replaced first so already-inserted entities are not
    double-escaped.  (The previous body contained identity replacements
    and a syntax-breaking quote literal -- the entity texts were
    evidently lost to an HTML-unescaping pass; this restores them.)
    """
    s = s.replace('&', '&amp;')
    s = s.replace("'", '&apos;')
    s = s.replace('<', '&lt;')
    s = s.replace('>', '&gt;')
    return s.replace('"', '&quot;')
def read_csv(num_of_fields, tolerates=-1):
    """Yield tab-separated rows from stdin, each exactly *num_of_fields* long.

    Rows that are short but have at least *tolerates* columns are
    right-padded with empty strings; rows that are too long, or shorter
    than *tolerates*, are silently dropped.  The default ``tolerates=-1``
    means "no padding at all" (only exact-width rows pass).
    """
    if tolerates == -1:
        tolerates = num_of_fields
    for raw in sys.stdin:
        fields = raw[:-1].split('\t')
        missing = num_of_fields - len(fields)
        if missing == 0:
            yield fields
        elif missing > 0 and len(fields) >= tolerates:
            yield fields + [""] * missing
    
def read_csv_from_file(path, num_of_fields):
    """Yield up to *num_of_fields* UTF-8-encoded columns per line of *path*."""
    with codecs.open(path, encoding='utf-8') as source:
        for raw in source:
            columns = raw[:-1].split('\t')
            yield [column.encode("utf-8") for column in columns[:num_of_fields]]
    
def give_namespace(namespace, identifier):
    """Return *identifier* unqualified; namespace prefixing is switched off.

    (The '%s:%s' prefixed variant is intentionally disabled.)
    """
    return identifier
def search_for_le_with_sense(lmf, sense):
    """Return the entry registered for *sense* in lmf._le_senses, or None."""
    matches = (entry for (sense_id, entry) in lmf._le_senses if sense_id == sense)
    return next(matches, None)
def dequote(s):
    """Strip one pair of matching single or double quotes from *s*.

    Returns *s* unchanged when it is not wrapped in a matching pair.
    The length guard keeps strings shorter than two characters safe:
    the original indexed s[0]/s[-1] unconditionally, so it raised
    IndexError on "" (and silently emptied a lone quote character).
    """
    if len(s) >= 2 and s[0] == s[-1] and s[0] in ("'", '"'):
        return s[1:-1]
    return s
# --- SALDO ------------------------------------------------
##def sorting_by_sense(a, b):
##    if a[0] == b[0]:
##        return 0
##    elif a[0] < b[0]:
##        return -1
##    else:
##        return 1
# a sense is unique for an entry.
def saldo_data():
    """Collect SALDO rows from stdin.

    Returns (forms, senses): *forms* maps a sense id to its list of
    (lemgram, gf, pos, paradigm) tuples; *senses* is a list of
    (saldo, primary, secondary) triples sorted on the sense id.
    """
    forms = {}
    senses = set()
    for (saldo, primary, secondary, lemgram, gf, pos, paradigm) in read_csv(num_of_fields=7):
        forms.setdefault(saldo, []).append((lemgram, gf, pos, paradigm))
        senses.add((saldo, primary, secondary))
    return (forms, sorted(senses, key=lambda triple: triple[0]))
def saldo():
    """Build the SALDO lexicon from stdin rows and return it as LMF text.

    A synthetic entry for the primitive root sense 'PRIM..1' is emitted
    first; every real sense then gets one LexicalEntry carrying its form
    representations, a 'primary' relation and any 'secondary' relations.
    (Commented-out debug returns and the unused counter were removed.)
    """
    lmf = LMF('swe')
    (forms, senses) = saldo_data()
    # Synthetic top entry for the primitive root sense.
    root_entry = LexicalEntry()
    root_entry.lemma = Lemma()
    root_entry.add_sense(Sense(give_namespace(ns_saldo_sense, 'PRIM..1')))
    lmf.add_lexical_entry(root_entry)
    for (saldo_id, primary, secondary) in senses:
        lexical_entry = LexicalEntry()
        lemma = Lemma()
        for (lemgram, gf, pos, paradigm) in forms[saldo_id]:
            form_representation = FormRepresentation()
            form_representation.add_feature(Feature("writtenForm", gf))
            form_representation.add_feature(Feature("partOfSpeech", give_namespace(ns_saldo_pos, pos)))
            form_representation.add_feature(Feature("lemgram", give_namespace(ns_saldo_lemgram, lemgram)))
            form_representation.add_feature(Feature("paradigm", give_namespace(ns_saldo_paradigm, paradigm)))
            lemma.add_form_representation(form_representation)
        lexical_entry.lemma = lemma
        sense = Sense(give_namespace(ns_saldo_sense, saldo_id))
        # Every sense points at its primary parent (even 'PRIM..1').
        sense.add_sense_relation(SenseRelation(give_namespace(ns_saldo_sense, primary), ['primary']))
        if secondary != 'PRIM..1':
            for sec in secondary.split(' '):
                sense.add_sense_relation(SenseRelation(give_namespace(ns_saldo_sense, sec), ['secondary']))
        lexical_entry.add_sense(sense)
        lmf.add_lexical_entry(lexical_entry)
    return str(lmf)
#def search_for_lexical_entry(lmf, pos, saldo):
#    #saldo = saldo.split("..")[0]
#    #if (pos + "." + saldo) in lmf._lexical_entries_set: # Fulhack for att gora det snabbare, ersatt med nat battre i framtiden.
#    if saldo in 
#        for le in lmf.lexical_entries:
#            if pos == le._pos and saldo == le._wf:
#                return le
#    return None
#def search_for_sense(lexical_entry, saldo):
#    for s in lexical_entry.senses:
#        if s.sense == saldo:
#            return s
#    return None
    
# ------- SALDO EXAMPLES -----------------------------------
def saldo_examples_data():
    """Return (sense, example) pairs from stdin, skipping empty or '*' examples."""
    pairs = []
    for (saldo, _, _, _, _, example) in read_csv(num_of_fields=6):
        example = example.strip()
        if example and example != "*":
            pairs.append((give_namespace(ns_saldo_sense, saldo.strip()), example))
    return pairs
def saldo_examples():
    """Build an LMF with one LexicalEntry per sense carrying its examples.

    Duplicate (example, sense) pairs coming from different sources are
    emitted only once.  Membership is tracked in a set instead of the
    original list, whose ``in`` check made deduplication O(n) per example.
    """
    lmf = LMF('swe')
    added_examples = set()
    for (saldo, example) in saldo_examples_data():
        le = search_for_le_with_sense(lmf, saldo)
        if not le:
            le = LexicalEntry()
            le.lemma = Lemma()
            lmf._le_senses.add((saldo, le))
            sense = Sense(saldo)
            le.add_sense(sense)
            lmf.add_lexical_entry(le)
        sense = le.senses[0]
        if (example, saldo) not in added_examples:
            added_examples.add((example, saldo))
            sense.add_sense_example(SenseExample(escape(example)))
    return str(lmf)
# ------- SWESAURUS ----------------------------------------
def swesaurus_data():
    """Read (saldo1, saldo2, relation, degree, source) rows from stdin."""
    rows = read_csv(num_of_fields=5)
    return rows
def swesaurus():
    """Build the Swesaurus sense-relation LMF from stdin rows.

    For symmetric relations ('syn') both directions are materialised,
    because a SenseRelation IDREF must resolve to an existing Sense ID;
    other relation types only get the x-to-y direction.
    (``== None`` was replaced with the idiomatic ``is None``.)
    """
    lmf = LMF('swe')
    for (saldo1, saldo2, type_of, degree, source) in swesaurus_data():
        saldo = [give_namespace(ns_saldo_sense, saldo1.strip()),
                 give_namespace(ns_saldo_sense, saldo2.strip())]
        directions = 2 if type_of == "syn" else 1
        for i in range(directions):
            entry = search_for_le_with_sense(lmf, saldo[i])
            if entry is None:
                entry = LexicalEntry()
                entry.lemma = Lemma()
                lmf._le_senses.add((saldo[i], entry))
                lmf.add_lexical_entry(entry)
                sense = Sense(saldo[i])
                entry.add_sense(sense)
            else:
                sense = entry.senses[0]
            sense_relation = SenseRelation(saldo[(i + 1) % 2], [type_of])
            sense_relation.add_feature(Feature("degree", degree))
            sense_relation.add_feature(Feature("source", source))
            sense.add_sense_relation(sense_relation)
    return str(lmf)
# ---------- LWT -------------------------------------------
def lwt_data():
    """Read (id, saldo, english, definition, example) rows from stdin."""
    rows = read_csv(num_of_fields=5)
    return rows
def lwt(): # LexicalEntry acts more like a semantic entry right now
    """Build the LWT (loanword typology) LMF from stdin rows.

    Each row becomes one LexicalEntry whose sense id is 'lwt--<id>';
    non-primitive SALDO senses are linked both as SaldoLinks and as
    'saldoSense' features.  (A normalised copy of the English gloss used
    to be computed here but was never used, so it has been removed.)
    """
    lmf = LMF('swe')
    lmf.useNamespace = True
    for (s_id, saldo, eng, definition, example) in lwt_data():
        le = LexicalEntry()
        sense = Sense("lwt--" + s_id)
        for s in saldo.strip().split():
            s = s.strip()
            if s != 'PRIM..1':
                le.add_saldoLink(SaldoLink(s))
                sense.add_feature(Feature("saldoSense", s))
        le.lemma = Lemma()
        le.add_sense(sense)
        lmf.add_lexical_entry(le)
        form_representation = FormRepresentation()
        form_representation.add_feature(Feature("lwtID", s_id))
        form_representation.add_feature(Feature("english", eng))
        if definition and definition != "--":
            form_representation.add_feature(Feature("definition", dequote(definition)))
        if example and example != "--":
            # NOTE(review): maybe the apostrophes around (most of) the
            # examples should be stripped off too.
            form_representation.add_feature(Feature("example", dequote(example)))
        le.lemma.add_form_representation(form_representation)
    return str(lmf)
# ---------- PAROLE ----------------------------------------
def parole_data():
    """Read (baseform, saldo, pos, valency, paroleid) rows from stdin."""
    rows = read_csv(num_of_fields=5)
    return rows
def parole():
    """Build the Parole LMF from stdin rows.

    Rows sharing a parole id are merged into one entry that accumulates
    all of its SALDO senses.  A handful of known data glitches (tabs
    turned into spaces, a parenthesised base form) are patched inline.
    (A large block of commented-out dead code was removed.)
    """
    lmf = LMF('swe')
    lmf.useNamespace = True
    parole_entries = {}
    for (baseform, saldo, pos, valency, paroleid) in parole_data():
        # 'zz' is a placeholder id; make it unique per base form.
        if paroleid == "zz":
            paroleid = baseform + "_zz"
        saldo = saldo.strip()
        # Known source glitches where tabs have become spaces:
        if saldo == "av arbeta_av..1":
            baseform = "arbeta av"
            saldo = "arbeta_av..1"
        elif saldo == "bort arbeta_bort..1":
            baseform = "arbeta bort"
            saldo = "arbeta_bort..1"
        elif saldo == " epilera..1":
            saldo = "epilera..1"
        elif saldo == "frottera _sig..1":
            saldo = "frottera_sig..1"
        if baseform == "gille(s)stuga":
            baseform = "gillestuga"
        if paroleid in parole_entries:
            if saldo != "PRIM..1":
                parole_entries[paroleid]["saldo"].append(saldo)
        else:
            parole_entries[paroleid] = {
                "pos": pos,
                "baseform": baseform,
                "valency": valency,
                "paroleid": paroleid,
                "saldo": [saldo] if saldo != "PRIM..1" else [],
            }
    for data in parole_entries.values():
        le = LexicalEntry()
        lemma = Lemma()
        le.lemma = lemma
        fr = FormRepresentation()
        lemma.add_form_representation(fr)
        s = Sense("parole--" + data["paroleid"])
        le.add_sense(s)
        fr.add_feature(Feature("partOfSpeech", data["pos"]))
        fr.add_feature(Feature("writtenForm", data["baseform"]))
        fr.add_feature(Feature("valency", data["valency"]))
        fr.add_feature(Feature("paroleID", data["paroleid"]))
        for sid in data["saldo"]:
            le.add_saldoLink(SaldoLink(sid))
            s.add_feature(Feature("saldoSense", sid))
        lmf.add_lexical_entry(le)
    return str(lmf)
# ---------- SIMPLE ----------------------------------------
# Expansion table for SIMPLE ontology property abbreviations; consumed by
# simple_expand_semantic_type(), which also strips a single leading '+'.
simple_semantic_types = {'++ext' : 'Extensional',
                         '++psy' : 'Psychological_property',
                         '++phy' : 'Physical_property',
                         '++soc' : 'Social_property',
                         '++tem' : 'Temporal_property',
                         '++inp' : 'Intensifying_property',
                         '++rel' : 'Relational_property'}
# Maps SIMPLE argument-pattern strings to an argument count (as a string);
# several keys exist only to absorb apparent markup errors in the data.
simple_argmap = {'a_00' : '0',
                 'a0' : '1',
                 'a0 a1' : '2',
                 'a0 a1a' : '2', # markup error?
                 'a0 a' : '2', # markup error?
                 'a0 a1 a2' : '3',
                 'a0 a1a2' : '3'} # markup error?
def simple_expand_semantic_type(abbrev):
    """Expand a SIMPLE ontology abbreviation (e.g. '++psy') to its full name.

    Unknown codes pass through unchanged; a single leading '+' left after
    the table lookup is dropped.  startswith() also keeps the empty
    string safe, where the original ``abbrev[0]`` raised IndexError.
    """
    abbrev = simple_semantic_types.get(abbrev, abbrev)
    if abbrev.startswith("+"):
        abbrev = abbrev[1:]
    return abbrev
def simple_data():
    """Read the 17-column SIMPLE rows from stdin."""
    rows = read_csv(num_of_fields=17)
    return rows
def simple():
    """Build the SIMPLE lexicon LMF from the 17-column stdin rows.

    Each row becomes a LexicalEntry with sense id
    'simple--<paroleid>-<sense number>'; most columns become sense-level
    features, while POS/written form/parole id land on the form
    representation.  (A very large block of commented-out dead code that
    duplicated this logic at form-representation level was removed.)
    """
    lmf = LMF('swe')
    lmf.useNamespace = True
    for (baseform, paroleid, _, ssensen, gldb, bc, ontology, domain, lexiquest,
         gldbex, usynsemu, args, argreal, predfornoun, verbnoun, pos, saldo) in simple_data():
        le = LexicalEntry()
        sense = Sense("simple--" + paroleid + "-" + ssensen[2:])
        le.add_sense(sense)
        if domain == "g":
            domain = "Gen"
        # SemanticType (= ontology)
        sense.add_feature(Feature("semanticType", simple_expand_semantic_type(ontology)))
        sense.add_feature(Feature("domain", domain))
        if ssensen[0:2] == "<<":
            sense.add_feature(Feature("simpleSenseNumber", ssensen[2:]))
        sense.add_feature(Feature("GLDB", gldb))
        if gldbex != "-":
            sense.add_feature(Feature("GLDBExample", gldbex)) # lemma/sense/nuance
        # Basic Concepts
        if bc != "ZZ":
            sense.add_feature(Feature("basicConcept", bc)) # lemma/sense/nuance
        # LexiQuest classes ('@'-separated)
        for c in lexiquest.strip().split("@"):
            sense.add_feature(Feature("class", c))
        # Number of links between an usyn construction and the
        # corresponding semu specifications
        usynsemu = usynsemu.strip()
        if usynsemu != "-":
            if usynsemu[0] == "p":
                usynsemu = usynsemu[1:]
            sense.add_feature(Feature("numberOfUsynSemuLinks", usynsemu))
        # Arguments
        if args != "-":
            sense.add_feature(Feature("numberOfArguments", simple_argmap[args]))
        # Argument realisations ('_OR_'-separated alternatives could be
        # split out individually in the future)
        argreal = argreal.strip()
        if argreal != "aa_00":
            sense.add_feature(Feature("argumentRealisation", argreal))
        # Predicate for noun
        if predfornoun != "-":
            sense.add_feature(Feature("predicate", predfornoun))
        # Type of verbal noun: l_n (verb nominalisation); l_ag (agent
        # nominalisation); l_pa (process nominalisation); otherwise l_00
        if verbnoun != "-" and verbnoun != "l_00":
            sense.add_feature(Feature("verbalizedNounType", verbnoun[2:]))
        lemma = Lemma()
        le.lemma = lemma
        fr = FormRepresentation()
        lemma.add_form_representation(fr)
        fr.add_feature(Feature("partOfSpeech", pos))
        fr.add_feature(Feature("writtenForm", baseform))
        fr.add_feature(Feature("paroleID", paroleid))
        if saldo != "-":
            for s in saldo.split(";"):
                le.add_saldoLink(SaldoLink(s))
                sense.add_feature(Feature("saldoSense", s))
        lmf.add_lexical_entry(le)
    return str(lmf)
# ---------- KELLY -----------------------------------------
# Kelly part-of-speech labels -> SALDO POS tags; labels missing here fall
# through unchanged in map_kelly_pos_to_saldo().
kelly_to_saldo = {'verb' : 'vb',
                  'noun' : 'nn',
                  'noun-en' : 'nn',
                  'noun-ett' : 'nn',
                  'noun-en/-ett' : 'nn',
                  'adjective' : 'av',
                  'numeral' : 'nl',
                  'proper name' : 'pm',
                  'adverb' : 'ab',
                  'aux verb' : 'vb',
                  'conj' : 'kn',
                  'det' : 'pn',
                  'interj' : 'in',
                  'particip' : 'vb',
                  'particle' : 'ab',
                  'prep' : 'pp',
                  'pronoun' : 'pn',
                  'subj' : 'sn'}
def map_kelly_pos_to_saldo(pos):
    """Translate a Kelly POS label to its SALDO tag; unknown labels pass through."""
    try:
        return kelly_to_saldo[pos]
    except KeyError:
        return pos
def kelly_data():
    """Read 10-column Kelly rows from stdin, padding rows with 9 columns."""
    rows = read_csv(num_of_fields=10, tolerates=9)
    return rows
def kelly():
    """Build the Kelly frequency-list LMF from stdin rows.

    Each row becomes one LexicalEntry with sense id 'kelly--<baseform>';
    frequency/CEFR metadata lands on the form representation.  (A block
    of commented-out dead code and the unused 'kellyid' local were
    removed.)
    """
    lmf = LMF('swe')
    lmf.useNamespace = True
    for (id_num, raw, wpm, cefr, source, grammar, baseform, saldo, pos, example) in kelly_data():
        saldo = saldo.strip()
        # A parenthesised tail on the base form is split off and kept as
        # separate form information.
        if "(" in baseform:
            extrainfo = "(" + baseform.split("(")[1]
            baseform = baseform.split("(")[0].strip()
        else:
            extrainfo = None
        le = LexicalEntry()
        sense = Sense("kelly--" + baseform)
        le.add_sense(sense)
        for s in saldo.split():
            le.add_saldoLink(SaldoLink(s.strip()))
            sense.add_feature(Feature("saldoSense", s))
        lemma = Lemma()
        le.lemma = lemma
        form_representation = FormRepresentation()
        lemma.add_form_representation(form_representation)
        form_representation.add_feature(Feature("writtenForm", baseform))
        if extrainfo:
            form_representation.add_feature(Feature("formInformation", extrainfo))
        form_representation.add_feature(Feature("partOfSpeech", map_kelly_pos_to_saldo(pos)))
        form_representation.add_feature(Feature("kellyPartOfSpeech", pos))
        form_representation.add_feature(Feature("kellyIdentifier", id_num))
        form_representation.add_feature(Feature("raw", raw))
        form_representation.add_feature(Feature("wpm", wpm))
        form_representation.add_feature(Feature("cefr", cefr))
        form_representation.add_feature(Feature("source", source))
        if grammar: # Maybe both grammar and example should always be there but empty?
            form_representation.add_feature(Feature("grammar", grammar))
        if example:
            # NOTE(review): maybe a leading "e.g. " should be stripped.
            form_representation.add_feature(Feature("example", example))
        lmf.add_lexical_entry(le)
    return str(lmf)
# ---------- WORDNET ---------------------------------------
# Wordnet is a little special because it operates on the files 'wn3_synsets.txt' and 'wordnet-saldo.txt' already in the directory
# WordNet POS letter -> SALDO POS tag.  NOTE(review): not referenced in the
# code visible here.
saldo_pos_from_wordnet = { "n" : "nn", "s" : "av", "v" : "vb", "r" : "ab", "a" : "av"} # adjective satellites (s) can also be numerals etc.
def wordnet_data():
    """Read 'wn3_synsets.txt' and 'wordnet-saldo.txt' from the current
    directory and return a dict mapping synset ids to entry dicts
    (saldo / synset / type / core / freq / gloss / pos / definition),
    all values UTF-8 byte strings.
    """
    synsets = {}
    with codecs.open('wn3_synsets.txt', encoding='utf-8') as src:
        for raw in src:
            fields = [col.encode("utf-8") for col in raw[:-1].split('\t')]
            # The definition may contain one or more examples, grabbed later.
            # FORM: definition; "[example]"; "[example]" ...
            synsets[fields[0]] = {
                "gloss": fields[2],
                "pos": fields[3],
                "definition": fields[4],
            }

    entries = {}
    with codecs.open('wordnet-saldo.txt', encoding='utf-8') as src:
        for raw in src:
            fields = [col.encode("utf-8") for col in raw[:-1].split('\t')]
            key = fields[0]
            ent = entries.setdefault(key, {})
            ent["saldo"] = fields[1]
            ent["synset"] = key
            ent["type"] = fields[2]
            ent["core"] = fields[6]
            ent["freq"] = fields[4]
            syn = synsets[key]
            ent["gloss"] = syn["gloss"]
            ent["pos"] = syn["pos"]
            ent["definition"] = syn["definition"]

    return entries
def wordnet():
    """Build the Swedish WordNet LMF resource and return it as an XML string."""
    lmf = LMF('swe')
    lmf.useNamespace = True
    for synset_id, item in wordnet_data().items():
        entry = LexicalEntry()
        lmf.add_lexical_entry(entry)
        # Synset ids contain ':' and '%', which are not id-safe characters.
        sense = Sense("wordnet--" + synset_id.replace(":","_").replace("%","_"))
        entry.add_sense(sense)
        entry.add_saldoLink(SaldoLink(item["saldo"]))
        sense.add_feature(Feature("saldoSense", item["saldo"]))
        lemma = Lemma()
        entry.lemma = lemma
        fr = FormRepresentation()
        for gloss in item["gloss"].split(", "):
            fr.add_feature(Feature("gloss", gloss))
        fr.add_feature(Feature("partOfSpeech", saldo_pos_from_wordnet[item["pos"]]))
        fr.add_feature(Feature("wordnetPartOfSpeech", item["pos"]))
        # The definition field may carry trailing examples:
        #   definition; "[example]"; "[example]" ...
        parts = [p.strip() for p in item["definition"].split(";")]
        sense.add_feature(Feature("definition", parts[0]))
        for example in parts[1:]:
            if example != "":
                sense.add_feature(Feature("example", dequote(example)))
        sense.add_feature(Feature("synset", item["synset"]))
        sense.add_feature(Feature("type", item["type"]))
        sense.add_feature(Feature("core", item["core"]))
        sense.add_feature(Feature("frequency", item["freq"]))
        lemma.add_form_representation(fr)
    return str(lmf)
# ---------- CROSS PIVOT -----------------------------------
# Uses the raw material from dalin and fsv to make a cross-pivot resource,
# allowing a search on, for example, "brev" to find "bref" etc.
def cp_fsvbase_data():
    """Raw Old Swedish (fsv) base material: 10-field csv rows."""
    rows = read_csv_from_file("../fsv/fsv.txt", 10)
    return rows
    
def cp_dalinbase_data():
    """Raw Dalin base material: 10-field csv rows."""
    rows = read_csv_from_file("dalin_saldo.txt", 10)
    return rows
    
def crosspivot():
    """Build the cross-pivot LMF resource: each modern SALDO lemgram is
    linked to its Dalin ("_1800") and Old Swedish ("old") lemgrams.
    Returns the resource as an XML string.
    """
    lmf = LMF('swe')
    pivots = {}

    # Data from Dalin
    for (old_spelling, new_spelling, pos, dalin_gram, dalin_lemgram,
         le_type, pattern, saldo_lemgram, saldo_senses, skos) in cp_dalinbase_data():
        # "--" and "PRIM..1" mark rows without a usable SALDO link.
        if saldo_lemgram != "--" and saldo_lemgram != "PRIM..1":
            pivots.setdefault(saldo_lemgram, []).append(("_1800", dalin_lemgram, skos))

    # Data from FSV; one row may point at several space-separated lemgrams.
    for (old_spelling, new_spelling, pos, _, fsv_lemgram,
         le_type, pattern, saldo_lemgram, saldo_senses, skos) in cp_fsvbase_data():
        if saldo_lemgram != "--" and saldo_lemgram != "PRIM..1":
            for sl in saldo_lemgram.split(" "):
                pivots.setdefault(sl, []).append(("old", fsv_lemgram, skos))

    for modern_lemgram, posts in pivots.items():
        entry = LexicalEntry()
        lemma = Lemma()
        entry.lemma = lemma
        saldo_fr = FormRepresentation()
        saldo_fr.add_feature(Feature("category", "modern"))
        saldo_fr.add_feature(Feature("lemgram", modern_lemgram))
        lemma.add_form_representation(saldo_fr)
        for (category, lemgram, match) in posts:
            fr = FormRepresentation()
            fr.add_feature(Feature("category", category))
            fr.add_feature(Feature("lemgram", lemgram))
            fr.add_feature(Feature("match", match))
            lemma.add_form_representation(fr)
        lmf.add_lexical_entry(entry)
    return str(lmf)
# ---------- DALIN BASE MATERIAL -----------------------------------
def dalinbase_data():
    """Dalin base material: 10-field csv rows via read_csv."""
    rows = read_csv(num_of_fields=10)
    return rows
    
def dalinbase():
    """Build the Dalin base LMF resource (one LexicalEntry per csv row)
    and return it as an XML string."""
    lmf = LMF('swe')
    for (old_spelling, new_spelling, pos, dalin_gram, dalin_lemgram,
         le_type, pattern, saldo_lemgram, saldo_senses, match_type) in dalinbase_data():
        fr = FormRepresentation()
        for att, val in [("lemgram", dalin_lemgram),
                         ("oldSpelling", old_spelling),
                         ("newSpelling", new_spelling),
                         ("xref", le_type),
                         ("partOfSpeech", pos)]:
            fr.add_feature(Feature(att, val))
        # "--" marks a missing paradigm; emit the feature only when present.
        if pattern != "--":
            fr.add_feature(Feature("paradigm", pattern))
        lemma = Lemma()
        lemma.add_form_representation(fr)
        entry = LexicalEntry()
        entry.lemma = lemma
        lmf.add_lexical_entry(entry)
    return str(lmf)
# ----------- SWEDBERG "FAKE" MORPHOLOGY -------------------------
def swedbergm():
    """Build a "fake" morphology for Swedberg from LMF XML read on stdin.

    Collects (lemgram, written forms, part of speech) for every entry in the
    input's Lexicon, then returns a new LMF XML string where each written
    form appears both as a FormRepresentation feature and as a WordForm
    with msd "prim". Entries without a lemgram are dropped.

    Fixes over the previous version (behavior unchanged):
    - removed a stray dead `pass` inside the wordform loop;
    - dropped the always-true `findall(...) != None` checks (findall
      returns a list, never None);
    - Element comparisons use `is not None` (Element truth-testing is
      unreliable and deprecated);
    - entries without a lemgram no longer build throwaway objects.
    """
    total = []
    xml_tree = fromstring(sys.stdin.read())
    for entry in xml_tree.find("Lexicon"):
        lemma = entry.find("Lemma")
        baseforms = []
        lem = None
        pos = "prim"  # placeholder meaning "no explicit part of speech seen"
        if lemma is not None:
            for fr in lemma.findall("FormRepresentation"):
                for feat in fr.findall("feat"):
                    att = feat.attrib["att"]
                    if att == "writtenForm":
                        baseforms.append(feat.attrib["val"].encode('utf-8'))
                    elif att == "lemgram":
                        lem = feat.attrib["val"].encode('utf-8')
                    elif att == "partOfSpeech":
                        pos = feat.attrib["val"].encode('utf-8')
        total.append((lem, baseforms, pos))

    lmf = LMF('swe')
    for (lemg, wflist, pos) in total:
        if lemg is None:
            continue  # no lemgram -> entry is not emitted (as before)
        le = LexicalEntry()
        lemma = Lemma()
        le.lemma = lemma
        fr = FormRepresentation()
        lemma.add_form_representation(fr)
        fr.add_feature(Feature("lemgram", lemg))
        for wf in wflist:
            fr.add_feature(Feature("writtenForm", wf))
            wordform = WordForm()
            wordform.add_feature(Feature("writtenForm", wf))
            wordform.add_feature(Feature("msd", "prim"))
            le.add_wordform(wordform)
        if pos != "prim":
            fr.add_feature(Feature("partOfSpeech", pos))
        fr.add_feature(Feature("paradigm", "prim"))
        lmf.add_lexical_entry(le)
    return str(lmf)
# ----------- AKADEMISK ORDLISTA----------------------------
def ao_data():
    """Akademisk ordlista material: 3-field csv rows via read_csv."""
    rows = read_csv(num_of_fields=3)
    return rows
def ao():
    """Build the Akademisk ordlista LMF resource and return it as an XML
    string. Entries are ranked 1..n in input order."""
    # Native (Swedish) POS labels -> SALDO POS tags.
    pos_conversion = {"adverb" : "ab",
                      "substantiv" : "nn",
                      "adjektiv" : "av",
                      "verb" : "vb",
                      "preposition" : "pp",
                      "konjunktion" : "kn",
                      "particip" : "av",
                      "frågande/relativt_possesivuttryck" : "pn",
                      "partikel" : "pp",
                      "possessivuttryck" : "pn",
                      "pronomen" : "pn",
                      "subjunktion" : "sn",
                      }
    lmf = LMF('swe')
    for rank, (ao_lemma, pos, saldo_id) in enumerate(ao_data(), 1):
        headword = ao_lemma.strip()
        native_pos = pos.strip()
        entry = LexicalEntry()
        lemma = Lemma()
        entry.lemma = lemma
        fr = FormRepresentation()
        lemma.add_form_representation(fr)
        fr.add_feature(Feature("writtenForm", headword))
        # A row may reference several pipe-separated SALDO lemgrams.
        for lemgram in saldo_id.split("|"):
            fr.add_feature(Feature("lemgram", lemgram.strip()))
        fr.add_feature(Feature("partOfSpeech", pos_conversion[native_pos]))
        fr.add_feature(Feature("nativePartOfSpeech", native_pos))
        fr.add_feature(Feature("rank", str(rank)))
        entry.add_sense(Sense("ao--" + headword))
        lmf.add_lexical_entry(entry)
    return str(lmf)
# ----------------------------------------------------------
if __name__ == '__main__':
    if len(sys.argv) > 1:
        resource = sys.argv[1]
        if resource == 'saldo':
            print saldo()
        if resource == 'saldoe':
            print saldo_examples()
        elif resource == 'swesaurus':
            print swesaurus()
        elif resource == 'lwt':
            print lwt()
        elif resource == 'parole':
            print parole()
        elif resource == 'simple':
            print simple()
        elif resource == 'kelly':
            print kelly()
        elif resource == 'wordnet':
            print wordnet()
        elif resource == 'crosspivot':
            print crosspivot()
        elif resource == 'dalinbase':
            print dalinbase()
        elif resource == 'swedbergm':
            print swedbergm()
        elif resource == 'ao':
            print ao()