import codecs
import sys
from collections import defaultdict

# Maps an English word to the set of lemma identifiers listed for it in
# lexin_saldo.txt.
eng_to_lem = defaultdict(set)

# Every non-blank line must hold exactly four tab-separated fields; judging by
# the variable names these are English word, Swedish word, part of speech and
# a space-separated list of lemma ids (only the first and last are used).
with codecs.open('lexin_saldo.txt', encoding='utf-8') as f:
    for line in f:
        # Strip only the line terminator.  Slicing off the last character
        # unconditionally would eat real data on a final line that lacks a
        # newline, and would leave a stray '\r' glued to the last lemma id
        # when the file uses CRLF line endings.
        line = line.rstrip('\r\n')
        if not line:
            continue  # tolerate blank lines instead of failing to unpack
        eng, swe, pos, lms = line.split('\t')
        # Consecutive spaces produce empty strings when splitting; drop them.
        lemmas = [lemma for lemma in lms.split(' ') if lemma]
        if lemmas:
            eng_to_lem[eng].update(lemmas)

# Number of leading lines of the NRC lexicon file that are skipped before the
# tab-separated records are parsed (presumably a README-style preamble --
# confirm against the lexicon file actually used).
NRC_HEADER_LINES = 46

# Emit UTF-8 bytes directly.  Python 3 exposes the underlying byte stream as
# sys.stdout.buffer; on Python 2 sys.stdout itself accepts byte strings.
out = getattr(sys.stdout, 'buffer', sys.stdout)

# For every record "<english>\t<emotion>\t<value>" whose value is not '0' and
# whose English word has lemmas in eng_to_lem, write one line
# "<lemma>\t<english>\t<emotion>" per lemma to standard output.
with codecs.open('./NRC-Emotion-Lexicon-v0.92/NRC-emotion-lexicon-wordlevel-alphabetized-v0.92.txt', encoding='utf-8') as f:
    for line_no, line in enumerate(f, 1):
        if line_no <= NRC_HEADER_LINES:
            continue
        # Strip the terminator only; with a plain [:-1] slice a CRLF file
        # yields val == '0\r', which slips past the zero filter below.
        line = line.rstrip('\r\n')
        if not line:
            continue  # tolerate blank lines instead of failing to unpack
        eng, emo, val = line.split('\t')
        if val == '0':
            continue
        # Use .get() rather than indexing: eng_to_lem is a defaultdict, so
        # indexing would insert an empty set for every unmapped word.
        lemmas = eng_to_lem.get(eng)
        if not lemmas:
            continue
        # Sets iterate in arbitrary order; sort so output is reproducible.
        for lemma in sorted(lemmas):
            out.write(('%s\t%s\t%s\n' % (lemma, eng, emo)).encode('utf-8'))