"""Join an English-keyed lemma table with the NRC emotion lexicon.

Reads ``lexin_saldo.txt`` (four tab-separated fields per line; the first
is an English word and the fourth a space-separated list of lemma ids),
then reads the NRC word-level emotion lexicon (three tab-separated fields
per line: English word, emotion, value).  For every lexicon row whose
value is not ``'0'`` and whose English word has lemmas, one line
``lemma<TAB>english<TAB>emotion`` is written to stdout as UTF-8.
"""
import codecs
import sys
from collections import defaultdict

LEXIN_SALDO_PATH = 'lexin_saldo.txt'
NRC_LEXICON_PATH = './NRC-Emotion-Lexicon-v0.92/NRC-emotion-lexicon-wordlevel-alphabetized-v0.92.txt'
# Number of leading lines of the NRC file that are skipped before parsing.
# NOTE(review): presumably the file's header/preamble -- confirm against
# the actual v0.92 distribution.
NRC_HEADER_LINES = 46


def load_lemma_map(lines):
    """Build a mapping from English word to the set of its lemma ids.

    Args:
        lines: iterable of text lines, each holding exactly four
            tab-separated fields; only the first (English word) and the
            fourth (space-separated lemma ids) are used.

    Returns:
        A ``defaultdict(set)``; words whose lemma field is empty get no
        entry.

    Raises:
        ValueError: if a non-blank line does not have exactly four fields.
    """
    eng_to_lem = defaultdict(set)
    for line in lines:
        # Strip only the line terminator: slicing off the last character
        # would eat real data on a final line without a newline and would
        # leave '\r' behind on CRLF input (codecs.open does not translate
        # newlines).
        line = line.rstrip('\r\n')
        if not line:
            continue
        eng, _swe, _pos, lms = line.split('\t')
        lemmas = [lm for lm in lms.split(' ') if lm]
        if lemmas:
            eng_to_lem[eng].update(lemmas)
    return eng_to_lem


def iter_emotion_rows(lines, eng_to_lem, skip_lines=NRC_HEADER_LINES):
    """Yield ``(lemma, english, emotion)`` for every active lexicon row.

    Args:
        lines: iterable of text lines from the emotion lexicon; after the
            skipped prefix each non-blank line must hold exactly three
            tab-separated fields (English word, emotion, value).
        eng_to_lem: mapping from English word to a collection of lemma ids.
        skip_lines: number of leading lines to ignore.

    Yields:
        One tuple per lemma of each row whose value field is not ``'0'``.
        Lemmas of a word are yielded in sorted order so that the output is
        reproducible from run to run.

    Raises:
        ValueError: if a non-blank data line does not have three fields.
    """
    for line_no, line in enumerate(lines, 1):
        if line_no <= skip_lines:
            continue
        line = line.rstrip('\r\n')
        if not line:
            continue
        eng, emo, val = line.split('\t')
        if val == '0':
            continue
        # .get() instead of indexing: indexing a defaultdict would insert
        # an empty set for every lexicon word that has no lemma.
        for lemma in sorted(eng_to_lem.get(eng, ())):
            yield lemma, eng, emo


def main():
    """Read both input files and print the joined rows to stdout."""
    with codecs.open(LEXIN_SALDO_PATH, encoding='utf-8') as f:
        eng_to_lem = load_lemma_map(f)

    # Emit explicit UTF-8 bytes.  Python 3 exposes the byte stream as
    # sys.stdout.buffer; Python 2's sys.stdout accepts bytes directly.
    out = getattr(sys.stdout, 'buffer', sys.stdout)
    with codecs.open(NRC_LEXICON_PATH, encoding='utf-8') as f:
        for row in iter_emotion_rows(f, eng_to_lem):
            out.write(('%s\t%s\t%s\n' % row).encode('utf-8'))


if __name__ == '__main__':
    main()