#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Helpers for cross-checking lexical units in swefn.xml against saldo.txt."""

import sys
import re
import codecs
import datetime
from xml.etree.ElementTree import ElementTree
from xml.etree.ElementTree import fromstring

# Input files, resolved relative to the current working directory.
SWEFN_URL = "swefn.xml"
SALDO_URL = "saldo.txt"


def read_csv_from_file(path, num_of_fields):
    """Yield the leading fields of every line of a tab-separated UTF-8 file.

    Args:
        path: Path of the file to read; it is decoded as UTF-8.
        num_of_fields: Maximum number of leading fields to keep per line.

    Yields:
        A list holding at most ``num_of_fields`` strings for each line.
        Lines with fewer fields yield a shorter list.
    """
    with codecs.open(path, encoding='utf-8') as f:
        for line in f:
            # Strip only the line terminator.  Slicing off the last character
            # unconditionally would eat real data on a final line without a
            # trailing newline, and would leave a stray '\r' on CRLF files.
            fields = line.rstrip('\r\n').split('\t')
            yield fields[:num_of_fields]


def escape(s):
    """Return ``s`` with the five XML special characters replaced by entities.

    Args:
        s: Text to escape.

    Returns:
        A copy of ``s`` in which ``&``, ``'``, ``<``, ``>`` and ``"`` are
        replaced by ``&amp;``, ``&apos;``, ``&lt;``, ``&gt;`` and ``&quot;``.
    """
    # '&' must be handled first; otherwise the ampersands introduced by the
    # later replacements would themselves be escaped a second time.
    s = s.replace('&', '&amp;')
    # NOTE(review): '&apos;' is the XML predefined entity; confirm it is the
    # spelling the consumer of this output expects (HTML 4 needs '&#39;').
    s = s.replace("'", '&apos;')
    s = s.replace('<', '&lt;')
    s = s.replace('>', '&gt;')
    return s.replace('"', '&quot;')


# NOTE(review): the script entry point below is only partially visible (it
# continues past this point and its tail is cut off), so it is kept verbatim
# and unformatted on one comment line until the full body can be restored.
# if __name__ == '__main__': saldo_entries = set() for (saldoid, prim, sec, lem, gf, pos, paradigm) in read_csv_from_file(SALDO_URL, 7): saldo_entries.add(saldoid) tree = ElementTree() tree.parse(SWEFN_URL) reports = [] entries = tree.find("Lexicon").findall("LexicalEntry") for le in entries: sense = le.find("Sense") new_saldo_lus = [] missing_in_saldo = [] if sense != None: frame_id = sense.get("id") createdBy = "" lus = [] lu_suggestions = [] empty_lu = False empty_lu_sugg = False feats = sense.findall("feat") for feat in feats: feat_att = feat.get("att") feat_val = feat.get("val") if feat_att == "semanticType": pass#cemtypes.append(feat_val) elif feat_att == "domain": pass#domains.append(feat_val) elif feat_att == "coreElement": pass#c_elements.append(feat_val) elif feat_att == "inheritance": pass#e.inheritance = feat_val elif feat_att == "peripheralElement": pass#p_elements.append(feat_val) elif feat_att == "compound": pass#comps.append(feat_val) elif feat_att == "compoundExample": pass#comp_examples.append(feat_val) elif feat_att == "LU": if feat_val.strip() != "": lus.append(feat_val) else: empty_lu = True elif feat_att == "suggestionForLU": if feat_val.strip() != "": lu_suggestions.append(feat_val) else: empty_lu_sugg = True elif feat_att == "internal_comment": # New, but should maybe not be rendered out pass elif feat_att == "comment": # Right now it's not a feat but I think it should be!
pass#e.comment = feat_val elif feat_att == "createdBy": # Right now it's not a feat but I think it should be! createdBy = feat_val pass#e.created_by = feat_val elif feat_att == "createdDate": pass#e.createdDate = feat_val elif feat_att == "modifDate": pass#e.modifDate = feat_val elif feat_att == "entry_status": # New field pass#e.status = feat_val elif feat_att == "BFNID": pass#e.bid = feat_val for lu in lus: if not lu in saldo_entries: missing_in_saldo.append(lu) for sugg in lu_suggestions: if sugg in saldo_entries: new_saldo_lus.append(u'' + sugg + u''); if new_saldo_lus or missing_in_saldo or empty_lu or empty_lu_sugg: report += '