#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Print an error report for the SweFN lexicon, checked against SALDO.

For every ``LexicalEntry`` in ``SWEFN_URL`` that has a ``Sense``, the script
reports:

* ``LU`` values that are not identifiers in ``SALDO_URL``,
* ``suggestionForLU`` values that *are* identifiers in ``SALDO_URL``,
* empty ``LU`` / ``suggestionForLU`` values.

The report is written to standard output as UTF-8.
"""

import sys
import re
import codecs
import datetime
from xml.etree.ElementTree import ElementTree
from xml.etree.ElementTree import fromstring

SWEFN_URL = "swefn.xml"
SALDO_URL = "saldo.txt"


def read_csv_from_file(path, num_of_fields):
    """Yield the first *num_of_fields* tab-separated fields of each line.

    The file at *path* is decoded as UTF-8. Lines with fewer fields yield a
    shorter list; no padding is done.
    """
    with codecs.open(path, encoding='utf-8') as f:
        for line in f:
            # Strip only the line terminator: slicing off the last character
            # would eat real data on a final line without a newline, and
            # would leave a stray '\r' on CRLF files.
            fields = line.rstrip('\r\n').split('\t')
            yield fields[0:num_of_fields]


def escape(s):
    """Return *s* with the five XML special characters replaced by entities."""
    # '&' has to go first, otherwise the ampersands introduced by the other
    # replacements would be escaped a second time.
    s = s.replace('&', '&amp;')
    s = s.replace("'", '&apos;')
    s = s.replace('<', '&lt;')
    s = s.replace('>', '&gt;')
    return s.replace('"', '&quot;')


def _inspect_sense(sense, saldo_entries):
    """Collect the problems found in one ``Sense`` element.

    Returns a tuple ``(created_by, new_saldo_lus, missing_in_saldo,
    empty_lu, empty_lu_sugg)``. Only the ``LU``, ``suggestionForLU`` and
    ``createdBy`` feats are looked at; every other feat is ignored.
    """
    created_by = ""
    lus = []
    lu_suggestions = []
    empty_lu = False
    empty_lu_sugg = False

    for feat in sense.findall("feat"):
        feat_att = feat.get("att")
        # A feat without a "val" attribute is treated like an empty value
        # instead of crashing on None.strip().
        feat_val = feat.get("val") or ""
        if feat_att == "LU":
            if feat_val.strip():
                lus.append(feat_val)
            else:
                empty_lu = True
        elif feat_att == "suggestionForLU":
            if feat_val.strip():
                lu_suggestions.append(feat_val)
            else:
                empty_lu_sugg = True
        elif feat_att == "createdBy":
            created_by = feat_val

    missing_in_saldo = [lu for lu in lus if lu not in saldo_entries]
    # NOTE(review): the original appears to have wrapped each suggestion in
    # some markup that could not be recovered; confirm whether a tag (or a
    # link) is expected around each identifier.
    new_saldo_lus = [escape(sugg) for sugg in lu_suggestions
                     if sugg in saldo_entries]
    return created_by, new_saldo_lus, missing_in_saldo, empty_lu, empty_lu_sugg


def _build_report(sense, saldo_entries):
    """Return the report fragment for *sense*, or None if it has no problems."""
    (created_by, new_saldo_lus, missing_in_saldo,
     empty_lu, empty_lu_sugg) = _inspect_sense(sense, saldo_entries)

    if not (new_saldo_lus or missing_in_saldo or empty_lu or empty_lu_sugg):
        return None

    # The frame name is the part after the first "--" in the sense id; fall
    # back to the whole id when there is no such separator.
    frame_id = sense.get("id") or ""
    id_parts = frame_id.split("--")
    frame_name = id_parts[1] if len(id_parts) > 1 else frame_id

    # NOTE(review): the tags below are a reconstruction (heading, creator
    # paragraph, one list item per problem); verify against a known-good
    # report before relying on the exact markup.
    parts = [u'<h2>%s</h2>\n<p>(%s)</p>\n<ul>'
             % (escape(frame_name), escape(created_by))]
    if new_saldo_lus:
        parts.append(u"\n<li>Ny identifierare finns nu i SALDO: "
                     + u", ".join(new_saldo_lus) + u"</li>")
    if missing_in_saldo:
        parts.append(u"\n<li>Lexem saknas: "
                     + escape(u", ".join(missing_in_saldo)) + u"</li>")
    if empty_lu:
        parts.append(u"\n<li>Tom LU</li>")
    if empty_lu_sugg:
        parts.append(u"\n<li>Tomt LU-förslag</li>")
    parts.append(u'\n</ul>')
    return u"".join(parts)


def main():
    """Build the SweFN error report and write it to standard output."""
    # Only the first column (the SALDO identifier) is needed for the lookup.
    saldo_entries = set(row[0] for row in read_csv_from_file(SALDO_URL, 7))

    tree = ElementTree()
    tree.parse(SWEFN_URL)

    reports = []
    for le in tree.find("Lexicon").findall("LexicalEntry"):
        sense = le.find("Sense")
        # Compare with None explicitly: an Element's truth value depends on
        # whether it has children, not on whether it was found.
        if sense is None:
            continue
        report = _build_report(sense, saldo_entries)
        if report is not None:
            reports.append(report)

    now = datetime.datetime.now()
    timestring = now.replace(microsecond=0).isoformat(' ')

    # NOTE(review): page header markup is reconstructed, see _build_report.
    page = (u'<h1>Error report for SweFN</h1>\n<p>%s</p>\n%s'
            % (timestring, u"\n".join(reports)))

    # Write bytes so the output is UTF-8 regardless of the console encoding;
    # sys.stdout.buffer exists on Python 3, plain sys.stdout takes bytes on
    # Python 2.
    out = getattr(sys.stdout, 'buffer', sys.stdout)
    out.write(page.encode('utf-8') + b'\n')


if __name__ == '__main__':
    main()