#!/bin/bash
#
# Regenerates the lexicon resources and publishes them through Subversion:
#   1. updates the metadata/ and lmf/ working copies and normalises the svn
#      properties of every XML file,
#   2. exports each source lexicon, converts it to LMF XML under lmf/ and
#      validates the result,
#   3. writes the JavaScript name lists under js/,
#   4. commits lmf/, html/, js/ and metadata/.
#
# All paths are relative, so the script has to be started from the directory
# that contains skript/, lmf/, js/, html/ and metadata/.
#
# NOTE(review): no step aborts the run; a failed export, conversion or
# validation is still followed by the commits at the end. Confirm whether
# that best-effort behaviour is intended before adding `set -e`.

# Use the Swedish UTF-8 locale for every locale category.
for locale_var in LANG LC_CTYPE LC_NUMERIC LC_TIME LC_COLLATE LC_MONETARY \
  LC_MESSAGES LC_PAPER LC_NAME LC_ADDRESS LC_TELEPHONE LC_MEASUREMENT \
  LC_IDENTIFICATION LC_ALL; do
  export "${locale_var}=sv_SE.UTF-8"
done

# Morphology binaries and the dictionaries they are run with.
readonly SALDO="/home/markus/fm/sblex/bin/saldo"
readonly SALDODICT="/home/markus/fm/dicts/saldo.dict"
readonly DALIN="/home/markus/fm/sblex/bin/dalin"
readonly DALINDICT="/home/markus/fm/dicts/dalin.dict"
readonly FSV="/home/markus/fm/sblex/bin/fsv"
readonly FSVDICT="/home/markus/fm/dicts/fsv.dict"

#######################################
# Exports one file from the Subversion repository into the current
# directory, overwriting an existing copy.
# Arguments: $1 - URL of the file to export
#######################################
fetch() {
  svn export --force "$1"
}

#######################################
# Sets one svn property on every *.xml file below the current directory,
# one `svn propset` call per file. Subversion's own .svn directories are
# pruned by name, and file names are handed over by find itself, so names
# containing whitespace or quotes are passed intact.
# Arguments: $1 - property name
#            $2 - property value
#######################################
set_xml_prop() {
  local prop=$1
  local value=$2
  find . -name .svn -prune -o -name '*.xml' \
    -exec svn propset "$prop" "$value" {} \;
}

#######################################
# Runs skript/lmf.py for one resource, pretty-prints its output with tidy
# into the given file, and validates that file.
# Stdin is passed through to lmf.py, so callers feed the source data with
# a redirection or a pipe. When the redirection fails (missing input file)
# the function is not run at all and the previous output file is left as is.
# Arguments: $1 - resource name understood by lmf.py
#            $2 - path of the XML file to write
#######################################
build_lmf() {
  local resource=$1
  local output=$2
  ./skript/lmf.py "$resource" | tidy -xml -utf8 -i > "$output"
  ./skript/validate_xml.bash "$output"
}

#######################################
# Dumps a morphology in LMF format (`-p lmf`) and validates the result.
# Arguments: $1 - morphology binary
#            $2 - dictionary file
#            $3 - path of the XML file to write
#######################################
build_morphology() {
  local binary=$1
  local dict=$2
  local output=$3
  "$binary" "$dict" -p lmf > "$output"
  ./skript/validate_xml.bash "$output"
}

svn up metadata
svn up lmf

# make sure that xml files have the correct mime-type
set_xml_prop svn:mime-type text/xml
# make sure that xml files have the Id keyword and native line endings set
set_xml_prop svn:keywords Id
set_xml_prop svn:eol-style native

# saldo
fetch https://svn.spraakdata.gu.se/repos/sblex/sal/trunk/saldo.txt
build_lmf saldo lmf/saldo/saldo.xml < saldo.txt

# saldo examples
# saldoexempel_swefn.txt is not exported here; it has to exist locally already.
fetch https://svn.spraakdata.gu.se/sb-arkiv/lexikon/saldoexempel/saldoexempel.txt
cat saldoexempel.txt saldoexempel_swefn.txt \
  | build_lmf saldoe lmf/saldoe/saldoe.xml

# swefn
fetch https://svn.spraakdata.gu.se/sb/fnplusplus/pub/swefn-db.csv
build_lmf swefn lmf/swefn/swefn.xml < swefn-db.csv

# swefn frame names list as a javascript file
# (for use in Karp autocomplete and frames list)
python ./skript/swefn_frames_list_to_js.py < swefn-db.csv \
  > js/swefn_frame_names.js
# and make the new LUs available as a javascript file as well
python ./skript/swefn_newlus_to_js.py < swefn-db.csv \
  > js/swefn_newlus_verbose.js
# and Konstruktikon names...
python ./skript/konstruktikon_list_to_js.py > js/konstruktikon_names.js

# swesaurus
fetch https://svn.spraakdata.gu.se/sb/fnplusplus/swesaurus/swesaurus.txt
fetch https://svn.spraakdata.gu.se/sb/fnplusplus/swesaurus/swesaurus-derived.txt
fetch https://svn.spraakdata.gu.se/sb/fnplusplus/wiktionary/swesaurus_wiktionary.txt
fetch https://svn.spraakdata.gu.se/sb/fnplusplus/swesaurus/wordnet3/wordnet-saldo-relations.txt
cat swesaurus.txt swesaurus-derived.txt swesaurus_wiktionary.txt \
  wordnet-saldo-relations.txt \
  | build_lmf swesaurus lmf/swesaurus/swesaurus.xml

# parole
fetch https://svn.spraakdata.gu.se/sb-arkiv/lexikon/parole/parole.txt
build_lmf parole lmf/parolelexplus/parolelexplus.xml < parole.txt

# simple
fetch https://svn.spraakdata.gu.se/sb-arkiv/simple_parole/simple_09/SIMPLEadj_SE_2009.txt
fetch https://svn.spraakdata.gu.se/sb-arkiv/simple_parole/simple_09/SIMPLEn_SE_2009.txt
fetch https://svn.spraakdata.gu.se/sb-arkiv/simple_parole/simple_09/SIMPLEv_SE_2009.txt
# Rearrange the tab-separated columns of the three SIMPLE files so that all of
# them share one 16-column layout; column 16 carries the part-of-speech tag
# (vb / nn / av).
awk 'BEGIN {FS = "\t"; OFS = "\t"} { $14 = "-"; $15 = "-"; $16 = "vb"; print }' \
  SIMPLEv_SE_2009.txt > "simple_processed_verbs.txt"
awk 'BEGIN {FS = "\t"; OFS = "\t"} { $14 = $10; $15 = $13; $13 = $12; $12 = $11; $10 = "-"; $11 = "-"; $16 = "nn"; print}' \
  SIMPLEn_SE_2009.txt > "simple_processed_nouns.txt"
awk 'BEGIN {FS = "\t"; OFS = "\t"} { $13 = $10; $10 = "-"; $11 = "-"; $12 = "-"; $14 = "-"; $15 = "-"; $16 = "av"; print}' \
  SIMPLEadj_SE_2009.txt > "simple_processed_adjs.txt"
cat simple_processed_verbs.txt simple_processed_nouns.txt \
  simple_processed_adjs.txt \
  | ./skript/pid_to_sid.py \
  | build_lmf simple lmf/simpleplus/simpleplus.xml

# kelly
#svn export --force https://svn.spraakdata.gu.se/sb-arkiv/lexikon/kelly/kelly_saldo.txt NOT ANYMORE SINCE IT'S LOCALLY CHANGED
build_lmf kelly lmf/kelly/kelly.xml < kelly_saldo.txt

# lwt
fetch https://svn.spraakdata.gu.se/sb/fnplusplus/lwt/lwt-meanings.txt
build_lmf lwt lmf/lwt/lwt.xml < lwt-meanings.txt

# wordnet
# No data is piped in here; presumably lmf.py opens the two exported files
# itself -- verify against skript/lmf.py.
fetch https://svn.spraakdata.gu.se/sb/fnplusplus/swesaurus/wordnet3/wn3_synsets.txt
fetch https://svn.spraakdata.gu.se/sb/fnplusplus/swesaurus/wordnet3/wordnet-saldo.txt
build_lmf wordnet lmf/wordnet-saldo/wordnet-saldo.xml

# saldom
build_morphology "$SALDO" "$SALDODICT" lmf/saldom/saldom.xml

# dalin morphology
build_morphology "$DALIN" "$DALINDICT" lmf/dalinm/dalinm.xml

# fsv morphology
build_morphology "$FSV" "$FSVDICT" lmf/fsvm/fsvm.xml

# Konstruktikon (HTML)
#
#./skript/konstruktikon/konstruktikon.py > html/constructicon.html
#./skript/konstruktikon/konstruktikon.py simplified swedish > html/constructicon_simple_swe.html
#./skript/konstruktikon/konstruktikon.py simplified english > html/constructicon_simple_eng.html
#./skript/validate_xml.bash html/constructicon.html
#./skript/validate_xml.bash html/constructicon_simple_swe.html
#./skript/validate_xml.bash html/constructicon_simple_eng.html

# Copy Konstruktikon from karp-red to karp-sok
fetch https://svn.spraakdata.gu.se/repos/karp/trunk/test-data/konstruktikon/konstruktikon.xml
cp konstruktikon.xml lmf/konstruktikon/konstruktikon.xml

# Konstruktikon (LMF)
#svn export --force https://svn.spraakdata.gu.se/sb/fnplusplus/pub/constructicon/constructicon.xml
#cat constructicon.xml | ./skript/lmf.py konstruktikon | tidy -xml -utf8 -i > lmf/konstruktikon/konstruktikon.xml
#./skript/validate_xml.bash lmf/konstruktikon/konstruktikon.xml

# Dalin's base material
fetch https://svn.spraakdata.gu.se/sb-arkiv/lexikon/dalin/dalin_saldo.txt
build_lmf dalinbase lmf/dalin-base/dalin-base.xml < dalin_saldo.txt

# The Old Swedish base material
fetch https://svn.spraakdata.gu.se/sb-arkiv/lexikon/fsv/fsv.txt
# ...

# The Cross Pivot resource
#cat dalin_saldo.txt | ./skript/lmf.py crosspivot | tidy -xml -utf8 -i > lmf/crosspivot/crosspivot.xml
build_lmf crosspivot lmf/diapivot/diapivot.xml

# Publish the regenerated resources.
for target in lmf html js; do
  svn ci -m "automatic lexicon update" "$target"
done

# Update metadata from korp and karp (Olof's script)
php ./skript/update-karp-stats.php
svn ci -m "automatic lexicon update" metadata