#!/usr/bin/env python
"""Produce feed text listing entries added to a file between SVN revisions.

The revision history and the per-revision diffs are obtained by running the
``svn`` command-line client as a subprocess.
"""
from subprocess import Popen, PIPE
import re
import sys

# Header line of one ``svn log`` entry, e.g.
#   r12 | alice | 2012-03-04 10:20:30 +0100 (Sun, 04 Mar 2012) | 1 line
# Matching the whole header (instead of loose tokens anywhere in the output)
# keeps revision/date/time together even when a commit message happens to
# mention a revision number, a date or a time.
_LOG_HEADER = re.compile(
    r'^(r\d+) *\|[^|\n]*\| *(\d{4}-\d\d-\d\d) +(\d\d:\d\d)', re.MULTILINE)

# Text emitted before the per-revision items.
# NOTE(review): neither this literal nor the item literals in produce_rss
# contain any XML tags; if the output is meant to be a well-formed RSS
# document the markup appears to be missing -- confirm against the deployed
# version before relying on it.
_FEED_HEADER = (
    ' SALDO dev http://spraakbanken.gu.se/saldo SALDO dev SALDO dev'
    ' http://spraakbanken.gu.se/sites/spraakbanken.gu.se/files/img/saldo/saldo.gif'
    ' http://spraakbanken.gu.se/saldo \n'
)


def _run_svn(arguments):
    """Run ``svn`` with *arguments* and return its standard output as text.

    stdin is a pipe that receives empty input; stderr is not captured, so it
    stays attached to the parent process.
    """
    process = Popen(['svn'] + list(arguments),
                    shell=False, stdin=PIPE, stdout=PIPE)
    stdout, _ = process.communicate(b'')
    return stdout.decode('UTF-8')


class SVN:
    """Revision history of one SVN address, with diffs between revisions."""

    def __init__(self, address):
        """Fetch and parse ``svn log`` for *address*."""
        # Remembered so that difference() does not depend on a module-level
        # global named ``address`` being defined by the caller.
        self.address = address
        self.list_of_revisions = self._parse_log(_run_svn(['log', address]))

    @staticmethod
    def _parse_log(log_text):
        """Return ``(revision, date, hh:mm)`` tuples found in *log_text*.

        The tuples are all strings, e.g. ``('r12', '2012-03-04', '10:20')``,
        in the order the entries appear in the log output.
        """
        return [match.groups() for match in _LOG_HEADER.finditer(log_text)]

    def revisions(self):
        """Return the list of ``(revision, date, time)`` tuples."""
        return self.list_of_revisions

    def _sort_neg_pos(self, lst):
        """Split diff hunks into pure additions and pure removals.

        *lst* is a list of hunks, each a list of lines starting with ``+`` or
        ``-``.  For every line the text after the marker, up to the first tab,
        is taken as the key.  Keys of hunks that only add lines go into the
        first returned set, keys of hunks that only remove lines go into the
        second; hunks that do both are ignored.
        """
        positive = set()
        negative = set()
        for change in lst:
            added = set()
            removed = set()
            for line in change:
                key = line[1:].split('\t')[0]
                if line[0] == '+':
                    added.add(key)
                else:
                    removed.add(key)
            if not removed:
                positive.update(added)
            elif not added:
                negative.update(removed)
        return (positive, negative)

    def difference(self, revision1, revision2):
        """Return ``(added, removed)`` key sets between two revisions.

        Runs ``svn diff -r revision1:revision2`` on the address given to the
        constructor and classifies its hunks with _sort_neg_pos.
        """
        diff_text = _run_svn(
            ['diff', '-r', '%s:%s' % (revision1, revision2), self.address])
        # Each piece between "@@ ... @@" markers is one hunk; only the lines
        # that start with '+' or '-' are kept.
        hunks = re.split(r'@@.+@@\n', diff_text)
        changes = [
            [line for line in hunk.split('\n') if line and line[0] in '+-']
            for hunk in hunks
        ]
        return self._sort_neg_pos(changes)


def ref(address, item):
    """Return *item* unchanged when *address* is empty."""
    if address == '':
        return item
    # NOTE(review): this format string has a single placeholder but receives
    # three arguments, so this branch raises TypeError.  The intended text
    # (presumably some link markup built from address and item) is not
    # recoverable from this file -- confirm and restore it.
    return '%s' % (address, item, item)


def to_word(lex):
    """Return the part of *lex* before the first '..', with '_' as spaces."""
    return lex.split('..')[0].replace('_', ' ')


def produce_rss(address, number=20, ref_address=''):
    """Build the feed text for the history of *address*.

    Walks consecutive pairs of revisions as returned by SVN.revisions() and
    emits one item for every pair whose diff contains pure additions, stopping
    once *number* items have been produced.  *ref_address* is accepted but not
    used by this function.
    """
    svn = SVN(address)
    revs = svn.revisions()
    parts = [_FEED_HEADER]
    count = 0
    for (rev, d, t), (previous_rev, _, _) in zip(revs, revs[1:]):
        pos, neg = svn.difference(previous_rev, rev)
        if pos:
            count += 1
            # Sort once so the title shows the same first entry as the list
            # below, instead of an arbitrary element of the set.
            entries = sorted(pos)
            dots = ' ...' if len(entries) > 1 else ''
            parts.append(' \n')
            parts.append(' %s%s (%s %s)\n' % (entries[0], dots, d, t))
            parts.append(' %s\n' % (rev,))
            parts.append(
                ' http://spraakbanken.gu.se/eng/research/saldo/history#%s\n'
                % (rev,))
            parts.append(' %s\n' % ", ".join(entries))
            parts.append(' \n')
        if count >= number:
            break
    parts.append('\n')
    return ''.join(parts)


if __name__ == '__main__':
    address = 'https://svn.spraakdata.gu.se/repos/sblex/sal/trunk/saldo30.txt'
    feed = produce_rss(
        address, number=20,
        ref_address='http://spraakbanken.gu.se/ws/saldo-ws/lid/html/')
    # Write UTF-8 bytes directly: sys.stdout.buffer exists on Python 3, while
    # on Python 2 sys.stdout itself accepts byte strings.
    output = getattr(sys.stdout, 'buffer', sys.stdout)
    output.write(feed.encode('UTF-8') + b'\n')