#!/usr/local/bin/python
#
# Create svn changesets from cvs repository incrementally.  This script
# may be replaced by `cvsps'.  I don't know.
#
# $Id: cvsconv.py 134740 2011-10-10 04:33:51Z yasuoka $

import os
import rcsparse
import re
import string
import sys
import time

from hashlib import md5
from svn import core, fs, delta, repos


def _out(text=''):
    """Write one line of dump output to stdout."""
    sys.stdout.write(text + '\n')


def _err(text):
    """Write one line of progress/diagnostic output to stderr."""
    sys.stderr.write(text + '\n')


def usage():
    """Print the command-line synopsis to stderr."""
    _err('usage: cvs2svndump cvsroot [svnroot svnpath]')


def _dump_file_node(cvs, svn, svnpath, f):
    """Emit the dump record for one file revision of a changeset.

    `f` is an entry of ChangeSetKey.revs:
    [revision, previous revision, rcs file path, state].
    `svn` tracks which paths exist so that add/change/delete can be chosen
    and so that directory add/delete records are emitted when needed.
    """
    p = node_path(cvs.cvsroot, svnpath, f[2])

    if f[3] == 'dead':
        if not svn.exists(p):
            _err("Warning: remove '%s', but it does not exist." % (p))
            return
        _out('Node-path: %s' % (p))
        _out('Node-kind: file')
        _out('Node-action: delete')
        _out()
        svn.remove(p)
        return

    # The contents are only needed for live revisions, so they are
    # expanded after the 'dead' case has been handled.
    fileprops = ''
    if os.access(f[2], os.X_OK):
        fileprops += str_prop('svn:executable', '*')
    fileprops += 'PROPS-END\n'
    filecont = rcs_expand_keyword(f[2], f[0])

    md5sum = md5()
    md5sum.update(filecont)

    if not svn.exists(p):
        # svn.add() may emit 'dir add' records; they must precede the file.
        svn.add(p)
        action = 'add'
    else:
        action = 'change'
    _out('Node-path: %s' % (p))
    _out('Node-kind: file')
    _out('Node-action: %s' % (action))

    _out('Prop-content-length: %d' % (len(fileprops)))
    _out('Text-content-length: %s' % (len(filecont)))
    _out('Text-content-md5: %s' % (md5sum.hexdigest()))
    _out('Content-length: %d' % (len(fileprops) + len(filecont)))
    _out()
    _out(fileprops + filecont)
    _out()


def main():
    """Convert the cvs repository named on the command line to a svn dump.

    Usage: cvs2svndump cvsroot [svnroot svnpath]

    The dump is written to stdout, progress messages to stderr.  When
    svnroot/svnpath are given, the existing repository is loaded first and
    only the changesets after its last converted revision are dumped.
    """
    if len(sys.argv) not in (2, 4):
        usage()
        sys.exit(1)

    cvsroot = sys.argv[1]
    svnroot = None
    svnpath = None
    if len(sys.argv) == 4:
        svnroot = sys.argv[2]
        svnpath = sys.argv[3]

    do_incremental = False
    found_last_revision = False

    cvs = CvsConv(cvsroot, None)
    if svnroot is None:
        svn = SvnDumper()
    else:
        svn = SvnDumper(svnpath)
        # Loading is best effort: on failure a full (non-incremental) dump
        # is produced.  The failure is reported instead of being silently
        # swallowed, and KeyboardInterrupt/SystemExit are not caught.
        try:
            svn.load(svnroot)
        except Exception:
            _err("Warning: could not load svn repository '%s': %s" %
                 (svnroot, sys.exc_info()[1]))
        else:
            if svn.last_rev is not None:
                do_incremental = True
                _err('** svn loaded revision r%d by %s' %
                     (svn.last_rev, svn.last_author))

    _err('** walk cvs tree')
    cvs.walk()

    svn.dump = True

    changesets = sorted(cvs.changesets)
    nchangesets = len(changesets)
    _err('** cvs has %d changeset' % (nchangesets))

    if nchangesets <= 0:
        sys.exit(0)

    # don't use last 1 hour for safety
    dmax = changesets[-1].max_time - 3600
    header_printed = False

    for i, k in enumerate(changesets):
        if do_incremental and not found_last_revision:
            # Skip everything up to and including the changeset that
            # matches the last revision already present in svn.
            if k.max_time == svn.last_date and k.author == svn.last_author:
                found_last_revision = True
            continue
        if k.max_time > dmax:
            continue
        if not header_printed:
            _out('SVN-fs-dump-format-version: 2')
            _out()
            header_printed = True

        # parse the first file to get log
        finfo = k.revs[0]
        log = rcsparse.rcsfile(finfo[2]).getlog(finfo[0])

        revprops = str_prop('svn:author', k.author)
        revprops += str_prop('svn:date', svn_time(k.max_time))
        revprops += str_prop('svn:log', log)
        revprops += 'PROPS-END\n'

        _out('Revision-number: %d' % (i + 1))
        _out('Prop-content-length: %d' % (len(revprops)))
        _out('Content-length: %d' % (len(revprops)))
        _out()
        _out(revprops)

        for f in k.revs:
            _dump_file_node(cvs, svn, svnpath, f)

    _err('** dumped')


# Two commits further apart than this many seconds are never considered
# part of the same changeset.
MAX_COMMIT_SEC = 90


class ChangeSetKey:
    """Key identifying one changeset (branch, author, log, time window).

    Two keys compare equal when they share branch, author and log hash and
    their time ranges lie within MAX_COMMIT_SEC of each other.  The key
    also carries the list of file revisions (`revs`) of the changeset.
    """

    def __init__(self, branch, author, time, log):
        self.branch = branch
        self.author = author
        self.min_time = time
        self.max_time = time
        self.revs = []
        # Only a hash of the log message is kept, not the message itself.
        h = 0
        for c in log:
            h = 31 * h + ord(c)
        self.log_hash = h

    def __cmp__(self, anon):
        """Three-way comparison: time ordering first, then identity."""
        if not isinstance(anon, ChangeSetKey):
            return -1
        ma = anon.max_time - self.max_time
        mi = self.min_time - anon.min_time
        if ma > MAX_COMMIT_SEC:
            return -ma
        if mi > MAX_COMMIT_SEC:
            return mi
        if self.log_hash != anon.log_hash or \
                self.branch != anon.branch or \
                self.author != anon.author:
            return mi if mi != 0 else -ma
        return 0

    # Rich comparisons delegate to __cmp__ so that dictionary lookups and
    # sorting give the same answers on interpreters that ignore __cmp__.
    def __eq__(self, anon):
        return self.__cmp__(anon) == 0

    def __ne__(self, anon):
        return self.__cmp__(anon) != 0

    def __lt__(self, anon):
        return self.__cmp__(anon) < 0

    def __le__(self, anon):
        return self.__cmp__(anon) <= 0

    def __gt__(self, anon):
        return self.__cmp__(anon) > 0

    def __ge__(self, anon):
        return self.__cmp__(anon) >= 0

    def merge(self, anon):
        """Widen this key's time range so that it also covers `anon`."""
        self.max_time = max(self.max_time, anon.max_time)
        self.min_time = min(self.min_time, anon.min_time)

    def __hash__(self):
        # The times are deliberately left out: keys that compare equal may
        # have different times.
        return hash(self.branch + '/' + self.author) * 31 + self.log_hash


class CvsConv:
    """Walk a cvs repository and group its file revisions into changesets.

    After walk(), `changesets` maps each ChangeSetKey to itself; the file
    revisions of a changeset are in the key's `revs` list.
    """

    def __init__(self, cvsroot, module = None):
        self.cvsroot = cvsroot
        self.module = module
        self.changesets = dict()

    def walk(self):
        """Parse every `,v' file below cvsroot (or cvsroot/module)."""
        p = [self.cvsroot]
        if self.module is not None:
            p.append(self.module)
        path = os.path.join(*p)

        for root, dirs, files in os.walk(path):
            for f in files:
                if not f.endswith(',v'):
                    continue
                # os.path.join avoids a doubled separator when the root
                # already ends with one.
                self.parse_file(os.path.join(root, f))

    def parse_file(self, path):
        """Add the trunk and vendor-branch revisions of one RCS file.

        Each accepted revision is appended to the `revs` list of its
        changeset as [revision, previous revision, path, state].
        """
        rcsfile = rcsparse.rcsfile(path)
        branches = {'1': 'HEAD', '1.1.1': 'VENDOR'}
        for k, v in rcsfile.symbols.items():
            r = v.split('.')
            if len(r) == 3:
                branches[v] = 'VENDOR'
            elif len(r) >= 3 and r[-2] == '0':
                # symbol of the form x.y.0.z: register branch x.y.z
                branches['.'.join(r[:-2] + r[-1:])] = k

        # sort by time and revision: ascending time, and for equal times
        # descending revision string (two stable passes).
        revs = sorted(rcsfile.revs.items(), key=lambda kv: kv[1][0],
                      reverse=True)
        revs.sort(key=lambda kv: kv[1][1])

        p = '0'
        novendor = False
        have_initial_revision = False
        for k, v in revs:
            r = k.split('.')
            if len(r) == 4 and r[0] == '1' and r[1] == '1' and r[2] == '1' \
                    and r[3] == '1':
                if have_initial_revision:
                    continue
                if v[3] == 'dead':
                    continue
                have_initial_revision = True
            elif len(r) == 4 and r[0] == '1' and r[1] == '1' and r[2] == '1':
                if novendor:
                    continue
            elif len(r) == 2:
                if r[0] == '1' and r[1] == '1':
                    if have_initial_revision:
                        continue
                    if v[3] == 'dead':
                        continue
                    have_initial_revision = True
                elif r[0] == '1' and r[1] != '1':
                    novendor = True
            else:
                # trunk only
                continue

            b = '.'.join(r[:-1])
            a = ChangeSetKey(branches[b], v[2], v[1], rcsfile.getlog(v[0]))
            # If an equal changeset is already known, merge the time ranges
            # and re-insert it so the stored key reflects the new range.
            try:
                c = self.changesets[a]
                del self.changesets[a]
                c.merge(a)
                a = c
            except KeyError:
                pass
            self.changesets[a] = a
            self.changesets[a].revs.append([k, p, path, v[3]])
            p = k


def svn_time(t):
    """Format the epoch time `t` (UTC) as a svn:date property value."""
    return time.strftime("%Y-%m-%dT%H:%M:%S.000000Z", time.gmtime(t))


def str_prop(k,v):
    """Return one property record (key `k`, value `v`) in dump format."""
    return 'K %d\n%s\nV %d\n%s\n' % (len(k), k, len(v), v)


def node_path(r,n,p):
    """Map the RCS file path `p` to a path inside the svn repository.

    r -- cvs root; it is stripped from the front of the path
    n -- svn path prefix, or None/'' for no prefix
    p -- path of the `,v' file

    The `,v' suffix and an `Attic' parent directory are removed.
    """
    if r.endswith('/'):
        r = r[:-1]
    path = p[:-2]
    p = path.split('/')
    # At least two components are needed before p[-2] can be inspected.
    if len(p) > 1 and p[-2] == 'Attic':
        path = '/'.join(p[:-2]) + '/' + p[-1]
    if path.startswith(r):
        path = path[len(r) + 1:]
    if n is None or len(n) == 0:
        return path
    return '%s/%s' % (n, path)


class SvnDumper:
    """Track which paths exist on the svn side while a dump is produced.

    `dirs` maps a directory path to a dict whose keys are the names of the
    files in it.  When `dump` is true, mkdir()/rmdir() also write the
    corresponding directory records to stdout.
    """

    def __init__(self, root=''):
        self.root = root
        if self.root != '' and self.root[-1] == '/':
            self.root = self.root[:-1]
        self.dirs = {}
        # The placeholder entry keeps the root from ever looking empty, so
        # rmdir() never removes it.
        self.dirs[self.root] = {'dontdelete': 1}
        self.dump = False
        # Set by load(); None means nothing has been loaded.
        self.last_rev = None
        self.last_author = None
        self.last_date = None

    def exists(self, path):
        """Return True if the file `path` is currently recorded."""
        d = os.path.dirname(path)
        if d not in self.dirs:
            return False
        return os.path.basename(path) in self.dirs[d]

    def add(self, path):
        """Record the file `path`, creating parent directories as needed."""
        d = os.path.dirname(path)
        if d not in self.dirs:
            self.mkdir(d)
        self.dirs[d][os.path.basename(path)] = 1

    def remove(self, path):
        """Forget the file `path` and remove directories left empty."""
        d = os.path.dirname(path)
        if d == path:
            return
        del self.dirs[d][os.path.basename(path)]
        self.rmdir(d)

    def rmdir(self, path):
        """Remove `path` if it has no files and no known subdirectories.

        Recurses into the parent directory after a removal.
        """
        if len(self.dirs[path]) > 0:
            return
        for r in self.dirs.keys():
            if r != path and r.startswith(path + '/'):
                return
        if self.dump:
            _out('Node-path: %s' % (path))
            _out('Node-kind: dir')
            _out('Node-action: delete')
            _out()
        del self.dirs[path]
        d = os.path.dirname(path)
        if d == path or d not in self.dirs:
            return
        self.rmdir(d)

    def mkdir(self, path):
        """Record the directory `path`, creating missing parents first."""
        if path not in self.dirs:
            d = os.path.dirname(path)
            if d == path:
                return
            self.mkdir(d)
            if self.dump:
                _out('Node-path: %s' % (path))
                _out('Node-kind: dir')
                _out('Node-action: add')
                _out()
                _out()
            self.dirs[path] = {}

    def load(self, repo_path):
        """Load the state of an existing svn repository.

        Walks the history of `self.root` backwards to the most recent
        revision whose author is not 'svnadmin', stores it in last_rev /
        last_author / last_date, and records every file and directory
        reported by a delta from revision 0 to the youngest revision.
        If no such revision exists, the last_* attributes stay None.
        """
        repo_path = core.svn_path_canonicalize(repo_path)
        repos_ptr = repos.open(repo_path)
        fs_ptr = repos.fs(repos_ptr)
        rev = fs.youngest_rev(fs_ptr)
        base_root = fs.revision_root(fs_ptr, 0)
        root = fs.revision_root(fs_ptr, rev)
        hist = fs.node_history(root, self.root)
        while hist is not None:
            hist = fs.history_prev(hist, 0)
            if hist is None:
                # No more history: do not query a location on None.
                break
            dummy, rev = fs.history_location(hist)
            d = fs.revision_prop(fs_ptr, rev, core.SVN_PROP_REVISION_DATE)
            author = fs.revision_prop(fs_ptr, rev,
                                      core.SVN_PROP_REVISION_AUTHOR)
            if author == 'svnadmin':
                continue
            self.last_author = author
            # microseconds -> seconds; floor division keeps an integer
            self.last_date = core.svn_time_from_cstring(d) // 1000000
            self.last_rev = rev

            def authz_cb(root, path, pool):
                return 1
            editor = SvnDumperEditor(self)
            e_ptr, e_baton = delta.make_editor(editor)
            repos.dir_delta(base_root, '', '', root, self.root, e_ptr, e_baton,
                            authz_cb, 0, 1, 0, 0)
            break


class SvnDumperEditor(delta.Editor):
    """Delta editor that records added files and directories in a SvnDumper."""

    def __init__(self, dumper):
        self.dumper = dumper

    def add_file(self, path, *args):
        self.dumper.add(self.dumper.root + '/' + path)

    def add_directory(self, path, *args):
        self.dumper.mkdir(self.dumper.root + '/' + path)


# ----------------------------------------------------------------------
# RCS Keywords
# ----------------------------------------------------------------------
# Matches, from the start of a line, up to and including the first keyword
# name and the `$' or `:' that follows it.  Group 1 is the keyword name.
re_kw = re.compile(r".*?\$(Author|Date|Header|Id|OpenBSD|Log|Name|RCSfile|Revision|Source|State|Mdocdate)[\$:]")

# Pieces of information a keyword expands to.
RCS_KW_AUTHOR = (1 << 0)
RCS_KW_DATE = (1 << 1)
RCS_KW_LOG = (1 << 2)
RCS_KW_NAME = (1 << 3)
RCS_KW_RCSFILE = (1 << 4)
RCS_KW_REVISION = (1 << 5)
RCS_KW_SOURCE = (1 << 6)
RCS_KW_STATE = (1 << 7)
RCS_KW_FULLPATH = (1 << 8)
RCS_KW_MDOCDATE = (1 << 9)

RCS_KW_ID = (RCS_KW_RCSFILE | RCS_KW_REVISION | RCS_KW_DATE |
             RCS_KW_AUTHOR | RCS_KW_STATE)
RCS_KW_HEADER = (RCS_KW_ID | RCS_KW_FULLPATH)

# keyword name -> combination of RCS_KW_* flags
rcs_expkw = {
    "Author": RCS_KW_AUTHOR,
    "Date": RCS_KW_DATE,
    "Header": RCS_KW_HEADER,
    "Id": RCS_KW_ID,
    "OpenBSD": RCS_KW_ID,
    "Log": RCS_KW_LOG,
    "Name": RCS_KW_NAME,
    "RCSfile": RCS_KW_RCSFILE,
    "Revision": RCS_KW_REVISION,
    "Source": RCS_KW_SOURCE,
    "State": RCS_KW_STATE,
    "Mdocdate": RCS_KW_MDOCDATE
}

RCS_KWEXP_NONE = (1 << 0)
RCS_KWEXP_NAME = (1 << 1)       # include keyword name
RCS_KWEXP_VAL = (1 << 2)        # include keyword value
RCS_KWEXP_LKR = (1 << 3)        # include name of locker
RCS_KWEXP_OLD = (1 << 4)        # generate old keyword string
RCS_KWEXP_ERR = (1 << 5)        # mode has an error
RCS_KWEXP_DEFAULT = (RCS_KWEXP_NAME | RCS_KWEXP_VAL)
RCS_KWEXP_KVL = (RCS_KWEXP_NAME | RCS_KWEXP_VAL | RCS_KWEXP_LKR)


def rcs_kflag_get(flags):
    """Translate a keyword substitution flag string into RCS_KWEXP_* bits.

    flags -- string of flag characters ('k', 'v', 'l', 'o', 'b'), or None

    Returns RCS_KWEXP_DEFAULT for None.  'o' and 'b' are only valid on
    their own; combined with other characters, or for any unknown
    character, RCS_KWEXP_ERR is set in the result.
    """
    if flags is None:
        return RCS_KWEXP_DEFAULT
    fl = 0
    for fc in flags:
        if fc == 'k':
            fl |= RCS_KWEXP_NAME
        elif fc == 'v':
            fl |= RCS_KWEXP_VAL
        elif fc == 'l':
            fl |= RCS_KWEXP_LKR
        elif fc == 'o':
            if len(flags) != 1:
                fl |= RCS_KWEXP_ERR
            fl |= RCS_KWEXP_OLD
        elif fc == 'b':
            if len(flags) != 1:
                fl |= RCS_KWEXP_ERR
            fl |= RCS_KWEXP_NONE
        else:
            fl |= RCS_KWEXP_ERR
    return fl


def split_lines(buf):
    """Split `buf` at '\\n' only, keeping the newline on each line.

    A trailing piece without a newline is returned as the last element;
    an empty buffer gives an empty list.
    """
    lines = []
    start = 0
    while start < len(buf):
        nl = buf.find('\n', start)
        if nl < 0:
            lines.append(buf[start:])
            break
        lines.append(buf[start:nl + 1])
        start = nl + 1
    return lines


def rcs_expand_keyword(filename, r):
    """Check out revision `r` of the RCS file and expand its keywords.

    filename -- path of the RCS (`,v') file
    r        -- revision, used as a key into the file's revision table

    Returns the text of the revision.  If the file's expansion mode
    selects 'b' or 'o', the text is returned unmodified.  Otherwise every
    keyword is replaced according to the mode; each expansion is cut to
    255 characters, and for the Log keyword the log message is inserted
    after the line that contains the keyword.
    """
    rcs = rcsparse.rcsfile(filename)
    rev = rcs.revs[r]

    mode = rcs_kflag_get(rcs.expand)
    if (mode & (RCS_KWEXP_NONE | RCS_KWEXP_OLD)) != 0:
        return rcs.checkout(rev[0])

    out = []            # pieces of the result, joined at the end
    logparts = []       # pending $Log$ text for the current line
    for line in split_lines(rcs.checkout(rev[0])):
        while True:
            m = re_kw.match(line)
            if m is None:
                break
            # Find the `$' that terminates the keyword; m.end(1) is the
            # index of the `$' or `:' right after the keyword name.
            if len(line) > m.end(1) and line[m.end(1)] == '$':
                dsign = m.end(1)
            else:
                dsign = line.find('$', m.end(1))
                if dsign < 0:
                    # unterminated keyword: leave the rest untouched
                    break
            prefix = line[:m.start(1) - 1]
            out.append(prefix)
            expbuf = ''
            if (mode & RCS_KWEXP_NAME) != 0:
                expbuf += '$'
                expbuf += m.group(1)
                if (mode & RCS_KWEXP_VAL) != 0:
                    expbuf += ': '
            if (mode & RCS_KWEXP_VAL) != 0:
                expkw = rcs_expkw[m.group(1)]
                if (expkw & RCS_KW_RCSFILE) != 0:
                    expbuf += filename \
                        if (expkw & RCS_KW_FULLPATH) != 0 \
                        else os.path.basename(filename)
                    expbuf += " "
                if (expkw & RCS_KW_REVISION) != 0:
                    expbuf += rev[0]
                    expbuf += " "
                if (expkw & RCS_KW_DATE) != 0:
                    expbuf += time.strftime("%Y/%m/%d %H:%M:%S ",
                                            time.gmtime(rev[1]))
                if (expkw & RCS_KW_MDOCDATE) != 0:
                    d = time.gmtime(rev[1])
                    # "Month D YYYY ", day without padding; built by hand
                    # because the %e directive is not portable.
                    expbuf += '%s %d %d ' % (time.strftime('%B', d),
                                             d.tm_mday, d.tm_year)
                if (expkw & RCS_KW_AUTHOR) != 0:
                    expbuf += rev[2]
                    expbuf += " "
                if (expkw & RCS_KW_STATE) != 0:
                    expbuf += rev[3]
                    expbuf += " "
                if (expkw & RCS_KW_LOG) != 0:
                    # The text in front of the keyword, without trailing
                    # blanks, prefixes every inserted log line.
                    p = prefix.rstrip(' \t')
                    expbuf += filename \
                        if (expkw & RCS_KW_FULLPATH) != 0 \
                        else os.path.basename(filename)
                    expbuf += " "
                    logparts.append('%s Revision %s ' % (p, rev[0]))
                    logparts.append(time.strftime("%Y/%m/%d %H:%M:%S ",
                                                  time.gmtime(rev[1])))
                    logparts.append(rev[2] + '\n')
                    for lline in split_lines(rcs.getlog(rev[0])):
                        logparts.append(p + ' ' + lline)
                    logparts.append(p + '\n')
                if (expkw & RCS_KW_SOURCE) != 0:
                    expbuf += filename
                    expbuf += " "
                if (expkw & RCS_KW_NAME) != 0:
                    expbuf += " "
            if (mode & RCS_KWEXP_NAME) != 0:
                expbuf += '$'
            out.append(expbuf[:255])
            # Continue scanning after the terminating `$'.
            line = line[dsign + 1:]
        out.append(line)
        if logparts:
            out.extend(logparts)
            logparts = []
    return ''.join(out)


# ----------------------------------------------------------------------
# entry point
# ----------------------------------------------------------------------
if __name__ == '__main__':
    main()