diff options
author | Brian Harring <ferringb@google.com> | 2012-10-22 19:30:33 -0700 |
---|---|---|
committer | Brian Harring <ferringb@google.com> | 2012-10-22 19:30:33 -0700 |
commit | 7e0f9e20c7b1e356a11502d3be88983388348873 (patch) | |
tree | 224bf7c5e8598965d5bf4eb2a9139e284aaa6fec | |
parent | Drop the prune; it's unnecessary (diff) | |
download | git-conversion-tools-7e0f9e20c7b1e356a11502d3be88983388348873.tar.gz git-conversion-tools-7e0f9e20c7b1e356a11502d3be88983388348873.tar.bz2 git-conversion-tools-7e0f9e20c7b1e356a11502d3be88983388348873.zip |
Rework the blob rewriting of $Header.
Specifically, fix the replacement so it actually matches/replaces;
additionally, fix the oversight where the code wasn't rewriting the
data statement to the new length.
-rwxr-xr-x | process_directory.sh | 25 | ||||
-rwxr-xr-x | rewrite-git-blob.py | 75 |
2 files changed, 87 insertions, 13 deletions
```diff
diff --git a/process_directory.sh b/process_directory.sh
index fa9a0ee..6d59677 100755
--- a/process_directory.sh
+++ b/process_directory.sh
@@ -1,17 +1,15 @@
 #!/bin/bash
-command='
-  sed -re "s/^\(paludis (0.1.*)\)$/Package-manager: Paludis \1/" \
-    -e "s/^\([Pp]ortage version: (.*)\)$/Package-manager: Portage \1/"'
+
 f() {
   set -x
   mkdir -p "${output}"/{git,cvs-repo/gentoo-x86/Attic}
   ln -s "${cvsroot}" "${output}/cvs-repo/CVSROOT"
   ln -s "${root}/gentoo-x86/$1" "${output}/cvs-repo/gentoo-x86/$1"
   #ln -s "${root}/gentoo-x86/Attic" "${output}/cvs-repo/gentoo-x86/Attic"
-  ln -s "$(pwd)/config" "${output}/config"
-  ln -s "$(pwd)/gentoo_mailmap.py" "${output}/gentoo_mailmap.py"
+  ln -s "${base}/config" "${output}/config"
+  ln -s "${base}/gentoo_mailmap.py" "${output}/gentoo_mailmap.py"
   # Note- this must be canonical path, else it screws up our $Header rewriting.
-  cd "$(readlink -f "${output}" )"
+  pushd "$(readlink -f "${output}" )"
   export PYTHONPATH="${output}${PYTHONPATH:+:${PYTHONPATH}}"
   time cvs2git --options config -v
   cd git
@@ -19,24 +17,25 @@ f() {
   # Note we're only pull in blob data here; this intentional- we need to
   # interlace the commit objects together, these git object pools will be
   # be used as alternates for the final repo combination.
-  sed -re \
-    's|\$Header: '"$(readlink -f "$(pwd)")"'/*output/.*/cvs-repo/|$Header: /var/cvsroot/|g' \
-    ../cvs2svn-tmp/git-blob.dat | \
+  "${base}/rewrite-git-blob.py" \
+    ../cvs2svn-tmp/git-blob.dat "${output}/cvs-repo" | \
+    tee ../cvs2svn-tmp/rewritten-blob.dat | \
     git fast-import --export-marks=../cvs2svn-tmp/git-blob.idx
+  popd
   rm -rf "${final}"
-  cd "$root"
   mv "$output" "${final}"
   set +x
 }
 
 [ $# -lt 1 ] && { echo "need an argument..."; exit 1; }
 
+cd "$(readlink -f "$(pwd)")"
 base="$(pwd)"
-root="$(pwd)/cvs-repo"
+root="${base}/cvs-repo"
 cvsroot="${root}/CVSROOT"
 repo="${root}/gentoo-x86"
-output="$(pwd)/output/${1%,v}"
-final="$(pwd)/final/$1"
+output="${base}/output/${1%,v}"
+final="${base}/final/$1"
 mkdir -p "$(dirname "${final}")"
 
 rm -rf "${output}"
diff --git a/rewrite-git-blob.py b/rewrite-git-blob.py
new file mode 100755
index 0000000..430e415
--- /dev/null
+++ b/rewrite-git-blob.py
@@ -0,0 +1,75 @@
+#!/usr/bin/python
+import contextlib
+import collections
+import functools
+import itertools
+import mmap
+import multiprocessing
+import operator
+import os
+import re
+import subprocess
+import sys
+
+@contextlib.contextmanager
+def mmap_open(path):
+    handle = fd = None
+    try:
+        fd = os.open(path, os.O_RDONLY)
+        handle = mmap.mmap(fd, os.fstat(fd).st_size, mmap.MAP_SHARED, mmap.PROT_READ)
+        os.close(fd)
+        fd = None
+        yield handle
+    finally:
+        if fd:
+            os.close(fd)
+        if handle:
+            handle.close()
+
+def readline_iterate(handle):
+    line = handle.readline()
+    while line:
+        yield line
+        line = handle.readline()
+
+mangler = []
+mangler.append(functools.partial(
+    re.compile(r"^\(paludis (0.1.*)\)$", re.M|re.I).sub,
+    r"Package-Manager: paludis-\1/"))
+# Special case not covered by the main portage mangler.
+mangler.append(functools.partial(
+    re.compile('r^\(Portage (2\.1\.2[^\)]+)\)$', re.M|re.I).sub,
+    r'Package-Manager: portage-\1'))
+mangler.append(functools.partial(
+    re.compile(r' *\((?:manifest +recommit|(?:un)?signed +manifest +commit)\) *$', re.M|re.I).sub,
+    r''))
+
+def process_stream(source, output_dir, output):
+    header = os.path.normpath(os.path.abspath(output_dir))
+    header = "$Header: %s" % output_dir
+    line = source.readline()
+    while line:
+        chunks = line.split()
+        if chunks[0:1] == ['data']:
+            # Process the commit message...
+            size = int(chunks[1])
+            data = source.read(size)
+            assert len(data) == size, (line, data)
+            data = data.replace(header, "$Header: /var/cvsroot")
+            line = 'data %i\n%s' % (len(data), data)
+        output.write(line)
+        line = source.readline()
+
+def main(blob_file, output_dir, output):
+    # allocate the pool now, before we start getting memory abusive; this is
+    # used for thin-manifest conversion if active/enabled.
+    #clean_pool = multiprocessing.Pool()
+
+    # Be careful here to just iterate over source; doing so allows this script
+    # to do basic processing as it goes (specifically while it's being fed from
+    # the mainline cvs2git parallelized repo creator).
+    with mmap_open(blob_file) as data:
+        process_stream(data, output_dir, sys.stdout)
+
+if __name__ == '__main__':
+    sys.exit(main(sys.argv[1], sys.argv[2], sys.stdout))
```