hg-fastimport

changeset 11:9e9c215fcbd8

Handle blobs in the fast-import stream.
- write each one to a file in .hg/blobs
- when a blob is referenced, hardlink it into the working dir
author Greg Ward <greg-hg@gerg.ca>
date Tue Mar 31 21:05:43 2009 -0400 (2009-03-31)
parents 18c1e7ac0012
children 987d1e08bec5
files fastimport/hgimport.py
line diff
     1.1 --- a/fastimport/hgimport.py
     1.2 +++ b/fastimport/hgimport.py
     1.3 @@ -22,8 +22,12 @@
     1.4  
     1.5  import os
     1.6  import os.path
     1.7 +import errno
     1.8 +import shutil
     1.9 +
    1.10  import mercurial.hg
    1.11  import mercurial.commands
    1.12 +from mercurial import util
    1.13  from mercurial.node import nullrev
    1.14  import processor
    1.15  
    1.16 @@ -42,11 +46,39 @@
    1.17          #self.tag_back_map = {}
    1.18          self.finished = False
    1.19  
    1.20 +        self.numblobs = 0               # for progress reporting
    1.21 +        self.blobdir = None
    1.22 +
    1.23 +    def teardown(self):
    1.24 +        if self.blobdir and os.path.exists(self.blobdir):
    1.25 +            self.ui.status("Removing blob dir %r ...\n" % self.blobdir)
    1.26 +            shutil.rmtree(self.blobdir)
    1.27 +
    1.28      def progress_handler(self, cmd):
    1.29          self.ui.write("Progress: %s\n" % cmd.message)
    1.30  
    1.31 -    # We can't handle blobs - fail
    1.32 -    #def blob_handler(self, cmd):
    1.33 +    def blob_handler(self, cmd):
    1.34 +        if self.blobdir is None:        # no blobs seen yet
    1.35 +            # XXX cleanup?
    1.36 +            self.blobdir = os.path.join(self.repo.root, ".hg", "blobs")
    1.37 +            os.mkdir(self.blobdir)
    1.38 +
    1.39 +        fn = self.getblobfilename(cmd.id)
    1.40 +        blobfile = open(fn, "wb")
    1.41 +        #self.ui.debug("writing blob %s to %s (%d bytes)\n"
    1.42 +        #              % (cmd.id, fn, len(cmd.data)))
    1.43 +        blobfile.write(cmd.data)
    1.44 +        blobfile.close()
    1.45 +
    1.46 +        self.numblobs += 1
    1.47 +        if self.numblobs % 500 == 0:
    1.48 +            self.ui.status("%d blobs read\n" % self.numblobs)
    1.49 +
    1.50 +    def getblobfilename(self, blobid):
    1.51 +        if self.blobdir is None:
    1.52 +            raise RuntimeError("no blobs seen, so no blob directory created")
    1.53 +        # XXX should escape ":" for windows
    1.54 +        return os.path.join(self.blobdir, "blob-" + blobid)
    1.55  
    1.56      def checkpoint_handler(self, cmd):
    1.57          # This command means nothing to us
    1.58 @@ -89,7 +121,8 @@
    1.59          self.repo.dirstate.setbranch(branch)
    1.60          #self.ui.write("Bing\n")
    1.61          #print "vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv"
    1.62 -        commit_handler = HgImportCommitHandler(cmd, self.ui, self.repo, **self.opts)
    1.63 +        commit_handler = HgImportCommitHandler(
    1.64 +            self, cmd, self.ui, self.repo, **self.opts)
    1.65          commit_handler.process()
    1.66          #print "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^"
    1.67          #self.ui.write(cmd.dump_str(verbose=True))
    1.68 @@ -124,7 +157,8 @@
    1.69  
    1.70  class HgImportCommitHandler(processor.CommitHandler):
    1.71  
    1.72 -    def __init__(self, command, ui, repo, **opts):
    1.73 +    def __init__(self, parent, command, ui, repo, **opts):
    1.74 +        self.parent = parent            # HgImportProcessor running the show
    1.75          self.command = command
    1.76          self.ui = ui
    1.77          self.repo = repo
    1.78 @@ -144,9 +178,29 @@
    1.79          fullpath = os.path.join(self.repo.root, filecmd.path)
    1.80          self._make_container(fullpath)
    1.81          #print "made dirs, writing file"
    1.82 -        f = open(fullpath, "w")
    1.83 -        f.write(filecmd.data)
    1.84 -        f.close()
    1.85 +        if filecmd.dataref:
    1.86 +            # reference to a blob that has already appeared in the stream
    1.87 +            fn = self.parent.getblobfilename(filecmd.dataref)
    1.88 +            if os.path.exists(fullpath):
    1.89 +                os.remove(fullpath)
    1.90 +            try:
    1.91 +                os.link(fn, fullpath)
    1.92 +            except OSError, err:
    1.93 +                if err.errno == errno.ENOENT:
    1.94 +                    # if this happens, it's a problem in the fast-import
    1.95 +                    # stream
    1.96 +                    raise util.Abort("bad blob ref %r (no such file %s)"
    1.97 +                                     % (filecmd.dataref, fn))
    1.98 +                else:
    1.99 +                    # anything else is a bug in this extension
   1.100 +                    # (cross-device move, permissions, etc.)
   1.101 +                    raise
   1.102 +        elif filecmd.data:
   1.103 +            f = open(fullpath, "w")
   1.104 +            f.write(filecmd.data)
   1.105 +            f.close()
   1.106 +        else:
   1.107 +            raise RuntimeError("either filecmd.dataref or filecmd.data must be set")
   1.108          #print self.repo.add([filecmd.path])
   1.109          #print "Done:", filecmd.path
   1.110