hg-fastimport
changeset 11:9e9c215fcbd8
Handle blobs in the fast-import stream.
- write each one to a file in .hg/blobs
- when a blob is referenced, hardlink it into the working dir
| author | Greg Ward <greg-hg@gerg.ca> |
|---|---|
| date | Tue Mar 31 21:05:43 2009 -0400 (2009-03-31) |
| parents | 18c1e7ac0012 |
| children | 987d1e08bec5 |
| files | fastimport/hgimport.py |
line diff
1.1 --- a/fastimport/hgimport.py 1.2 +++ b/fastimport/hgimport.py 1.3 @@ -22,8 +22,12 @@ 1.4 1.5 import os 1.6 import os.path 1.7 +import errno 1.8 +import shutil 1.9 + 1.10 import mercurial.hg 1.11 import mercurial.commands 1.12 +from mercurial import util 1.13 from mercurial.node import nullrev 1.14 import processor 1.15 1.16 @@ -42,11 +46,39 @@ 1.17 #self.tag_back_map = {} 1.18 self.finished = False 1.19 1.20 + self.numblobs = 0 # for progress reporting 1.21 + self.blobdir = None 1.22 + 1.23 + def teardown(self): 1.24 + if self.blobdir and os.path.exists(self.blobdir): 1.25 + self.ui.status("Removing blob dir %r ...\n" % self.blobdir) 1.26 + shutil.rmtree(self.blobdir) 1.27 + 1.28 def progress_handler(self, cmd): 1.29 self.ui.write("Progress: %s\n" % cmd.message) 1.30 1.31 - # We can't handle blobs - fail 1.32 - #def blob_handler(self, cmd): 1.33 + def blob_handler(self, cmd): 1.34 + if self.blobdir is None: # no blobs seen yet 1.35 + # XXX cleanup? 1.36 + self.blobdir = os.path.join(self.repo.root, ".hg", "blobs") 1.37 + os.mkdir(self.blobdir) 1.38 + 1.39 + fn = self.getblobfilename(cmd.id) 1.40 + blobfile = open(fn, "wb") 1.41 + #self.ui.debug("writing blob %s to %s (%d bytes)\n" 1.42 + # % (cmd.id, fn, len(cmd.data))) 1.43 + blobfile.write(cmd.data) 1.44 + blobfile.close() 1.45 + 1.46 + self.numblobs += 1 1.47 + if self.numblobs % 500 == 0: 1.48 + self.ui.status("%d blobs read\n" % self.numblobs) 1.49 + 1.50 + def getblobfilename(self, blobid): 1.51 + if self.blobdir is None: 1.52 + raise RuntimeError("no blobs seen, so no blob directory created") 1.53 + # XXX should escape ":" for windows 1.54 + return os.path.join(self.blobdir, "blob-" + blobid) 1.55 1.56 def checkpoint_handler(self, cmd): 1.57 # This command means nothing to us 1.58 @@ -89,7 +121,8 @@ 1.59 self.repo.dirstate.setbranch(branch) 1.60 #self.ui.write("Bing\n") 1.61 #print "vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv" 1.62 - commit_handler = HgImportCommitHandler(cmd, self.ui, self.repo, 
**self.opts) 1.63 + commit_handler = HgImportCommitHandler( 1.64 + self, cmd, self.ui, self.repo, **self.opts) 1.65 commit_handler.process() 1.66 #print "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^" 1.67 #self.ui.write(cmd.dump_str(verbose=True)) 1.68 @@ -124,7 +157,8 @@ 1.69 1.70 class HgImportCommitHandler(processor.CommitHandler): 1.71 1.72 - def __init__(self, command, ui, repo, **opts): 1.73 + def __init__(self, parent, command, ui, repo, **opts): 1.74 + self.parent = parent # HgImportProcessor running the show 1.75 self.command = command 1.76 self.ui = ui 1.77 self.repo = repo 1.78 @@ -144,9 +178,29 @@ 1.79 fullpath = os.path.join(self.repo.root, filecmd.path) 1.80 self._make_container(fullpath) 1.81 #print "made dirs, writing file" 1.82 - f = open(fullpath, "w") 1.83 - f.write(filecmd.data) 1.84 - f.close() 1.85 + if filecmd.dataref: 1.86 + # reference to a blob that has already appeared in the stream 1.87 + fn = self.parent.getblobfilename(filecmd.dataref) 1.88 + if os.path.exists(fullpath): 1.89 + os.remove(fullpath) 1.90 + try: 1.91 + os.link(fn, fullpath) 1.92 + except OSError, err: 1.93 + if err.errno == errno.ENOENT: 1.94 + # if this happens, it's a problem in the fast-import 1.95 + # stream 1.96 + raise util.Abort("bad blob ref %r (no such file %s)" 1.97 + % (filecmd.dataref, fn)) 1.98 + else: 1.99 + # anything else is a bug in this extension 1.100 + # (cross-device move, permissions, etc.) 1.101 + raise 1.102 + elif filecmd.data: 1.103 + f = open(fullpath, "w") 1.104 + f.write(filecmd.data) 1.105 + f.close() 1.106 + else: 1.107 + raise RuntimeError("either filecmd.dataref or filecmd.data must be set") 1.108 #print self.repo.add([filecmd.path]) 1.109 #print "Done:", filecmd.path 1.110
