User:Tardis/regdiff.py
Appearance
#!/usr/bin/env python
#regdiff.py
#Created November 9 2008
#Updated November 10 2008
#Version 0.3.1
#This program is free software; you can redistribute it and/or modify it under
#the terms of the GNU General Public License version 2, the GNU Free
#Documentation License version 1.2 (with no Invariant Sections, with no
#Front-Cover Texts, and with no Back-Cover Texts), or (at your option) any
#later version of either license. It is distributed in the hope that it will
#be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
#Public License for more details.
#Description:
#Compares two Windows registry (.REG) files and produces a third such that
#applying it transforms a registry represented by the first into a registry
#represented by the second. Usually you should redirect the output to a file.
#Set displayProgress to a positive value n to report with a # every n lines.
#Warnings:
#The output deletes all keys and values present in the first but not in the
#second! Therefore, this should be run only on exports of complete subtrees.
#The input must be sorted (values sorted within each key); I believe that
#Regedit's export does this, and will also guarantee complete subtrees.
#It's probably wise to double check any key removals in the output: look for
#lines beginning with the two characters "[-". Modern .REG files use UTF-16,
#so some editors and tools (like grep) may have trouble with them. Using
#"grep '^.\[.-' out.reg" should work to at least detect removals.
#You can disable deletions of keys or values entirely by setting the
#appropriate variables (allow*Delete). If keys cannot be deleted but values
#can, each value named in the old file under a key that would be deleted will
#be deleted. (This is not as dangerous because reapplying the old file will
#restore them.)
#History:
#Version 0.1, November 9 2008:
# - initial release
#Version 0.2, November 9 2008:
# - use CRLF newlines
# - support deletion disabling
# - detect misorderings
#Version 0.3, November 9 2008:
# - make allowKeyDelete apply to the last keys to delete
# - verify continuous progress
# - support displaying progress
#Version 0.3.1, November 10 2008:
# - use case-insensitive and subkey-aware comparisons for ordering
#Bugs:
#Assumes that no key contains assignments to @ and to "" -- I think the latter
#is invalid anyway.
#I don't know whether .REG files are really UTF-16 or UCS-2.
#I'm not sure that the last blank line is really necessary; a trailing CRLF
#may be sufficient.
import sys,codecs
def keycompare(a,b):
    """Return an integer indicating the ordering of keys a and b.

    The result is negative, zero or positive when a sorts before, equal to
    or after b.  Keys are compared case-insensitively and component by
    component (split on backslashes), so a key always sorts before its own
    subkeys and a backslash never competes with ordinary characters.
    """
    left=a.lower().split('\\')
    right=b.lower().split('\\')
    # Explicit three-way comparison: same -1/0/1 result as the cmp() builtin,
    # but does not depend on cmp(), which Python 3 no longer provides.
    return (left>right)-(left<right)
class line(object):
    """One logical line of a .REG file: a key header, a value, or EOF.

    Attributes set by the constructor:
      old     -- initialised to False; available to callers as a flag
      str     -- the raw logical line as passed in (None at EOF)
      eof     -- True if constructed from None
      iskey   -- True if the line starts with '[' (and is longer than 1)
      delete  -- True for "[-key]" headers and "name=-" assignments
      name    -- key path for keys; still-escaped value name for values
                 ("" for @); None at EOF
      lastkey -- the key this line belongs to (its own name for a key),
                 or None if unknown
    """
    def __init__(self,s,k=None):
        """Parse s and make a line object.
        Inherit key from line k unless we are a key or it is omitted or None.
        Use k to detect misordered input if it is not None.
        Names are not unescaped, but escaping is considered in their extent.
        Raise IOError for malformed lines and ValueError for misordering."""
        self.old=False
        self.str=s
        self.eof=s is None
        self.iskey=not self.eof and len(s)>1 and s[0]=='['
        if self.eof:
            self.name=None
            self.delete=False
            self.lastkey=None
        elif self.iskey:
            self.delete=s[1]=='-'
            # Locate the closing bracket instead of assuming the line ends
            # in exactly "]\r\n": the last line of a file may lack the
            # newline, which used to chop two characters off the key name.
            body=s.rstrip()
            if not body.endswith(']'):
                raise IOError("unterminated key in "+repr(s))
            self.lastkey=self.name=body[1+self.delete:-1]
            if k is not None and k.lastkey is not None and\
               keycompare(self.lastkey,k.lastkey)<0:
                raise ValueError("key %r precedes %r in input"%
                                 (k.lastkey,self.lastkey))
        else:
            index=1             # points past end of name
            if s[:1]=='"':
                quote=False     # True when the previous char was a backslash
                for c in s[1:]:
                    index+=1
                    if quote: quote=False
                    elif c=='\\': quote=True
                    elif c=='"': break
                else:
                    # Ran off the end without finding the closing quote.
                    raise IOError("unterminated name in "+repr(s))
            elif s[:1]!='@':
                raise IOError("unrecognized format: "+repr(s))
            # The name for @ is "", which properly sorts before everything.
            self.name=s[1:index-1]
            assign=s[index:].lstrip()
            if len(assign)<2 or assign[0]!='=':
                raise IOError("no assignment in "+repr(s))
            self.delete=assign[1]=='-'
            if k is None:
                self.lastkey=None
            else:
                if k.eof:
                    # The predecessor is an EOF marker, so no key header
                    # has been seen; report it as a format error rather
                    # than failing on k.name (None) below.
                    raise IOError("value outside of any key: "+repr(s))
                self.lastkey=k.lastkey
                if not k.iskey and self.name.lower()<k.name.lower():
                    raise ValueError("value %r precedes %r in input"%
                                     (k.name,self.name))
    def valname(self):
        """Return the original form of this value's name."""
        if self.iskey: raise ValueError("this is not a value")
        return '"'+self.name+'"' if self.name else '@'
    def __str__(self):
        """Return a debugging description of this line."""
        if self.eof:
            what="EOF"
        elif self.iskey:
            what=repr(self.str)+" (key)"
        else:
            what=repr(self.str)+" in key "+repr(self.lastkey)
        return self.__class__.__name__+'['+what+": "+repr(self.name)+']'
class keyprint(object):
    """Callable that writes a "[key]" header only when the key changes."""
    def __init__(self,o):
        """Remember the output stream o; no key has been printed yet."""
        self.out=o
        self.key=None
    def __call__(self,k):
        """Write a header for key k unless k is the key recorded last."""
        changed=k!=self.key
        if not changed:
            return
        self.key=k
        # A blank line precedes the header; all newlines are CRLF.
        header="\r\n["+k+"]\r\n"
        self.out.write(header)
def terminated(s):
    """Return true if s is a complete logical line.

    That is, s contains an even number of unescaped double quotes and does
    not end in an unescaped backslash.  A backslash escapes exactly the
    one character that follows it."""
    balanced=True       # an even number of unescaped quotes seen so far
    escaped=False       # the previous character was an unescaped backslash
    for ch in s:
        if escaped:
            # Whatever this character is, the backslash consumed it.
            escaped=False
            continue
        if ch=='\\':
            escaped=True
        elif ch=='"':
            balanced=not balanced
    return balanced and not escaped
def nextLogical(f):
    """Return the next logical line from a file object.

    Physical lines are joined until terminated() accepts the result, so a
    logical line may span several physical ones.  Each physical line that
    ended in a newline contributes a CRLF to the result.
    Blank lines before the start of a logical line are skipped.
    Never returns a null string.
    Return None at EOF; raise IOError if EOF arrives mid-line."""
    logical=""
    while True:
        raw=f.readline()
        if raw=="":
            # End of file: fine between logical lines, an error inside one.
            if logical=="":
                return None
            raise IOError("file ends with escape or in string")
        text=raw.rstrip('\r\n')
        if text=="" and logical=="":
            continue            # skip initial blank lines
        logical+=text
        # Judge completeness before the newline is appended.
        complete=terminated(logical)
        if text!=raw:
            logical+="\r\n"     # normalise whatever newline was stripped
        if complete:
            return logical
def isunder(s,r):
    """Return True if the key s is in the tree rooted at r.

    A key is in its own tree.  The comparison ignores case, as keycompare
    does for ordering: a key that differs from r only in case must not be
    treated as outside r's tree, or the tree could be deleted while the
    new file still has keys in it."""
    key=s.lower()
    root=r.lower()
    # Appending the separator keeps "a\bc" from counting as under "a\b".
    return key==root or key.startswith(root+'\\')
# Main script: merge-walk the old and new .REG files (both sorted) and write
# to standard output a .REG file holding the differences.
if len(sys.argv)!=3:
    sys.stderr.write("usage: "+sys.argv[0]+" old.reg new.reg\n")
    sys.exit(2)                 # BAD_ARGS

# Configuration (see the header comments).
allowKeyDelete=True             # may emit "[-key]" for keys only in the old file
allowValueDelete=True           # may emit "name=-" for values only in the old file
displayProgress=0               # if positive, write a '#' to stderr every n steps

ci=codecs.lookup("utf_16")
fo=ci.streamreader(open(sys.argv[1],'rb'))      # old file
fn=ci.streamreader(open(sys.argv[2],'rb'))      # new file
out=ci.streamwriter(sys.stdout)
kp=keyprint(out)

# Both inputs must start with the same header line; it is copied through.
head=fo.readline()
if fn.readline()!=head:
    raise IOError("different file headers")
out.write(head.rstrip('\r\n')+"\r\n")

# o and n are the current lines of the old and new file.  They start as one
# shared EOF marker flagged "old", so both are read on the first iteration.
o=n=line(None)
o.old=True
killing=False                   # the tree being deleted, if any
iters=0
while True:
    iters+=1
    if displayProgress and iters%displayProgress==0:
        sys.stderr.write('#')
    # Replace whichever line(s) the previous iteration dealt with.
    if o.old: o=line(nextLogical(fo),o)
    if n.old: n=line(nextLogical(fn),n)
    if o.eof and n.eof: break
    if o.delete or n.delete:
        raise IOError("input contains deletion requests")
    # Determine which line logically comes first; all keys come after all
    # values (since the values go with a previous key), and EOF comes after
    # everything.  Positive values mean that n comes first.
    c=o.eof-n.eof or keycompare(o.lastkey,n.lastkey) or o.iskey-n.iskey
    if not c:
        # Same key and same kind of line: order by name, ignoring case.
        oname=o.name.lower()
        nname=n.name.lower()
        c=(oname>nname)-(oname<nname)
    o.old=c<=0
    n.old=c>=0
    assert o.old or n.old,"not advancing in the file"
    # Stop suppressing output once the old file has left the deleted tree.
    if killing and (o.eof or not isunder(o.lastkey,killing)): killing=False
    if not killing:
        if c<0:
            # This line exists only in the old file.
            if o.iskey:
                # Delete a whole key if the new file is past all its subkeys.
                # Note that n.lastkey!=o.name, because n must be a key.
                if (n.eof or not isunder(n.lastkey,o.name)) and allowKeyDelete:
                    killing=o.name
                    out.write("\r\n[-"+o.name+"]\r\n")
            elif allowValueDelete:
                kp(o.lastkey)
                out.write(o.valname()+"=-\r\n")
        elif n.iskey:
            # A key only in the new file gets its header at once, so that it
            # is created even if it has no values; kp will not repeat the
            # header when values of the same key follow.  Keys present in
            # both files (c==0) need no output of their own.
            if c>0:
                kp(n.name)
        elif n.str!=o.str:
            # A value that is new (c>0) or whose line differs (c==0).
            kp(n.lastkey)
            out.write(n.str)
# Terminate the output with a final blank line.
out.write("\r\n")
if displayProgress: sys.stderr.write('\n')