#!/usr/bin/python -utt
# vim:et:sw=4
'''Unzip an ODF document and list or kill/substitute text styles.

Usage: kstyles INPUT OUTPUT [-l] [-dsty1]... [sty1=sty2]...

    INPUT      name of file to read
    OUTPUT     name of file to write new (modified) file
    -l         list styles
    -dsty1     Show text spans having property sty1
    sty1=sty2  Text elements which are sty1 are assigned sty2

$Id: kstyles.py,v 0.6 2014/05/10 22:19:13 collin Exp collin $
'''

import codecs
import os
import sys
import xml.dom.minidom
import zipfile

CONTENT = 'content.xml'
STYLES = 'styles.xml'
STYLE = 'style:style'
SFAMILY = 'style:family'
SNAME = 'style:name'
TSTYLENAME = 'text:style-name'
TSPAN = 'text:span'


def _emit(text):
    '''Print one line of (possibly non-ASCII) document text to stdout.

    On Python 2 a unicode object is encoded as UTF-8 first, so output does
    not fail when stdout is a pipe.  On Python 3 the text is printed as is.
    '''
    if not isinstance(text, str):
        text = text.encode('utf-8')
    print(text)


def _confirm_overwrite(infile_name, outfile_name):
    '''Refuse in-place updates; ask before clobbering an existing output.'''
    if not os.path.exists(outfile_name):
        return
    if os.path.realpath(infile_name) == os.path.realpath(outfile_name):
        # The output is rebuilt while the input is still being read.
        print("I can't update a file in place: %s" % outfile_name)
        usage()
    print("Output file exists: %s" % outfile_name)
    sys.stdout.write("Overwrite? Hit <Enter>, or ^C to abort now. ")
    sys.stdout.flush()
    sys.stdin.readline()


def _load_documents(archive):
    '''Parse content.xml (and styles.xml when present) from archive.

    Returns (content_dom, text_styles) where text_styles is the list of
    style:style elements of family "text" found in either document.
    '''
    members = archive.namelist()
    doms = {}
    text_styles = []
    for member in (CONTENT, STYLES):
        if member not in members:
            continue
        doms[member] = xml.dom.minidom.parseString(archive.read(member))
        text_styles.extend(
            [node for node in doms[member].getElementsByTagName(STYLE)
             if node.getAttribute(SFAMILY) == 'text'])
    return doms[CONTENT], text_styles


def _spans_with_style(cdom, style_name):
    '''Return the text:span elements of cdom whose style is style_name.'''
    return [span for span in cdom.getElementsByTagName(TSPAN)
            if span.getAttribute(TSTYLENAME) == style_name]


def _count_span_styles(cdom, text_style_names):
    '''Return {style name: number of text:span elements using it}.

    Every declared style starts at 0; styles used by a span but never
    declared are reported once and counted as well.
    '''
    style_counts = dict.fromkeys(text_style_names, 0)
    n, k = len(text_style_names), len(style_counts)
    if n != k:
        # Names should be unique; a mismatch means duplicate declarations.
        print('NOTE: %d text_style_names, %d unique' % (n, k))
    for span in cdom.getElementsByTagName(TSPAN):
        if not span.hasAttribute(TSTYLENAME):
            continue
        name = span.getAttribute(TSTYLENAME)
        if name not in style_counts:
            _emit("? unknown style: %s" % name)
        style_counts[name] = style_counts.get(name, 0) + 1
    return style_counts


def _list_styles(text_style_names, text_styles, style_counts):
    '''Print every style that is used by at least one span.'''
    for name, style in zip(text_style_names, text_styles):
        if style_counts[name]:
            _emit('%s: %d spans' % (name, style_counts[name]))
            _emit('\t%s' % style.toxml())
    declared = set(text_style_names)
    for name in [X for X in style_counts if X not in declared]:
        _emit('??? %s: %d spans' % (name, style_counts[name]))


def _show_spans(cdom, style_name, style_counts):
    '''Print the direct text of every span that has style style_name.'''
    if style_name not in style_counts:
        print("*** Couldn't find style %s" % style_name)
        return
    print("=== Text style %s:" % style_name)
    for span in _spans_with_style(cdom, style_name):
        _emit(' '.join([node.data for node in span.childNodes
                        if node.nodeType == node.TEXT_NODE]))


def _clone_info(oldinfo):
    '''Return a fresh ZipInfo carrying oldinfo's name and storage settings.

    Keeping compress_type matters for ODF: the "mimetype" member has to
    stay uncompressed, so it must not inherit the archive-wide default.
    '''
    newinfo = zipfile.ZipInfo(oldinfo.filename, oldinfo.date_time)
    newinfo.compress_type = oldinfo.compress_type
    newinfo.create_system = oldinfo.create_system
    newinfo.external_attr = oldinfo.external_attr
    newinfo.comment = oldinfo.comment
    return newinfo


def _write_archive(archive, outfile_name, cdom):
    '''Copy archive to outfile_name, replacing content.xml with cdom.

    Members are written in their original order (ODF expects "mimetype"
    first) and with their original compression method.
    '''
    utf8_enc = codecs.getencoder('utf8')
    new_content = utf8_enc(cdom.toxml())[0]
    if os.path.exists(outfile_name):
        # Unlink rather than truncate, so a hard link to the input is safe.
        os.unlink(outfile_name)
    with zipfile.ZipFile(outfile_name, 'w', zipfile.ZIP_DEFLATED) as outfile:
        for oldinfo in archive.infolist():
            if oldinfo.filename == CONTENT:
                data = new_content
            else:
                data = archive.read(oldinfo.filename)
            outfile.writestr(_clone_info(oldinfo), data)


def main(args):
    '''Unpack ODF/XML (zipfile) and list, show or substitute text styles.

    args is the command line without the program name:
    INPUT OUTPUT [-l] [-dXX]... [sty1=sty2]...

    Discover text styles and count the text:span elements using each one.
    If "-l", display the counts.  If "-dXX", display text:spans with style
    XX.  For each "sty1=sty2": change any text:span of sty1 to sty2.
    OUTPUT is always written, as a copy of INPUT with the (possibly
    modified) content.xml.

    Exits with status 1 (via usage() or sys.exit) on bad arguments, a
    missing input file, or an archive without content.xml.
    '''
    if len(args) < 2:
        usage()
    infile_name, outfile_name = args[0], args[1]
    want_listing = '-l' in args[2:]
    ops = [op for op in args[2:] if op != '-l']

    if not os.path.exists(infile_name):
        print("Couldn't find input file %s" % infile_name)
        usage()

    with zipfile.ZipFile(infile_name, 'r') as archive:
        _confirm_overwrite(infile_name, outfile_name)

        # Sanity-check input file before doing anything else.
        if CONTENT not in archive.namelist():
            print("Couldn't find %s in %s's zip archive"
                  % (CONTENT, infile_name))
            print('Is it an ODF file?')
            sys.exit(1)

        cdom, text_styles = _load_documents(archive)
        text_style_names = [X.getAttribute(SNAME) for X in text_styles]
        style_counts = _count_span_styles(cdom, text_style_names)

        if want_listing:
            _list_styles(text_style_names, text_styles, style_counts)

        for op in ops:
            if op.startswith('-d'):
                _show_spans(cdom, op[2:], style_counts)
                continue
            styles = op.split('=')
            # An empty name would match every span lacking the attribute
            # (getAttribute returns ''), so treat it as a syntax error.
            if len(styles) > 2 or (len(styles) == 2 and '' in styles):
                sys.stderr.write("Can't parse: '%s'\n" % op)
                usage()
            if len(styles) < 2:
                sys.stderr.write('Not yet implemented: %s\n' % op)
                continue
            print('Changing style "%s" to "%s".' % (styles[0], styles[1]))
            for span in _spans_with_style(cdom, styles[0]):
                span.setAttribute(TSTYLENAME, styles[1])

        _write_archive(archive, outfile_name, cdom)


def usage():
    '''Print the module docstring (usage text) to stderr; exit status 1.'''
    sys.stderr.write('%s\n' % __doc__)
    sys.exit(1)


if __name__ == '__main__':
    main(sys.argv[1:])