diff options
| author | David Kaufmann <astra@fsinf.at> | 2011-11-14 02:04:57 +0100 |
|---|---|---|
| committer | David Kaufmann <astra@fsinf.at> | 2011-11-14 02:05:03 +0100 |
| commit | d14e32945633316b352efec22a1ab4ffd00cb618 (patch) | |
| tree | 353b04d57ab9134d1f546440395d75754a69542f /src/vim-latex/doc/db2vim | |
| parent | a76ddbccedbd9873342629d07fdb0cd8ba536cc0 (diff) | |
| download | config-d14e32945633316b352efec22a1ab4ffd00cb618.tar.gz | |
moved sources to src
Diffstat (limited to 'src/vim-latex/doc/db2vim')
| -rwxr-xr-x | src/vim-latex/doc/db2vim/db2vim | 761 | ||||
| -rw-r--r-- | src/vim-latex/doc/db2vim/domutils.py | 25 | ||||
| -rw-r--r-- | src/vim-latex/doc/db2vim/textutils.py | 224 |
3 files changed, 1010 insertions, 0 deletions
diff --git a/src/vim-latex/doc/db2vim/db2vim b/src/vim-latex/doc/db2vim/db2vim new file mode 100755 index 0000000..d1e6902 --- /dev/null +++ b/src/vim-latex/doc/db2vim/db2vim @@ -0,0 +1,761 @@ +#!/usr/bin/python +r""" +db2vim [options] file.xml + +SHORT OPTIONS + +-d Prints some debugging information on stderr. + +-s If given, the db2vim operates in a 'stict' conversion mode, i.e, any + element which does not have a handler defined for them it be + completeley ignored including all its children. Otherwise, db2vim will + recurse into an unknown tag and process any of its children it + recognizes. Since db2vim always recognizes text nodes, not using this + option has the effect that all text will be printed out, even if + somewhat incorrectly. + +LONG OPTIONS + +--prefix=<prefix> + This is a string like "ls_" which will be prepended to the section + numbers. Default to 'ls_' if unsupplied. +""" + + +import xml.dom.minidom +import getopt +import string +import re +import sys + +# Okay. so I import *. Shoot me. +from textutils import * +from domutils import * + +# define a bunch of constants for formatting. +TEXT_WIDTH = 80 +BLOCK_QUOTE = 4 +COL_SPACE = 2 + +# a bunch of globals used in creating the Table of contents. +# +# TOC_HASH['section 1.1 label'] = 'ls_1_1' +# +# LEVEL_HASH['section 1.1 label'] = 1 +# (top level article has level 0) +# +# TITLE_HASH['section 1.1 label'] = 'Title of section 1.1' +# +# FILENAME = the name of the file being processed with the last extension +# changed to .txt +# +# TOC_PREFIX = 'ls_' (the prefix used to create the section labels). +TOC_HASH = {} +LEVEL_HASH = {} +TITLE_HASH = {} +FILENAME = '' +TOC_PREFIX = '' + +ANCHOR_HASH = {} +URL_HASH = {} + +# STDERR for printing debugging info. 
DEBUG = 0
STDERR = sys.stderr
STRICT = 0
# Single-slot counter (a dict so that functions can mutate it without 'global').
NUM_ANCHORS = {0:1}

################################################################################
# Miscellaneous utility functions
################################################################################
# encodeTo52(num) {{{
def encodeTo52(num):
    """
    encodeTo52(num)
        returns string

    Encodes a non-negative integer using the 52 letters a-z, A-Z as digits:
    0..25 map to 'a'..'z', 26..51 map to 'A'..'Z', and larger numbers are
    encoded recursively as encodeTo52(num // 52) + encodeTo52(num % 52).
    """
    if num < 26:
        return chr(ord('a') + num)
    elif num < 52:
        return chr(ord('A') + num - 26)
    else:
        # floor division keeps this exact (and integral) on every Python
        # version; int(num/52) goes through a float on Python 3.
        return encodeTo52(num // 52) + encodeTo52(num % 52)
# }}}
# makeTocHash(rootElement) {{{
def makeTocHash(rootElement, width, prefix='', level=0):
    """
    makeTocHash(rootElement, width, prefix='', level=0)

    Walks the direct <section> children of rootElement (recursively) and
    fills the module level tables TOC_HASH, LEVEL_HASH, TITLE_HASH and
    ANCHOR_HASH, all keyed by the section id.

    A section without an 'id' attribute is given its generated label
    (TOC_PREFIX + prefix + running number) as id. Nested sections are
    processed with width reduced by 5, the prefix extended by '<number>_'
    and the level increased by one.

    Every section is expected to have a <title> child.
    """
    lastLabelUsed = 0

    for section in rootElement.getChildrenByTagName('section'):
        title = section.getChildrenByTagName('title')[0]
        titleText = handleElement(title, width)
        lastLabelUsed += 1
        thisLabel = TOC_PREFIX + prefix + str(lastLabelUsed)

        sectionid = section.getAttribute('id')
        if not sectionid:
            section.setAttribute('id', thisLabel)
            sectionid = thisLabel

        NUM_ANCHORS[0] += 1
        ANCHOR_HASH[sectionid] = TOC_PREFIX + 'a_' + encodeTo52(NUM_ANCHORS[0] + 52)

        TOC_HASH[sectionid] = thisLabel
        LEVEL_HASH[sectionid] = level
        TITLE_HASH[sectionid] = titleText

        if section.getChildrenByTagName('section'):
            makeTocHash(section, width - 5,
                        prefix=prefix + str(lastLabelUsed) + '_',
                        level=level + 1)

# }}}
# makeAnchorHash(rootElement) {{{
def makeAnchorHash(rootElement):
    """
    makeAnchorHash(rootElement)

    Assigns a generated tag (TOC_PREFIX + 'a_' + encoded counter) in
    ANCHOR_HASH to every <anchor> and <note> descendant of rootElement which
    has an 'id' attribute. Elements without an id are skipped. A warning is
    written to STDERR if the id is already present in ANCHOR_HASH or
    TOC_HASH; the entry is overwritten in that case.
    """
    anchors = rootElement.getElementsByTagName('anchor') \
        + rootElement.getElementsByTagName('note')
    for anchor in anchors:
        anchorid = anchor.getAttribute('id')
        if not anchorid:
            continue

        NUM_ANCHORS[0] += 1
        if anchorid in ANCHOR_HASH or anchorid in TOC_HASH:
            STDERR.write("Warning: anchor [%s] multiply defined\n" % anchorid)

        ANCHOR_HASH[anchorid] = TOC_PREFIX + 'a_' + encodeTo52(NUM_ANCHORS[0] + 52)

# }}}
# makeURLHash(rootElement) {{{
def makeURLHash(rootElement):
    """
    makeURLHash(rootElement)

    Gives every distinct, non-empty 'url' attribute found on the <ulink>
    descendants of rootElement a label TOC_PREFIX + 'u_' + <running number>
    and stores it in URL_HASH[url]. URLs already present in URL_HASH keep
    their label.
    """
    numURLs = 0
    for ulink in rootElement.getElementsByTagName('ulink'):
        url = ulink.getAttribute('url')
        if not url or url in URL_HASH:
            continue
        numURLs += 1
        URL_HASH[url] = TOC_PREFIX + 'u_' + str(numURLs)

# }}}
# makeTOC(node, width, maxlevel=1): {{{
def makeTOC(node, width, maxlevel=1):
    """
    makeTOC(node, width, maxlevel=1)
        returns string

    Builds the table of contents for the direct <section> children of node
    from the tables filled in by makeTocHash(). Sections whose level is
    <= maxlevel produce a line '|label| title'; sub-sections are appended
    below their parent, offset by VertCatString(" ", 4, ...). Trailing
    whitespace of the result is collapsed into a single newline.
    """
    retText = ""

    for section in node.getChildrenByTagName('section'):

        sectionid = section.getAttribute('id')
        thisLabel = TOC_HASH.get(sectionid, '')
        titleText = TITLE_HASH.get(sectionid, '')
        # sections unknown to LEVEL_HASH get a level deeper than any maxlevel
        # used here, so they are left out of the TOC.
        level = LEVEL_HASH.get(sectionid, 10)

        if level <= maxlevel:
            retText += '|' + thisLabel + '| ' + titleText + '\n'

        if level < maxlevel and section.getChildrenByTagName('section'):
            # hand maxlevel down; the recursive call used to fall back to the
            # default of 1 whatever the caller asked for.
            childText = makeTOC(section, width-5, maxlevel)
            retText += VertCatString(" ", 4, childText) + '\n'

    retText = re.sub(r'\s+$', r'\n', retText)

    return retText
# }}}

################################################################################
# Generalized function for handling dom elements.
################################################################################
# IsInlineTag(self): {{{
def IsInlineTag(self):
    """
    IsInlineTag(node)
        returns 1 or 0

    Returns 1 for text nodes and for element nodes whose tagName is flagged
    in inlineTags, 0 for everything else.
    """
    if self.nodeType == self.TEXT_NODE:
        return 1
    # Comments, processing instructions etc. have no tagName attribute in
    # xml.dom.minidom; they are never inline.
    if self.nodeType != self.ELEMENT_NODE:
        return 0
    if inlineTags.get(self.tagName, 0):
        return 1
    return 0


# }}}
# getChildrenByTagName(self, name): {{{
# Description: extension to the xml.dom.minidom.Element class.
#              returns all direct descendants of this Element.
def getChildrenByTagName(self, name):
    """
    getChildrenByTagName(self, name)
        returns list

    Returns the direct children of this node which are element nodes with
    nodeName == name (unlike getElementsByTagName, which searches all
    descendants).
    """
    nodeList = []

    child = self.firstChild
    while child is not None:
        if child.nodeType == child.ELEMENT_NODE and child.nodeName == name:
            nodeList.append(child)

        child = child.nextSibling

    return nodeList

xml.dom.minidom.Element.getChildrenByTagName = getChildrenByTagName


# }}}
# handleElement(rootElement, width=TEXT_WIDTH): {{{
def handleElement(rootElement, width=None):
    """
    handleElement(rootElement, width=TEXT_WIDTH):

    Generalized function to handle an Element node in a DOM tree.

    Walks the children of rootElement and returns the concatenated text:
      . a non-inline element with an entry in handlerMaps is passed to its
        handler,
      . a <?vimhelp ...?> processing instruction whose data is a key of
        handlerMaps is passed to that handler,
      . a run of consecutive inline nodes (see IsInlineTag) is gathered into
        one piece of text and formatted with IndentParagraphs,
      . any other node is recursed into unless STRICT is set, in which case
        it is ignored.

    width defaults to the module level TEXT_WIDTH (looked up when the call
    is made).
    """
    if width is None:
        width = TEXT_WIDTH

    retText = ""
    child = rootElement.firstChild
    while child is not None:

        printerr('node type = %d' % child.nodeType)
        if child.nodeType == child.ELEMENT_NODE:
            printerr('processing [%s]' % child.tagName)

        isinline = IsInlineTag(child)

        # if the child is an Element and if a handler exists, then call it.
        if not isinline \
           and child.nodeType == child.ELEMENT_NODE \
           and child.tagName in handlerMaps:
            # offset the child text by the current indentation value
            printerr('making recursive call to known child.')
            retText += handlerMaps[child.tagName](child, width)
            child = child.nextSibling

        elif not isinline \
             and child.nodeType == child.PROCESSING_INSTRUCTION_NODE \
             and child.target == 'vimhelp':

            if child.data in handlerMaps:
                retText += handlerMaps[child.data](child, width)

            child = child.nextSibling

        # if its a text node or an inline element node, collect consecutive
        # text nodes into a single paragraph and indent it.
        elif isinline:

            text = ""
            while child is not None and IsInlineTag(child):
                if child.nodeType == child.TEXT_NODE:
                    text += child.data
                elif child.nodeType == child.ELEMENT_NODE:
                    if child.tagName in handlerMaps:
                        text += handlerMaps[child.tagName](child, width)
                    else:
                        text += GetText(child.childNodes)
                child = child.nextSibling

            retText += IndentParagraphs(text, width)

        # If we cannot understand _anything_ about the element, then just
        # handle its children hoping we have something to gather from
        # there.
        elif not STRICT:
            printerr('making recursive call for unkown child')
            retText += handleElement(child, width)
            child = child.nextSibling

        else:
            child = child.nextSibling

    return retText

# }}}

################################################################################
# Functions for handling various xml tags
################################################################################
# handleArticleInfo(articleinfo, width): {{{
def handleArticleInfo(articleinfo, width):
    """
    handleArticleInfo(articleinfo, width)
        returns string

    Builds the head of the help file: centered title, file tag and authors,
    the abstract (if there is one), the table of contents and a note about
    folding. Before doing so it fills the TOC, anchor and URL tables for the
    whole article (the parent node of articleinfo).

    Exits with status 1 if articleinfo has no <title> child.
    """

    makeTocHash(articleinfo.parentNode, width)
    makeAnchorHash(articleinfo.parentNode)
    makeURLHash(articleinfo.parentNode)

    # getChildrenByTagName returns a (possibly empty) list, never None.
    title = articleinfo.getChildrenByTagName('title')
    if not title:
        print("Article should have a title!")
        sys.exit(1)

    name = GetText(title[0].childNodes)
    authors = articleinfo.getChildrenByTagName('author')

    authorText = ''
    for author in authors:
        firstname = ''
        surname = ''
        email = ''
        if author.getElementsByTagName('firstname'):
            firstname = GetTextFromElementNode(author, 'firstname')[0]
        if author.getChildrenByTagName('surname'):
            surname = GetTextFromElementNode(author, 'surname')[0]
        if author.getElementsByTagName('email'):
            email = GetTextFromElementNode(author, 'email')[0]

        authorLine = firstname + ' ' + surname
        # an author without <email> used to reuse the previous author's
        # address (or fail with a NameError for the first author).
        if email:
            authorLine += ' <' + email + '>'
        authorText = authorText + authorLine + '\n'


    abstractText = ''
    abstract = articleinfo.getChildrenByTagName('abstract')
    if abstract:
        abstractText = '\n\n' + CenterText('Abstract\n========', width)
        abstractText += handleElement(abstract[0], width) + '\n'


    retText = CenterText(name + '\n*' + FILENAME + '*\n' + authorText, width)
    retText += abstractText

    toc = makeTOC(articleinfo.parentNode, width)

    foldwarn = r'''
================================================================================
Viewing this file

This file can be viewed with all the sections and subsections folded to ease
navigation. By default, vim does not fold help documents. To create the folds,
press za now. The folds are created via a foldexpr which can be seen in the
last section of this file.

See |usr_28.txt| for an introduction to folding and |fold-commands| for key
sequences and commands to work with folds.
'''

    return retText + '\n' + RightJustify('*' + FILENAME + '-toc*', width) + '\n' + toc + foldwarn

# }}}
# handleOption(option, width): {{{
def handleOption(option, width):
    """
    handleOption(option, width)
        returns string

    Formats an option: one right justified '*name*' tag line per <name>
    descendant, followed by the names in a left column with the text of the
    first <desc> child next to them.
    """
    retText = ""
    names = GetTextFromElementNode(option, "name")

    for name in names:
        retText += ("*" + name + "*").rjust(width) + "\n"

    nameTexts = ""
    maxNameLen = -1
    for name in names:
        maxNameLen = max(maxNameLen, len(name + " "))
        nameTexts += name + " \n"

    desc = option.getChildrenByTagName("desc")[0]
    descText = handleElement(desc, width=width-maxNameLen)

    retText += VertCatString(nameTexts + " ", None, descText)

    return retText + "\n"

# }}}
# handleOptionDefault(default, width): {{{
def handleOptionDefault(default, width):
    """
    handleOptionDefault(default, width)
        returns string

    Returns '<type>\\t(<extra>)' built from the text of the <type> and
    <extra> descendants of default (several of a kind are joined by
    newlines). width is not used.
    """
    typeText = "\n".join(GetTextFromElementNode(default, "type"))
    extraText = "\n".join(GetTextFromElementNode(default, "extra"))
    return typeText + "\t(" + extraText + ")"

# }}}
# handleTableRoot(root, width): {{{
def handleTableRoot(root, width):
    """
    handleTableRoot(root, width)
        returns string

    Formats a <table>/<informaltable>: the rows of <thead> (optional) and
    <tbody> of the first <tgroup> child are laid out in columns which share
    the given width. A '~' is appended to the header via the substitution
    below. Returns '' if there is no <tgroup>.
    """
    tgroups = root.getChildrenByTagName('tgroup')
    # check before indexing; [0] on an empty list raised IndexError before
    # the old 'is None' test could ever fire.
    if not tgroups:
        return ''
    tgroup = tgroups[0]

    rows = []
    numHeadRows = 0
    if tgroup.getChildrenByTagName('thead'):
        thead = tgroup.getChildrenByTagName('thead')[0]
        rows = thead.getChildrenByTagName('row')
        numHeadRows = len(rows)

    tbody = tgroup.getChildrenByTagName('tbody')[0]
    rows += tbody.getChildrenByTagName('row')

    widths, text = calculateColumnWidthsDoublePass(rows, width)

    headText = text[0:numHeadRows]
    bodyText = text[numHeadRows:]

    headTable = FormatTable(headText, ROW_SPACE = 1, COL_SPACE =
            COL_SPACE, justify = 0, widths = widths)
    if headTable:
        headTable = re.sub(r'\n|$', r'\g<0>~', headTable)
    bodyTable = FormatTable(bodyText, ROW_SPACE = 1, COL_SPACE =
            COL_SPACE, justify = 0, widths = widths)

    return headTable + '\n'+ re.sub(r'\n+$', '', bodyTable) + '\n\n'

# calculateColumnWidths(rows, width): {{{
def calculateColumnWidths(rows, alloc_widths):
    """
    calculateColumnWidths(rows, alloc_widths)
        returns (widths, text)

    Formats every <entry> of every row with the width allotted to its column
    in alloc_widths. A one-element alloc_widths is applied to all columns.

    widths maps the column index to the width of the widest cell of that
    column (including COL_SPACE padding), text is a list of rows, each a
    list of the formatted cell strings.
    """
    widths = {}
    text = []
    for row in rows:
        cols = row.getChildrenByTagName("entry")
        if len(alloc_widths) == 1:
            # build a new list; '*=' would extend the caller's list in place.
            alloc_widths = alloc_widths * len(cols)

        colwidths = []
        rowtext = []
        for col, width in zip(cols, alloc_widths):
            coltext = handleElement(col, width)

            rowtext.append(coltext)
            # This is the 'width' of the current cell including the
            # whitespace padding.
            colwidths.append(max(map(len, coltext.split("\n"))) \
                    + COL_SPACE)

        text.append(rowtext)

        # update the widths of the columns by finding the maximum
        # width of all cells in this column.
        for i in range(len(colwidths)):
            widths[i] = max(colwidths[i], widths.get(i, -1))

    return widths, text

# }}}
# calculateColumnWidthsDoublePass(rows, width): {{{
def calculateColumnWidthsDoublePass(rows, width):
    """
    calculateColumnWidthsDoublePass(rows, width)
        returns (widths, text)

    Formats the rows once allowing every cell the full width. If the
    resulting columns do not fit into width, the columns wider than an equal
    share of width are restricted to an equal share of what the narrow
    columns leave over, and the rows are formatted a second time.
    """
    maxwidths, text = calculateColumnWidths(rows, [width])
    if sum(maxwidths.values()) <= width:
        return maxwidths, text

    # the keys of maxwidths are the column indices 0..n-1; read the widths in
    # column order instead of relying on the iteration order of the dict.
    colwidths = [maxwidths[i] for i in range(len(maxwidths))]

    # now find out how many columns exceed the maximum permitted width.
    # nlarge: number of columns which are too wide.
    # remainingWidth: width which these large columns can share.
    nlarge = 0
    remainingWidth = width
    for colwidth in colwidths:
        if colwidth > width // len(colwidths):
            nlarge += 1
        else:
            remainingWidth += -colwidth

    # newmaxwidth: width which each of the large columns is allowed.
    newmaxwidth = remainingWidth // max(nlarge, 1)

    newcolwidths = [min(colwidth, newmaxwidth) for colwidth in colwidths]

    # make another run and this time ask each cell to restrict itself to
    # newmaxwidth as calculated above.
    newmaxwidth, newtext = calculateColumnWidths(rows, newcolwidths)

    return newmaxwidth, newtext

# }}}
# }}}
# handleCode(code, width): {{{
def handleCode(code, width):
    """
    handleCode(code, width)
        returns string

    Returns the text of code, offset via VertCatString, between the
    '&codebegin;' and '&codeend;' place holders. The place holders are
    replaced by the main program using replaceComment(). width is not used.
    """
    retText = GetText(code.childNodes)
    return " &codebegin;\n" + VertCatString(" ", 4, retText) + "&codeend;"


# }}}
# handleList(list, width, marker=0): {{{
def handleList(list, width, marker=0):
    """
    handleList(list, width, marker=0)
        returns string

    Formats the items of a list, each preceded by a decoration:
      . simplelist:  <member> children, no decoration
      . orderedlist: <listitem> children, decorated '1. ', '2. ', ...
      . otherwise:   <member> children, decorated '- '
    marker is not used.
    """
    decoration = ''
    if list.tagName == 'simplelist':
        child = 'member'
    elif list.tagName == 'orderedlist':
        child = 'listitem'
    else:
        child = 'member'
        decoration = '- '

    retText = ""
    items = list.getChildrenByTagName(child)
    i = 1

    for item in items:
        if list.tagName == 'orderedlist':
            decoration = str(i) + '. '
            i = i + 1
        itemText = handleElement(item, width - len(decoration))
        itemText = VertCatString(decoration, None, itemText)

        retText += '\n' + re.sub(r'\s+$', '', itemText) + "\n"

    return retText

# }}}
# handleNote(note, width): {{{
def handleNote(note, width):
    """
    handleNote(note, width)
        returns string

    Formats a note as a block prefixed with 'NOTE: '. A <title> child is
    removed from the note and printed underlined above the text. If the note
    has an id, a right justified line with the tags '*id*' and
    '*<ANCHOR_HASH[id]>*' precedes the block.
    """
    title = None
    if note.getChildrenByTagName('title'):
        title = note.getChildrenByTagName('title')[0]
        name = GetText(title.childNodes)
        note.removeChild(title)

    noteid = ''
    if note.getAttribute('id'):
        noteTagText = '*' + note.getAttribute('id') + '* '
        noteTagText += '*' + ANCHOR_HASH[note.getAttribute('id')] + '*'
        noteTagText = IndentParagraphs(noteTagText, width//2)
        noteid = RightJustify(noteTagText, width) + '\n'

    noteText = handleElement(note, width-len("NOTE: "))
    if title is not None:
        noteText = name + '\n' +('-' * len(name)) + '\n' + noteText

    noteText = noteid + VertCatString("NOTE: ", None, noteText)

    return noteText + "\n"

# }}}
# handleParagraph(paragraph, width): {{{
def handleParagraph(paragraph, width):
    """
    handleParagraph(paragraph, width)
        returns string

    Returns the formatted contents of paragraph without leading and trailing
    newlines, followed by an empty line.
    """
    partext = handleElement(paragraph, width)

    partext = re.sub(r'\n+$', '', partext)
    partext = re.sub(r'^\n+', '', partext)

    return partext + "\n\n"

# }}}
# handleFormalParagraph(paragraph, width): {{{
def handleFormalParagraph(formalparagraph, width):
    """
    handleFormalParagraph(formalparagraph, width)
        returns string

    Like handleParagraph, but a <title> child is removed from the element
    and printed, underlined with dashes, above the text.
    """
    title = None
    if formalparagraph.getChildrenByTagName('title'):
        title = formalparagraph.getChildrenByTagName('title')[0]
        name = GetText(title.childNodes)
        formalparagraph.removeChild(title)

    partext = handleElement(formalparagraph, width)

    partext = re.sub(r'\n+$', '', partext)
    partext = re.sub(r'^\n+', '', partext)
    if title is not None:
        partext = name + '\n' + ('-' * len(name)) + '\n' + partext

    return partext + "\n\n"

# }}}
# handleBlockQuote(block, width): {{{
def handleBlockQuote(block, width):
    """
    handleBlockQuote(block, width)
        returns string

    Formats the contents of block to width - BLOCK_QUOTE and offsets them by
    BLOCK_QUOTE spaces.
    """
    text = handleElement(block, width - BLOCK_QUOTE)
    text = VertCatString(" "*BLOCK_QUOTE, \
            BLOCK_QUOTE, text)

    return text + "\n"

# }}}
# handleLink(link, width): {{{
def handleLink(link, width):
    """
    handleLink(link, width)
        returns string

    Returns the link text followed by ' [|tag|]', where tag is the entry of
    ANCHOR_HASH for the 'linkend' attribute. If there is no such entry a
    warning is written to STDERR and the tag is left empty.
    """
    linkend = link.getAttribute('linkend')
    if linkend not in ANCHOR_HASH:
        STDERR.write("Warning: Link ID [%s] not found in TOC\n" % linkend)
    text = handleElement(link, width)
    anchorpt = ANCHOR_HASH.get(linkend) or ''

    return text + ' [|' + anchorpt + '|]'

# }}}
# handleAnchor(anchor, width): {{{
def handleAnchor(anchor, width):
    """
    handleAnchor(anchor, width)
        returns string

    Returns a right justified line with the tags '*id*' and
    '*<ANCHOR_HASH[id]>*' for the id of anchor. The id has to be a key of
    ANCHOR_HASH.
    """
    anchorText = '*'+anchor.getAttribute('id')+'* '
    anchorText += '*'+ANCHOR_HASH[anchor.getAttribute('id')]+'*'
    return RightJustify(anchorText, width) \
        + "\n"

# }}}
# handleSection(section, width): {{{
def handleSection(section, width):
    """
    handleSection(section, width)
        returns string

    Formats a section: a delimiter line ('=' for level 0, '-' for level 1,
    empty otherwise), the title with the section tags to its right, the
    table of contents of the section (for levels <= 1) and the section
    text. The <title> child is removed from section in the process.
    """
    title = section.getChildrenByTagName('title')[0]
    name = handleElement(title, width)

    sectionid = section.getAttribute('id')
    tagsformatted = ''
    if sectionid in TOC_HASH:
        tagsformatted = '*%s* ' % TOC_HASH[sectionid]

    if sectionid in ANCHOR_HASH:
        tagsformatted += '*%s* ' % ANCHOR_HASH[sectionid]

    if sectionid and sectionid in TOC_HASH and sectionid != TOC_HASH[sectionid]:
        tagsformatted += '*%s*' % sectionid

    # wrap the tags to a width of 30 and right justify them within their own
    # width.
    tagsformatted = RightJustify(IndentParagraphs(tagsformatted, 30), 0)
    tagswidth = TextWidth(tagsformatted)

    # width(name) + nspaces + width(tags) = TEXT_WIDTH
    if len(tagsformatted) > 2:
        header = VertCatString(name, TEXT_WIDTH-tagswidth, tagsformatted)
    else:
        header = name

    section.removeChild(title)
    text = handleElement(section, width)

    thislevel = LEVEL_HASH.get(sectionid, -1)
    if thislevel == 0:
        delim = '='
        newlines = '\n\n'
    elif thislevel == 1:
        delim = '-'
        newlines = '\n'
    else:
        delim = ''
        newlines = '\n'

    thisTOC = ''
    if thislevel <= 1:
        thisTOC = makeTOC(section, width, maxlevel=1)

    return "\n" + (delim * TEXT_WIDTH) + \
        "\n" + header + newlines + thisTOC + newlines + re.sub(r'\n+$', '', text) + "\n"

# }}}
# handleUlink(ulink, width) {{{
def handleUlink(ulink, width):
    """
    handleUlink(ulink, width)
        returns string

    Returns the link text followed by ' |label|', where label is the entry
    of URL_HASH for the 'url' attribute. Without a url a warning is written
    to STDERR and only the text is returned.
    """
    url = ulink.getAttribute('url')
    text = handleElement(ulink, width)
    # URL_HASH is created at the very beginning
    if url:
        return text + ' |%s|' % URL_HASH[url]
    else:
        STDERR.write("Warning: url attribute empty for [%s]\n" % text)
        return text

# }}}
# handleIndexTerm(indexterm, width) {{{
def handleIndexTerm(indexterm, width) :
    """Index terms produce no output."""
    return ''
# }}}
# handleEmphasis(emphasis, width) {{{
def handleEmphasis(emphasis, width):
    """Returns the text of emphasis enclosed in underscores."""
    return '_' + GetText(emphasis.childNodes) + '_'
# }}}

################################################################################
# A dictionary for mapping xml tags to functions.
################################################################################
# {{{
handlerMaps = {
    'articleinfo': handleArticleInfo,
    'table': handleTableRoot,
    'informaltable': handleTableRoot,
    'code': handleCode,
    'programlisting': handleCode,
    'list': handleList,
    'simplelist': handleList,
    'orderedlist': handleList,
    'para': handleParagraph,
    'formalpara': handleFormalParagraph,
    'note': handleNote,
    'link': handleLink,
    'anchor': handleAnchor,
    'section': handleSection,
    'blockquote': handleBlockQuote,
    'ulink': handleUlink,
    'emphasis': handleEmphasis,
    'indexterm': handleIndexTerm
}
inlineTags = {'tag':1, 'literal':1, 'link':1,
              'ulink':1, 'citetitle':1, 'indexterm':1,
              'emphasis':1, 'filename':1 }
# }}}

# helper functions for usage() and printerr() {{{
def usage():
    """Prints the module documentation."""
    print(__doc__)

def printerr(statement):
    """Writes statement to STDERR if DEBUG is set."""
    if DEBUG:
        STDERR.write("%s\n" % (statement,))

# }}}
# replaceComment(matchobj) {{{
def replaceComment(matchobj):
    """
    replaceComment(matchobj)
        returns string

    Replacement function for the &codebegin; ... &codeend; pattern used by
    the main program. The groups of matchobj are (1) the leading run of
    '<' and ' ' characters, (2) the rest of the line before &codebegin; and
    (3) the code. The line gets ' >' appended, the code follows, and the
    text after the code starts on a new line with the leading run of group
    1 (preceded by a '<' in place of its last character unless it already
    starts with '<').
    """
    initspace = matchobj.group(1)
    firstsent = matchobj.group(2)
    code = matchobj.group(3)

    if len(initspace) > 0:
        if initspace[0] == '<':
            lastspace = initspace
        else:
            lastspace = '<' + initspace[:-1]
    else:
        lastspace = initspace

    return '\n' + initspace + firstsent + ' >\n' + code + '\n' + lastspace

# }}}
# main function {{{
if __name__ == "__main__":
    option = {}
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'ds', ['prefix=', 'help'])
        for oa, ov in opts:
            option[oa] = ov

    except getopt.GetoptError:
        STDERR.write("Usage error: db2vim --help for usage\n")
        sys.exit(1)

    if '--help' in option:
        usage()
        sys.exit(0)

    TOC_PREFIX = option.get('--prefix', 'ls_')
    DEBUG = '-d' in option

    if len(args) != 1:
        STDERR.write("Usage error: db2vim --help for usage\n")
        sys.exit(1)

    fileName = args[0]
    FILENAME = re.sub(r'\.\w+$', r'.txt', fileName)

    # let minidom open (and close) the file; stop if it cannot be read
    # instead of carrying on without a document.
    try:
        dom = xml.dom.minidom.parse(fileName)
    except (IOError, OSError):
        STDERR.write("Error opening xml file\n")
        sys.exit(1)

    modeline = r'''
================================================================================
About this file

This file was created automatically from its XML variant using db2vim. db2vim is
a python script which understands a very limited subset of the Docbook XML 4.2
DTD and outputs a plain text file in vim help format.

db2vim can be obtained via anonymous CVS from sourceforge.net. Use

cvs -d:pserver:anonymous@cvs.vim-latex.sf.net:/cvsroot/vim-latex co db2vim

Or you can visit the web-interface to sourceforge CVS at:
http://cvs.sourceforge.net/cgi-bin/viewcvs.cgi/vim-latex/db2vim/

The following modelines should nicely fold up this help manual.

vim:ft=help:fdm=expr:nowrap
vim:foldexpr=getline(v\:lnum-1)=~'-\\{80}'?'>2'\:getline(v\:lnum-1)=~'=\\{80}'?'>1'\:getline(v\:lnum)=~'=\\{80}'?'0'\:getline(v\:lnum)=~'-\\{80}'?'1'\:'='
vim:foldtext=substitute(v\:folddashes.substitute(getline(v\:foldstart),'\\s*\\*.*',"",""),'^--','\ \ \ \ \ \ ','')
================================================================================'''

    STRICT = '-s' in option

    pattern = re.compile(r'\n([< ]*)([^\n]+)&codebegin;\n(.*?)&codeend;', re.DOTALL)

    processedDoc = handleElement(dom.documentElement)
    while re.search('&codebegin;', processedDoc):
        replacedDoc = re.sub(pattern, replaceComment, processedDoc)
        # a place holder the pattern cannot match would otherwise keep this
        # loop running forever.
        if replacedDoc == processedDoc:
            break
        processedDoc = replacedDoc

    urlsection = r"""
================================================================================
URLs used in this file

"""
    labels = sorted(zip(URL_HASH.values(), URL_HASH.keys()))
    for label, url in labels:
        urlsection += '*%s* : %s\n' % (label, url)

    processedDoc = processedDoc + urlsection + modeline
    # write the encoded bytes themselves; where sys.stdout is a text stream
    # they have to go to its underlying buffer.
    output = getattr(sys.stdout, 'buffer', sys.stdout)
    output.write(processedDoc.encode('iso-8859-1') + b'\n')

# }}}
# vim:et:sts=4:fdm=marker

# ------------------------------------------------------------------------------
# diff --git a/src/vim-latex/doc/db2vim/domutils.py b/src/vim-latex/doc/db2vim/domutils.py
# new file mode 100644
# index 0000000..83351ff
# --- /dev/null
# +++ b/src/vim-latex/doc/db2vim/domutils.py
# @@ -0,0 +1,25 @@
# ------------------------------------------------------------------------------
def GetTextFromElementNode(element, childNamePattern):
    """
    Returns a list with the text (see GetText) of every descendant of
    element found by getElementsByTagName(childNamePattern).
    """
    children = element.getElementsByTagName(childNamePattern)
    return [GetText(child.childNodes) for child in children]

def GetText(nodelist):
    """Returns the concatenated data of the text nodes in nodelist."""
    rc = ""
    for node in nodelist:
        if node.nodeType == node.TEXT_NODE:
            rc = rc + node.data
    return rc

def GetTextFromElement(element):
    """
    Returns the concatenated data of the text nodes which are direct
    children of element ('' if there are none).
    """
    # visit every child, starting with the first one; the previous loop
    # skipped the first child, printed each node and failed on elements
    # without children.
    text = ""
    child = element.firstChild
    while child is not None:
        if child.nodeType == child.TEXT_NODE:
            text = text + child.data
        child = child.nextSibling

    return text

# ------------------------------------------------------------------------------
# diff --git a/src/vim-latex/doc/db2vim/textutils.py b/src/vim-latex/doc/db2vim/textutils.py
# new file mode 100644
# index 0000000..4c97c52
# --- /dev/null
# +++ b/src/vim-latex/doc/db2vim/textutils.py
# @@ -0,0 +1,224 @@
# ------------------------------------------------------------------------------
#!/usr/bin/env python
"""Contains functions to do word-wrapping on text paragraphs."""

import string
import re, random
import operator

# JustifyLine(line, width): {{{
def JustifyLine(line, width):
    """Stretch a line to width by filling in spaces at word gaps.

    line is a list of words; it is modified in place. The gaps are picked
    randomly one-after-another, before it starts over again.

    Author: Christopher Arndt <chris.arndt@web.de>
    """
    # nothing to stretch
    if not line:
        return ''

    i = []
    while 1:
        # line not long enough already?
        if len(' '.join(line)) < width:
            if not i:
                # index list is exhausted
                # get list of indices excluding last word
                i = list(range(max(1, len(line)-1)))
                # and shuffle it
                random.shuffle(i)
            # append space to a random word and remove its index
            line[i.pop(0)] += ' '
        else:
            # line has reached specified width or wider
            return ' '.join(line)


# }}}
# FillParagraphs(text, width=80, justify=0): {{{
def FillParagraphs(text, width=80, justify=0):
    """Split a text into paragraphs and wrap them to width linelength.

    Optionally justify the paragraphs (i.e. stretch lines to fill width).

    Inter-word space is reduced to one space character and paragraphs are
    always separated by two newlines. Indention is currently also lost.

    Author: Christopher Arndt <chris.arndt@web.de>
    """
    # split text into paragraphs at occurrences of two or more newlines
    paragraphs = re.split(r'\n\n+', text)
    for i in range(len(paragraphs)):
        # split paragraphs into a list of words
        words = paragraphs[i].strip().split()
        line = []; new_par = []
        while 1:
            if words:
                if len(' '.join(line + [words[0]])) > width and line:
                    # the line is already long enough -> add it to paragraph
                    if justify:
                        # stretch line to fill width
                        new_par.append(JustifyLine(line, width))
                    else:
                        new_par.append(' '.join(line))
                    line = []
                else:
                    # append next word
                    line.append(words.pop(0))
            else:
                # last line in paragraph
                new_par.append(' '.join(line))
                line = []
                break
        # replace paragraph with formatted version
        paragraphs[i] = '\n'.join(new_par)
    # return paragraphs separated by two newlines
    return '\n\n'.join(paragraphs)

# }}}
# IndentParagraphs(text, width=80, indent=0, justify=0): {{{
def IndentParagraphs(text, width=80, indent=0, justify=0):
    """Indent a paragraph, i.e:
        . left (and optionally right) justify text to given width
        . add an extra indent if desired.

    This is nothing but a wrapper around FillParagraphs
    """
    retText = re.sub(r"^|\n", r"\g<0>" + " "*indent, \
                     FillParagraphs(text, width, justify))
    retText = re.sub(r"\n+$", '', retText)
    return retText


# }}}
# OffsetText(text, indent): {{{
def OffsetText(text, indent):
    """Returns text with every line preceded by indent spaces."""
    return re.sub(r"^|\n", r"\g<0>" + " "*indent, text)


# }}}
# RightJustify(lines, width): {{{
def RightJustify(lines, width):
    """
    RightJustify(lines, width)
        returns string

    Pads every line of lines on the left with spaces up to width. A width
    of 0 stands for the width of the longest line.
    """
    if width == 0:
        width = TextWidth(lines)
    text = ""
    for line in lines.split("\n"):
        text += " "*(width - len(line)) + line + "\n"

    text = re.sub('\n$', '', text)
    return text

# }}}
# CenterText(lines, width): {{{
def CenterText(lines, width):
    """
    CenterText(lines, width)
        returns string

    Centers every line of lines within width by padding it on the left.
    Every line of the result, including the last one, ends with a newline.
    """
    text = ''
    for line in lines.split("\n"):
        text += " "*(width//2 - len(line)//2) + line + '\n'
    return text

# }}}
# TextWidth(text): {{{
def TextWidth(text):
    """
    TextWidth(text)

    returns the 'width' of the text, i.e the length of the longest segment
    in the text not containing new-lines.
    """
    return max(map(len, text.split('\n')))


# }}}
# FormatTable(tableText, ROW_SPACE=2, COL_SPACE = 3, \ {{{
#             COL_WIDTH=1000, justify=0, widths=None):
def FormatTable(tableText, ROW_SPACE=2, COL_SPACE = 3, \
                COL_WIDTH=1000, justify=0, widths=None):
    """
    FormatTable(tableText [, ROW_SPACE=2, COL_SPACE = 3, COL_WIDTH=1000,
                justify=0, widths=None])
        returns string

    Given a 2 dimensional array of text as input, produces a plain text
    formatted string which resembles the table output.

    The optional arguments specify the inter row/column spacing and the
    column width. widths may give the width of every column (indexed by
    column number); it is calculated from the cells if left out. If justify
    is set, the cells are refilled to COL_WIDTH first.
    """

    # first find out the max width of the columns
    # widths is a dictionary, but can be accessed exactly like an
    # array because the keys are integers.

    if widths is None:
        widths = {}
        for row in tableText:
            for i, cell in enumerate(row):
                # Using: dictionary.get(key, default)
                widths[i] = max(TextWidth(cell), widths.get(i, -1))

    # Truncate each of the maximum lengths to the maximum allowed. This is
    # done on a copy so that the caller's widths are left alone.
    widths = dict((i, min(widths[i], COL_WIDTH)) for i in range(len(widths)))

    if justify:
        formattedTable = []

        for row in tableText:
            formattedTable.append([FillParagraphs(cell, COL_WIDTH)
                                   for cell in row])
    else:
        formattedTable = tableText

    retTableText = ""
    for row in formattedTable:
        rowtext = row[0]
        width = widths[0]
        for i in range(1, len(row)):
            rowtext = VertCatString(rowtext, width, " "*COL_SPACE)
            rowtext = VertCatString(rowtext, width + COL_SPACE, row[i])

            width = width + COL_SPACE + widths[i]

        retTableText += rowtext
        retTableText += "\n"*ROW_SPACE

    return re.sub(r"\n+$", "", retTableText)


# }}}
# VertCatString(string1, width1, string2): {{{
def VertCatString(string1, width1, string2):
    """
    VertCatString(string1, width1=None, string2)
        returns string

    Concatenates string1 and string2 vertically. The lines are assumed to
    be "\\n" separated.

    width1 is the width of the string1 column (It is calculated if None).
    (Width refers to the maximum length of each line of a string)

    NOTE: if width1 is specified < actual width, then bad things happen.
    """
    lines1 = string1.split("\n")
    lines2 = string2.split("\n")

    if width1 is None:
        width1 = max(map(len, lines1))

    # give both columns the same number of lines
    nlines = max(len(lines1), len(lines2))
    lines1 += [""] * (nlines - len(lines1))
    lines2 += [""] * (nlines - len(lines2))

    # pad the left column to width1 and glue the right column onto it.
    retlines = [left.ljust(width1) + right
                for left, right in zip(lines1, lines2)]

    return "\n".join(retlines)

# }}}

# vim:et:sts=4:fdm=marker
