Logo Search packages:      
Sourcecode: dblatex version File versions


#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Time-stamp: <2008-06-23 22:21:26 ah>

"""
Provide an encoder for a font specification configuration: the encoder is fed
with Unicode characters one by one and determines the needed font switches
between the preceding and the current character.
"""

import re
import xml.dom.minidom

def _indent(string, width=2):
    """Indent the <string> lines by <width> blank characters."""
    istr = ' ' * width
    s = istr + istr.join(string.splitlines(1))
    return s

class UnicodeInterval:
    """Unicode codepoint interval, including all codepoints between its minimum
    and maximum boundary.
    For any Unicode codepoint it can be queried if it belongs to the interval.

    The from_*() methods configure the instance and return it, so that an
    interval can be built in one expression:
    UnicodeInterval().from_interval('U0041', 'U005A')
    """

    # Internal data attributes:
    # _min_boundary: Minimum boundary of the codepoint interval (ordinal)
    # _max_boundary: Maximum boundary of the codepoint interval (ordinal)

    # Codepoint notation: 'U' or 'u', an optional '+', then hexadecimal digits
    _re_codepoint = re.compile(r'^[Uu]\+?([0-9A-Fa-f]+)$')

    def __init__(self):
        self._min_boundary = 0
        self._max_boundary = 0

    def __str__(self):
        """Dump the instance's data attributes."""
        string = 'UnicodeInterval: [' + str(self._min_boundary)
        # A single-codepoint interval is dumped with one boundary only
        if self._max_boundary != self._min_boundary:
            string += ',' + str(self._max_boundary)
        string += ']'
        return string

    def _unicode_to_ordinal(self, codepoint):
        """Return the ordinal of the specified codepoint.

        <codepoint> is a string such as 'U0041' or 'u+00e9'.
        Raise RuntimeError if the string is not in this notation.
        """
        found = self._re_codepoint.match(codepoint)
        if not found:
            raise RuntimeError('Not a unicode codepoint: ' + codepoint)
        return int(found.group(1), 16)

    def from_char(self, char):
        """Interval for a single character. Return self."""
        self._min_boundary = ord(char)
        self._max_boundary = self._min_boundary
        return self

    def from_codepoint(self, codepoint):
        """Interval for a single character defined as unicode string.
        Return self. Raise RuntimeError on a malformed codepoint.
        """
        self._min_boundary = self._unicode_to_ordinal(codepoint)
        self._max_boundary = self._min_boundary
        return self

    def from_interval(self, codepoint1, codepoint2):
        """Interval from a unicode range. The boundaries can be given in any
        order. Return self. Raise RuntimeError on a malformed codepoint.
        """
        first = self._unicode_to_ordinal(codepoint1)
        second = self._unicode_to_ordinal(codepoint2)
        self._min_boundary = min(first, second)
        self._max_boundary = max(first, second)
        return self

    def match(self, char):
        """Determine whether the specified character is contained in this
        instance's interval (boundaries included).
        """
        return self._min_boundary <= ord(char) <= self._max_boundary

class FontSpec:
    """Font specification, consisting of one or several unicode character
    intervals and of fonts to select for those characters. The object
    fully defines the fonts to switch to.
    """

    # Internal data attributes:
    # _intervals: UnicodeInterval list

    transition_types = ['enter', 'inter', 'exit']
    _re_interval = re.compile(r'^([Uu][0-9A-Fa-f]+)-([Uu][0-9A-Fa-f]+)$')
    _re_codepoint = re.compile(r'^([Uu][0-9A-Fa-f]+)$')

    def __init__(self, intervals=None, subfont_first=False):
        """Create a font specification from the specified codepoint intervals.
        The other data attributes will be set by the caller later.

        <intervals> is either a single interval or an iterable of intervals.
        <subfont_first> tells if the subfonts added later are looked up
        before this font (True) or after it (False).
        """
        self.type = ""
        self.id = None
        self.refmode = None
        # The lookup list always contains the font itself
        self.fontspecs = [self]
        self.subfont_first = subfont_first

        # One {font type: font name} mapping per transition type
        self.transitions = {}
        for transition_type in self.transition_types:
            self.transitions[transition_type] = {}

        if not intervals:
            self._intervals = []
            return

        try:
            self._intervals = list(intervals)
        except TypeError:
            # Not an iterable: a single interval object has been given
            self._intervals = [intervals]

    def fromnode(self, node):
        """Fill the font specification from a fontspec DOM element: its
        'range' or 'charset', 'id', 'refmode' and 'type' attributes, and its
        enter/inter/exit font transitions. 'range' wins over 'charset'.
        """
        urange = node.getAttribute('range')
        charset = node.getAttribute('charset')
        ident = node.getAttribute('id')
        refmode = node.getAttribute('refmode')
        self.type = node.getAttribute('type')

        if urange:
            self._intervals = self._parse_range(urange)
        elif charset:
            # One single-character interval per character of the charset
            self._intervals = [UnicodeInterval().from_char(char)
                               for char in charset]

        # Unique identifier
        if ident:
            self.id = ident
        if refmode:
            self.refmode = refmode

        for transition_type in self.transition_types:
            self._parse_transitions(node, transition_type)

    def _parse_range(self, range):
        """Parse the specified /fonts/fontspec@range attribute to a
        UnicodeInterval list.

        The attribute is a blank separated list of chunks, each chunk being
        a codepoint (U0041) or a codepoint interval (U0041-U005A).
        Raise RuntimeError if a chunk is in neither form.
        """
        print(range)  # debug trace kept from the original code
        intervals = []
        for chunk in range.split():
            found = self._re_interval.match(chunk)
            if found:
                urange = UnicodeInterval().from_interval(found.group(1),
                                                         found.group(2))
                intervals.append(urange)
                continue

            found = self._re_codepoint.match(chunk)
            if not found:
                raise RuntimeError('Unable to parse range: "' + range + '"')
            intervals.append(UnicodeInterval().from_codepoint(found.group(1)))
        return intervals

    def _parse_transitions(self, node, transition_type):
        """Evaluate the font elements of the specified fontspec element for the
        specified transition type (enter, inter or exit).

        The text of each <font> element is recorded in
        self.transitions[transition_type] for every type listed in its
        blank separated 'type' attribute. Font elements without text are
        skipped.
        """
        fontlist = self.transitions[transition_type]

        for dom_transition in node.getElementsByTagName(transition_type):
            for dom_font in dom_transition.getElementsByTagName('font'):
                font_types = dom_font.getAttribute("type").split()
                # The font name is the text directly under the font element
                font = ''.join([child.nodeValue
                                for child in dom_font.childNodes
                                if child.nodeType == child.TEXT_NODE])
                if not font:
                    continue
                for font_type in font_types:
                    fontlist[font_type] = font

    def _switch_to(self, fonts):
        """Return a string with the XeTeX font switching commands for the
        specified font types.

        <fonts> maps a font type to a font name. The string is empty when
        <fonts> is empty.
        """
        # Raw string: '\s' is not a valid escape sequence in a plain literal
        s = ''.join([r'\set%sfont{%s}' % (font_type, font)
                     for font_type, font in fonts.items()])
        if s:
            s = r"\savefont" + s + r"\restorefont{}"
        return s

    def enter(self):
        """Return the commands to insert when this font gets selected."""
        return self._switch_to(self.transitions["enter"])

    def exit(self):
        """Return the commands to insert when this font is left."""
        return self._switch_to(self.transitions["exit"])

    def interchar(self):
        """Return the commands to insert between two characters that use
        this font.
        """
        return self._switch_to(self.transitions["inter"])

    def __str__(self):
        """Dump the instance's data attributes."""
        lines = ['FontSpec:',
                 '  Id: %s' % self.id,
                 '  Refmode: %s' % self.refmode]
        for interval in self._intervals:
            lines.append(_indent(str(interval)))
        return '\n'.join(lines)

    def add_subfont(self, fontspec):
        """Register <fontspec> as a subfont of this font.

        With subfont_first the subfont is inserted just before the last
        entry of the lookup list, else it is appended to the list.
        """
        print("%s -> %s" % (self.id, fontspec.id))  # debug trace
        if self.subfont_first:
            self.fontspecs.insert(-1, fontspec)
        else:
            # NOTE(review): this branch was missing from the reviewed text;
            # appending is the assumed counterpart of insert(-1) -- confirm.
            self.fontspecs.append(fontspec)

    def add_uranges(self, ranges, depth=1):
        """Extend the supported character intervals with <ranges>. When
        <depth> is true the subfonts are extended as well.
        """
        # Recursively extend the supported character range
        # NOTE(review): the recursive call and the final extend() were
        # missing from the reviewed text and are reconstructed from the
        # comment above -- confirm against the upstream code.
        if depth:
            for f in self.fontspecs:
                if f is not self:
                    f.add_uranges(ranges)
        self._intervals.extend(ranges)

    def get_uranges(self):
        """Return the interval list of this font (not a copy)."""
        return self._intervals

    def contains(self, char):
        """Tell if <char> belongs to one of the intervals of this font,
        whatever the subfonts are.
        """
        for interval in self._intervals:
            if interval.match(char):
                return True
        # Give up only once every interval has been tried
        return False

    def match(self, char):
        """Determine whether the font specification matches the specified
        object, thereby considering refmode.

        Return the first font of the lookup list (this font and its
        subfonts) that contains <char>, or None if there is none.
        """
        for fontspec in self.fontspecs:
            if not fontspec.contains(char):
                continue
            # Debug trace.
            # NOTE(review): the try/except frame was missing from the
            # reviewed text; the fallback on ord() is assumed to cover
            # characters that str() cannot encode -- confirm.
            try:
                print("%s has '%s'" % (fontspec.id, str(char)))
            except UnicodeError:
                print("%s has '%s'" % (fontspec.id, ord(char)))
            return fontspec
        return None

class DefaultFontSpec(FontSpec):
    """The default fontspec gives priority to its children, and
    contains any character.
    """

    def __init__(self):
        # No interval is needed since contains() accepts every character
        FontSpec.__init__(self, subfont_first=True)

    def contains(self, char):
        """Accept any character."""
        return True

class FontSpecConfig:
    """This object parses an XML fontspec configuration file and builds the
    resulting fontspec tree, the root fontspec being the default font
    to apply.

    The fontspec configuration file defines the fonts to apply in order
    of precedence (and for some Unicode ranges) and the font levels (or
    subfonts) thanks to the 'refmode' attribute that links a font to the
    'id' of the font it is a subfont of.
    """

    # Data attributes:
    # fontspecs: the non default fontspecs, in document order
    # fontnames: {id: fontspec} for the fontspecs having an id
    # default_fontspec: root of the fontspec tree

    def __init__(self, conf_file):
        """Create a font specification configuration from the specified file
        (file name or file-like object).

        Raise ValueError if a fontspec refers to an unknown 'refmode'.
        """
        self.fontspecs = []
        self.fontnames = {}

        self.default_fontspec = DefaultFontSpec()

        dom_document = xml.dom.minidom.parse(conf_file)
        try:
            for dom_fontspec in dom_document.getElementsByTagName('fontspec'):
                # An element with a 'default' attribute configures the root
                if dom_fontspec.getAttribute('default'):
                    print("has default")  # debug trace
                    fontspec = self.default_fontspec
                else:
                    fontspec = FontSpec()

                # NOTE(review): the lines from here to the build_tree() call
                # were partly missing from the reviewed text and are
                # reconstructed from the attributes used by build_tree() --
                # confirm against the upstream code.
                fontspec.fromnode(dom_fontspec)

                if fontspec is not self.default_fontspec:
                    self.fontspecs.append(fontspec)
                if fontspec.id:
                    self.fontnames[fontspec.id] = fontspec
        finally:
            # The DOM is no longer needed once the fontspecs are filled
            dom_document.unlink()

        self.build_tree()

    def build_tree(self):
        """Build the fontspec tree, the root node being the default font
        to apply. The fontspecs without a refmode (i.e. not being
        explicitly a subfont) are direct children of the default font.

        Raise ValueError if a refmode does not match any fontspec id.
        """
        # NOTE(review): the handling of the "ignore" fontspecs and the
        # add_subfont() call were missing from the reviewed text and are
        # reconstructed -- confirm against the upstream code.
        to_ignore = []
        for fontspec in self.fontspecs:
            if fontspec.type == "ignore":
                to_ignore.append(fontspec)
                continue

            if not fontspec.refmode:
                parent = self.default_fontspec
            else:
                parent = self.fontnames.get(fontspec.refmode, None)

            if not parent:
                raise ValueError("wrong fontspec tree")
            parent.add_subfont(fontspec)

        # Insert the characters to ignore in fontspecs
        for ignored in to_ignore:
            self.default_fontspec.add_uranges(ignored.get_uranges())

    def __str__(self):
        """Dump the instance's data attributes."""
        lines = ['FontSpecConfig:', '  Fontspec list:']
        for fontspec in self.fontspecs:
            lines.append(_indent(str(fontspec), 4))
        return '\n'.join(lines)

class FontSpecEncoder:
    """Encoder working with font specifications: it is fed
    with Unicode characters one by one and it inserts the needed font switches
    between the preceding and the current character.
    """

    # Internal data attributes:
    # _conf: the FontSpecConfig built from the configuration file
    # _cur_fontspec: fontspec selected for the last character, or None
    # _ref_stack: selected fontspecs, from the default font to the current one

    def __init__(self, configuration):
        """Create a font specification encoder from the specified configuration
        file (file name or file-like object).
        """
        self._conf = FontSpecConfig(configuration)
        self._cur_fontspec = None
        self._ref_stack = [self._conf.default_fontspec]

    def _switch_to(self, fontspec):
        """Insert the transition string, according to the newly selected
        fontspec and the previously selected fontspec.
        """
        # If the font hasn't changed, just insert optional inter-char material
        if fontspec == self._cur_fontspec:
            return fontspec.interchar()

        s = ""
        # A new font is selected, so exit from current font stream
        if self._cur_fontspec:
            s += self._cur_fontspec.exit()

        # Enter into the current font stream
        self._cur_fontspec = fontspec
        s += fontspec.enter()
        return s

    def _encode(self, char):
        """Select the fontspec matching the specified <char>, and switch to
        this font as current font.

        The principle to find out the fontspec is to:
        - find from the current font level a matching font
          (the current font leaf or the direct font children)
        - if no font is found try with the parent font, and so on,
          up to the default root font (that must exist).

        Return the font switching commands to insert before <char>.
        """
        fontspec = self._cur_fontspec or self._conf.default_fontspec

        fontspec = fontspec.match(char)
        # NOTE(review): the stack handling (pop and append) was missing from
        # the reviewed text and is reconstructed from the principle stated
        # in the docstring -- confirm against the upstream code.
        while not fontspec:
            # Nothing found at this level: go back to the parent level
            self._ref_stack.pop()
            fontspec = self._ref_stack[-1].match(char)

        if fontspec is not self._ref_stack[-1]:
            # A deeper font is selected: it becomes the current level
            self._ref_stack.append(fontspec)

        return self._switch_to(fontspec)

    def ignorechars(self, charset):
        "Characters to ignore in font selection (maintain the current one)"
        intervals = [UnicodeInterval().from_char(c) for c in charset]
        # NOTE(review): the use of <intervals> was missing from the reviewed
        # text; extending the default font and its subfonts is assumed, so
        # that any current font contains these characters -- confirm.
        self._conf.default_fontspec.add_uranges(intervals)

    def encode(self, char):
        """Return the tuple (switches, char), where switches is the string of
        all the font switching commands needed before the specified
        character.
        """
        return (self._encode(char), char)

    def stop(self):
        """Cleanly exit from the current fontspec.

        Return the exit commands of the current fontspec, or None when no
        fontspec is currently selected.
        """
        if self._cur_fontspec:
            s = self._cur_fontspec.exit()
            self._cur_fontspec = None
            return s

Generated by  Doxygen 1.6.0   Back to index