This file is indexed.

/usr/share/pyshared/pyth/plugins/xhtml/reader.py is in python-pyth 0.5.6-3.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
"""
Read documents from xhtml
"""

import BeautifulSoup

from pyth import document
from pyth.format import PythReader
from pyth.plugins.xhtml.css import CSS


class XHTMLReader(PythReader):

    @classmethod
    def read(self, source, css_source=None, encoding="utf-8"):
        reader = XHTMLReader(source, css_source, encoding)
        return reader.go()

    def __init__(self, source, css_source=None, encoding="utf-8"):
        self.source = source
        self.css_source = css_source
        self.encoding = encoding

    def go(self):
        """
        Parse self.source and return it as a pyth Document.

        The source is parsed with BeautifulSoup, normalised by
        format(), and then walked by process_into() which fills a new
        Document.  As a side effect self.css is set to a CSS object
        built from self.css_source (or an empty one when no stylesheet
        was given).
        """
        parser = BeautifulSoup.BeautifulSoup
        tree = parser(self.source,
                      convertEntities=parser.HTML_ENTITIES,
                      fromEncoding=self.encoding,
                      smartQuotesTo=None)
        # Flatten multi-line block content before walking the tree.
        tree = self.format(tree)
        result = document.Document()
        # Without a stylesheet, fall back to an empty CSS object.
        self.css = CSS(self.css_source) if self.css_source else CSS()
        self.process_into(tree, result)
        return result

    def format(self, soup):
        """
        Normalise the line structure of a BeautifulSoup document.

        The content of the block elements (<p> and <li>) is turned
        from multi-line text into a single line, and every <br/> tag
        is replaced by a newline character, so that no further text
        rendering has to be done afterwards.

        The tree is modified in place and re-parsed from its
        serialised form; the re-parsed document is returned.
        """
        # Strip the trailing newline(s) of every text node, together
        # with any spaces that follow them.
        for text_node in soup.findAll(text=True):
            trimmed = text_node.rstrip(" ")
            if trimmed.endswith("\n"):
                text_node.replaceWith(trimmed.rstrip("\n"))
        # Collapse each block element onto one long line.
        for tag_name in ['p', 'li']:
            for block in soup.findAll(tag_name):
                pieces = []
                for line in unicode(block).splitlines():
                    pieces.append(line.strip())
                one_line = ' '.join(pieces)
                block.replaceWith(BeautifulSoup.BeautifulSoup(one_line))
        soup = BeautifulSoup.BeautifulSoup(unicode(soup))
        # Every <br/> tag becomes a newline character.
        for line_break in soup.findAll('br'):
            line_break.replaceWith("\n")
        return BeautifulSoup.BeautifulSoup(unicode(soup))

    def is_bold(self, node):
        """
        Return true if the BeautifulSoup node needs to be rendered as
        bold.
        """
        return (node.findParent(['b', 'strong']) is not None or
                self.css.is_bold(node))

    def is_italic(self, node):
        """
        Return true if the BeautifulSoup node needs to be rendered as
        italic.
        """
        return (node.findParent(['em', 'i']) is not None
                or self.css.is_italic(node))

    def is_sub(self, node):
        """
        Return true if the BeautifulSoup node needs to be rendered as
        sub.
        """
        return (node.findParent(['sub']) is not None
                or self.css.is_sub(node))

    def is_super(self, node):
        """
        Return true if the BeautifulSoup node needs to be rendered as
        super.
        """
        return (node.findParent(['sup']) is not None
                or self.css.is_super(node))

    def url(self, node):
        """
        Return the link target of a BeautifulSoup node.

        The target is the `href` attribute of the <a> ancestor found
        with findParent(); None is returned when the node has no such
        ancestor.
        """
        anchor = node.findParent('a')
        return anchor.get('href') if anchor else None

    def process_text(self, node):
        """
        Return a pyth Text object from a BeautifulSoup node or None if
        the text is empty.

        Emptiness is decided on the whitespace-stripped string, but the
        content of the returned Text is the unstripped node.string.
        The properties 'bold', 'italic', 'sub' and 'super' are set to
        True when the matching is_*() check succeeds, and 'url' is set
        to the link target when url() returns a non-empty value.
        """
        if not node.string.strip():
            return None

        # Set all the properties
        properties = {}
        if self.is_bold(node):
            properties['bold'] = True
        if self.is_italic(node):
            properties['italic'] = True
        # Resolve the link once: url() searches the node's ancestors,
        # so there is no point in doing that search twice.
        href = self.url(node)
        if href:
            properties['url'] = href
        if self.is_sub(node):
            properties['sub'] = True
        if self.is_super(node):
            properties['super'] = True

        return document.Text(properties, [node.string])

    def process_into(self, node, obj):
        """
        Process a BeautifulSoup node and fill its elements into a pyth
        base object.

        Text nodes are converted with process_text() and appended to
        `obj` when the result is not empty.  For a tag, a <p>, <ul> or
        <li> first appends a new Paragraph, List or ListEntry to `obj`
        and makes it the target of the children; any other tag is
        transparent and its children are appended to `obj` itself.
        The children are then processed recursively.
        """
        if isinstance(node, BeautifulSoup.NavigableString):
            # Comment, Declaration and ProcessingInstruction are
            # NavigableString subclasses in BeautifulSoup 3; they are
            # markup, not content, and must not end up as document
            # text (e.g. the doctype or an <!-- ... --> comment).
            if isinstance(node, (BeautifulSoup.Comment,
                                 BeautifulSoup.Declaration,
                                 BeautifulSoup.ProcessingInstruction)):
                return
            text = self.process_text(node)
            if text:
                obj.append(text)
            return

        # Tags that open a new pyth container for their children.
        containers = {
            'p': document.Paragraph,
            'ul': document.List,
            'li': document.ListEntry,
        }
        factory = containers.get(node.name)
        if factory is not None:
            new_obj = factory()
            obj.append(new_obj)
            obj = new_obj

        for child in node:
            self.process_into(child, obj)