This file is indexed.

/usr/lib/python2.7/dist-packages/lxml/html/_html5builder.py is in python-lxml 3.5.0-1ubuntu0.1.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
"""
Legacy module - don't use in new code!

html5lib now has its own proper implementation.

This module implements a tree builder for html5lib that generates lxml
html element trees.  This module uses camelCase as it follows the
html5lib style guide.
"""

from html5lib.treebuilders import _base, etree as etree_builders
from lxml import html, etree


class DocumentType(object):

    def __init__(self, name, publicId, systemId):
        self.name = name
        self.publicId = publicId
        self.systemId = systemId

class Document(object):

    def __init__(self):
        self._elementTree = None
        self.childNodes = []

    def appendChild(self, element):
        self._elementTree.getroot().addnext(element._element)


class TreeBuilder(_base.TreeBuilder):
    documentClass = Document
    doctypeClass = DocumentType
    elementClass = None
    commentClass = None
    fragmentClass = Document

    def __init__(self, *args, **kwargs):
        html_builder = etree_builders.getETreeModule(html, fullTree=False)
        etree_builder = etree_builders.getETreeModule(etree, fullTree=False)
        self.elementClass = html_builder.Element
        self.commentClass = etree_builder.Comment
        _base.TreeBuilder.__init__(self, *args, **kwargs)

    def reset(self):
        _base.TreeBuilder.reset(self)
        self.rootInserted = False
        self.initialComments = []
        self.doctype = None

    def getDocument(self):
        return self.document._elementTree

    def getFragment(self):
        fragment = []
        element = self.openElements[0]._element
        if element.text:
            fragment.append(element.text)
        fragment.extend(element.getchildren())
        if element.tail:
            fragment.append(element.tail)
        return fragment

    def insertDoctype(self, name, publicId, systemId):
        doctype = self.doctypeClass(name, publicId, systemId)
        self.doctype = doctype

    def insertComment(self, data, parent=None):
        if not self.rootInserted:
            self.initialComments.append(data)
        else:
            _base.TreeBuilder.insertComment(self, data, parent)

    def insertRoot(self, name):
        buf = []
        if self.doctype and self.doctype.name:
            buf.append('<!DOCTYPE %s' % self.doctype.name)
            if self.doctype.publicId is not None or self.doctype.systemId is not None:
                buf.append(' PUBLIC "%s" "%s"' % (self.doctype.publicId,
                                                  self.doctype.systemId))
            buf.append('>')
        buf.append('<html></html>')
        root = html.fromstring(''.join(buf))

        # Append the initial comments:
        for comment in self.initialComments:
            root.addprevious(etree.Comment(comment))

        # Create the root document and add the ElementTree to it
        self.document = self.documentClass()
        self.document._elementTree = root.getroottree()

        # Add the root element to the internal child/open data structures
        root_element = self.elementClass(name)
        root_element._element = root
        self.document.childNodes.append(root_element)
        self.openElements.append(root_element)

        self.rootInserted = True