This file is indexed.

/usr/share/doc/devhelp/tools/html2xml.py is in devhelp 3.18.1-1ubuntu5.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
#!/usr/bin/env python
import os.path
import sgmllib
import string
import sys

def does_dict_have_keys (dict, keys):
    """Return 1 if `dict` contains every item of `keys` and nothing else.

    Two conditions must both hold:
      * every item of `keys` is a key of `dict`, and
      * `dict` has exactly ``len(keys)`` entries.

    Returns the integer 1 on success and 0 otherwise (integers rather
    than booleans, as callers have always received).
    """
    for key in keys:
        # The `in` operator replaces the deprecated dict.has_key(), which
        # no longer exists on Python 3 dictionaries.
        if key not in dict:
            return 0
    # Same size plus "all keys present" means there are no extra entries.
    if len(dict) != len(keys):
        return 0
    return 1

def walk (dict, level=0, parent=None):
    """Print the nested contents tree `dict` as a '*'-indented outline.

    `dict` maps entry names to sub-dictionaries.  The keys 'name', 'order'
    and 'link' are bookkeeping keys and are never printed as entries.  When
    `dict` has an 'order' key its value gives the entries to visit, in that
    order; otherwise all keys are visited in the dictionary's own order.

    Each entry is written to standard output as ``<name> - <link>``,
    prefixed with `level` asterisks and a space when `level` is non-zero.
    The link is shown only for leaf entries (sub-dictionaries whose single
    key is 'link'); for every other entry it is printed as an empty string.

    `level` is the current nesting depth.  `parent` receives the enclosing
    dictionary on recursive calls but is not otherwise used.
    """
    if 'order' in dict:
        names = dict['order']
    else:
        names = dict.keys()

    for key in names:
        if key in ('name', 'order', 'link'):
            continue

        entry = dict[key]
        # Leaf test: 'link' is present and is the only key of the entry.
        if 'link' in entry and len(entry) == 1:
            link = entry['link']
        else:
            link = ""

        # A single pre-formatted argument makes print emit the same text
        # whether it is parsed as a Python 2 statement or a Python 3 call.
        if level:
            print("%s %s - %s" % ('*' * level, key, link))
        else:
            print("%s - %s" % (key, link))

        walk(entry, level + 1, dict)
		
class BookParser (sgmllib.SGMLParser):
    """Collect the link hierarchy of an HTML index page into nested dicts.

    After feeding a page, `self.dict` holds:
      * 'name'  -- the first non-empty text chunk found in the page;
      * 'order' -- the recorded anchor texts, in the order they were seen;
      * one key per recorded anchor text, mapping to a sub-dictionary with
        a 'link' key (the most recently seen href) and, when the entry has
        children, its own 'order' list and child entries.

    Nesting follows <dd> and <ul> elements: each opening tag pushes the
    most recently recorded anchor text as the parent of what follows, and
    each closing tag pops it again.  Anchors nested deeper than MAX_DEPTH
    parents are ignored.
    """

    # Largest number of enclosing <dd>/<ul> parents that is still recorded.
    MAX_DEPTH = 3

    def __init__ (self):
        sgmllib.SGMLParser.__init__ (self)
        # Stack of anchor texts that enclose the current position.
        self.parents = []
        # `a` has always been bound to the very same list object as
        # `parents`; nothing in this file reads it, the alias is only kept
        # so the instance attributes stay as they were.
        self.a = self.parents
        self.dict = {}
        self.last = ""      # text of the most recently recorded anchor
        self.link = ""      # href of the most recently opened anchor
        self.is_a = 0       # 1 while inside an <a> element
        self.level = 0      # count of currently open <dd>/<ul> elements
        self.first = 1      # 1 until the first text chunk has been seen

    def unknown_starttag (self, tag, attrs):
        """Track entry into <a>, <dd> and <ul> elements."""
        if tag == 'a':
            self.is_a = 1
            for name, value in attrs:
                if name == "href":
                    self.link = value
                    break

        if tag in ('dd', 'ul'):
            self.parents.append (self.last)
            self.level = self.level + 1

    def unknown_endtag (self, tag):
        """Track exit from <a>, <dd> and <ul> elements."""
        if tag == 'a':
            self.is_a = 0

        if tag in ('dd', 'ul'):
            self.level = self.level - 1
            # A closing tag with no matching opening tag must not abort the
            # parse with an IndexError from popping an empty list.
            if self.parents:
                self.parents.pop ()

    def _current_node (self):
        """Return the dictionary new entries belong in, or None.

        None is returned when the nesting is deeper than MAX_DEPTH or when
        one of the parent names was never recorded, so there is no place
        to attach the entry.
        """
        if len (self.parents) > self.MAX_DEPTH:
            return None

        node = self.dict
        for name in self.parents:
            node = node.get (name)
            if node is None:
                return None
        return node

    def handle_data (self, data):
        """Record the page title and the text/href of each anchor."""
        data = data.strip ()
        if not data or data in (">", "<"):
            return

        # The first text chunk of the page becomes the book name.
        if self.first:
            self.dict['name'] = data
            self.first = 0
            return

        # Skip repetitions of the book name and these fixed labels.
        if data == self.dict['name'] or \
           data in ("Next Page", "Previous Page", "Home", "Next"):
            return

        # Only text inside an anchor is recorded.  Checking this before the
        # parent lookup keeps plain text from failing on that lookup.
        if not self.is_a:
            return

        node = self._current_node ()
        if node is None:
            return

        if data not in node:
            node[data] = {}
        if 'order' not in node:
            node['order'] = []
        node['order'].append (data)
        node[data]['link'] = self.link

        self.last = data

def parse_book (url):
    """Parse the index page of the book at `url` and return its contents dict.

    `url` is a filesystem path.  The page is looked for, in this order, at
    ``url + "/index.html"``, ``url + "/book1.html"`` and finally `url`
    itself; the first path that exists is parsed with BookParser.

    Returns the parser's `dict` attribute.

    Raises SystemExit (status 1) after writing a message to standard error
    when none of the candidate paths exists.
    """
    candidates = (url + "/index.html", url + "/book1.html", url)
    for filename in candidates:
        if os.path.exists (filename):
            break
    else:
        # Report on stderr: stdout carries the generated XML, and a non-zero
        # status lets callers notice the failure.
        sys.stderr.write ("Error; Can't find an index :(\n")
        raise SystemExit (1)

    # Read everything up front and always release the file handle, even if
    # reading fails.
    fd = open (filename)
    try:
        contents = fd.read ()
    finally:
        fd.close ()

    p = BookParser ()
    p.feed (contents)
    p.close ()
    return p.dict

# Script body: parse the book named by the first command-line argument and
# write a <book> XML description of its chapters to standard output.
from xml.sax.saxutils import escape

def _attr (value):
    """Escape `value` for use inside a double-quoted XML attribute."""
    return escape (value, {'"': '&quot;'})

if len (sys.argv) < 2:
    sys.stderr.write ("Usage: %s <book directory or index file>\n" % sys.argv[0])
    raise SystemExit (1)

filename = sys.argv[1]

book = parse_book (filename)

# Every print below takes one pre-formatted string so that the output is the
# same whether print is parsed as a statement or called as a function.
print ('<?xml version="1.0"?>')
print ('<book title="%s"\nname=""\nbase=""\nlink="%s">'
       % (_attr (book['name']), _attr (os.path.basename (filename))))

print ('<chapters>')
# A page without any recorded link has no 'order' list; emit no chapters.
for chap in book.get ('order', []):
    chapter = book[chap]
    print ('  <sub name="%s" link="%s">' % (_attr (chap), _attr (chapter['link'])))

    for sub in chapter.get ('order', []):
        section = chapter[sub]
        if does_dict_have_keys (section, ['link']):
            # Leaf section: nothing but a link, so a self-closing element.
            print ('    <sub name="%s" link="%s"/>'
                   % (_attr (sub), _attr (section['link'])))
            continue

        print ('    <sub name="%s" link="%s">'
               % (_attr (sub), _attr (section['link'])))
        for sub2 in section['order']:
            print ('      <sub name="%s" link="%s"/>'
                   % (_attr (sub2), _attr (section[sub2]['link'])))
        print ('    </sub>')

    print ('  </sub>')
    print ("")

print ('</chapters>')
print ("")
print ('</book>')