This file is indexed.

/usr/lib/ruby/1.9.1/htree/parse.rb is in libhtree-ruby1.9.1 0.7-5.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
require 'htree/scan'
require 'htree/htmlinfo'
require 'htree/text'
require 'htree/tag'
require 'htree/leaf'
require 'htree/doc'
require 'htree/elem'
require 'htree/raw_string'
require 'htree/context'
require 'htree/encoder'
require 'htree/fstr'

module HTree
  # HTree.parse parses <i>input</i> and returns a document tree
  # represented by HTree::Doc.
  #
  # <i>input</i> should be a String or
  # an object which responds to the read or open method.
  # For example, IO, StringIO, Pathname, URI::HTTP and URI::FTP are acceptable.
  # Note that the URIs need open-uri.
  #
  # HTree.parse guesses whether <i>input</i> is HTML and whether it is XML.
  #
  # If it is guessed to be HTML, the default namespace in the result is set to
  # http://www.w3.org/1999/xhtml regardless of whether <i>input</i> has an
  # XML namespace declaration, even if it is pre-XML HTML.
  #
  # If it is guessed to be HTML and not XML, all element and attribute names
  # are downcased.
  #
  # If the opened file or the read content has a charset method,
  # HTree.parse decodes it according to $KCODE before parsing.
  # Otherwise HTree.parse assumes the character encoding of the content is
  # compatible with $KCODE.
  # Note that the charset method is provided by URI::HTTP with open-uri.
  def HTree.parse(input)
    HTree.with_frozen_string_hash do
      parse_as(input, false)
    end
  end

  # HTree.parse_xml parses <i>input</i> as XML and
  # returns a document tree represented by HTree::Doc.
  #
  # It behaves almost the same as HTree.parse, but it assumes <i>input</i>
  # is XML even if there is no XML declaration.
  # The assumption causes the following differences.
  # * Element names are not downcased.
  # * The content of <script> and <style> elements is PCDATA, not CDATA.
  def HTree.parse_xml(input)
    HTree.with_frozen_string_hash do
      parse_as(input, true)
    end
  end

  # :stopdoc:

  # Reads <i>input</i>, converts it to Encoder.internal_charset when a
  # different charset is reported for it, scans it into tokens and builds
  # the HTree::Doc.
  #
  # input:: a String, an object which responds to read, or an object which
  #         responds to open and yields an object which responds to read.
  # is_xml:: initial XML assumption handed to HTree.scan; HTree.scan returns
  #          the is_xml and is_html flags used for the rest of the parse.
  #
  # Raises SecurityError if <i>input</i> is tainted and $SAFE is 1 or more.
  def HTree.parse_as(input, is_xml)
    input_charset = nil
    if input.tainted? && 1 <= $SAFE
      raise SecurityError, "input tainted"
    end
    if input.respond_to? :read # IO, StringIO, URI with open-uri
      source = input
      input = source.read.untaint
      # The charset may be reported by the object being read (e.g. a file
      # already opened through open-uri) or by the content it returns
      # (e.g. URI#read with open-uri).  Ask the source first so that its
      # charset is not lost once input is rebound to the content.
      if source.respond_to? :charset
        input_charset = source.charset
      elsif input.respond_to? :charset
        input_charset = input.charset
      end
    elsif input.respond_to? :open # Pathname, URI with open-uri
      input.open {|f|
        input = f.read.untaint
        input_charset = f.charset if f.respond_to? :charset
      }
    end
    if input_charset && input_charset != Encoder.internal_charset
      input = Iconv.conv(Encoder.internal_charset, input_charset, input)
    end

    tokens = []
    is_xml, is_html = HTree.scan(input, is_xml) {|token|
      tokens << token
    }
    context = is_html ? HTMLContext : DefaultContext
    structure_list = parse_pairs(tokens, is_xml, is_html)
    structure_list = fix_structure_list(structure_list, is_xml, is_html)
    nodes = structure_list.map {|s| build_node(s, is_xml, is_html, context) }
    Doc.new(nodes)
  end

  # Turns the flat token list into a nested structure list by pairing start
  # tags with end tags.
  #
  # Open elements are kept on a stack of [name, start tag raw string,
  # children] entries; the bottom entry (nil name) collects the top-level
  # structures.  An end tag closes the nearest open element with the same
  # name, and every element opened after it is closed without an end tag.
  # An end tag that matches no open element becomes a :bogus_etag structure.
  # Element names are downcased when the input is HTML and not XML.
  #
  # Returns the list of top-level structures.
  def HTree.parse_pairs(tokens, is_xml, is_html)
    fold_case = !is_xml && is_html
    open_elems = [[nil, nil, []]]
    tokens.each {|token|
      case token[0]
      when :stag
        stag_raw = token[1]
        name = stag_raw[Pat::Name]
        name = name.downcase if fold_case
        open_elems.push [HTree.frozen_string(name), stag_raw, []]
      when :etag
        etag_raw = token[1]
        etag_name = etag_raw[Pat::Name]
        etag_name = etag_name.downcase if fold_case
        etag_name = HTree.frozen_string(etag_name)
        # Search from the innermost open element outwards.
        opener = open_elems.reverse_each.find {|frame| frame[0] == etag_name }
        if opener
          # Everything opened after the matching element ends here too,
          # but without an end tag of its own.
          until opener.equal? open_elems.last
            unclosed = open_elems.pop
            open_elems.last[2] << [:elem, unclosed[1], unclosed[2]]
          end
          closed = open_elems.pop
          open_elems.last[2] << [:elem, closed[1], closed[2], etag_raw]
        else
          open_elems.last[2] << [:bogus_etag, etag_raw]
        end
      else
        open_elems.last[2] << token
      end
    }
    # Elements still open at the end of input are closed without an end tag.
    until open_elems.length <= 1
      unclosed = open_elems.pop
      open_elems.last[2] << [:elem, unclosed[1], unclosed[2]]
    end
    open_elems[0][2]
  end

  # Runs HTree.fix_element over every :elem structure in structure_list and
  # returns the resulting list; other structures are copied through as is.
  #
  # Structures which fix_element hands back as not belonging to the element
  # are put at the front of the work list, so they are processed next as
  # siblings of that element.  The work list is a copy of structure_list.
  def HTree.fix_structure_list(structure_list, is_xml, is_html)
    fixed = []
    pending = structure_list.dup
    until pending.empty?
      structure = pending.shift
      unless structure[0] == :elem
        fixed << structure
        next
      end
      fixed_elem, leftover = fix_element(structure, [], [], is_xml, is_html)
      fixed << fixed_elem
      pending = leftover + pending
    end
    fixed
  end

  # Decides which children of an :elem structure really belong to it.
  #
  # Returns a pair [fixed element structure, rest], where rest is the list
  # of structures which must follow the element as siblings instead of being
  # its children.
  #
  # * An element with an end tag keeps all its children (each fixed through
  #   HTree.fix_structure_list) and rest is empty.
  # * An element whose ElementContent entry is :EMPTY keeps no children; all
  #   of them are returned as rest.
  # * Otherwise children are taken until the first child element whose name
  #   is not containable.  For a known tag name that is decided from
  #   ElementContent, ElementInclusions and ElementExclusions (accumulated
  #   with excluded_tags and included_tags from the ancestors); for an
  #   unknown tag name only excluded_tags are rejected.
  #
  # Note that the children array of elem is consumed in place.
  def HTree.fix_element(elem, excluded_tags, included_tags, is_xml, is_html)
    fold_case = !is_xml && is_html
    stag_raw_string = elem[1]
    children = elem[2]
    etag_raw_string = elem[3]

    if etag_raw_string
      return [:elem, stag_raw_string, fix_structure_list(children, is_xml, is_html), etag_raw_string], []
    end

    tagname = stag_raw_string[Pat::Name]
    tagname = tagname.downcase if fold_case
    content_model = ElementContent[tagname]
    return [:elem, stag_raw_string, []], children if content_model == :EMPTY

    possible_tags = content_model == :CDATA ? [] : content_model
    if possible_tags
      extra_exclusions = ElementExclusions[tagname]
      extra_inclusions = ElementInclusions[tagname]
      excluded_tags |= extra_exclusions if extra_exclusions
      included_tags |= extra_inclusions if extra_inclusions
      containable_tags = (possible_tags | included_tags) - excluded_tags
      uncontainable_tags = ElementContent.keys - containable_tags
    else
      # If the tagname is unknown, it is assumed that any element
      # except excluded can be contained.
      uncontainable_tags = excluded_tags
    end

    fixed_children = []
    rest = children
    until rest.empty?
      child = rest[0]
      unless child[0] == :elem
        fixed_children << rest.shift
        next
      end
      child_tagname = child[1][Pat::Name]
      child_tagname = child_tagname.downcase if fold_case
      # The first uncontainable child ends this element; it and everything
      # after it are handed back to the caller.
      break if uncontainable_tags.include? child_tagname
      rest.shift
      fixed_child, leftover = fix_element(child, excluded_tags, included_tags, is_xml, is_html)
      fixed_children << fixed_child
      rest = leftover + rest
    end
    return [:elem, stag_raw_string, fixed_children], rest
  end

  # Converts one structure (as produced by HTree.parse_pairs and
  # HTree.fix_structure_list) into a node object.
  #
  # The first item of structure selects the node class; the second item is
  # the raw string handed to that class' parse method.  For :elem, the
  # children are built recursively with the context of the element's start
  # tag.  An :elem which has no children and no end tag, is in the XHTML
  # namespace and is declared :EMPTY in HTree::ElementContent is built from
  # the start tag only.
  #
  # Raises Exception for an unknown kind of structure.
  def HTree.build_node(structure, is_xml, is_html, inherited_context=DefaultContext)
    raw = structure[1]
    case structure[0]
    when :text_pcdata then Text.parse_pcdata(raw)
    when :elem
      children = structure[2]
      etag_raw = structure[3]
      etag = etag_raw && ETag.parse(etag_raw, is_xml, is_html)
      stag = STag.parse(raw, true, is_xml, is_html, inherited_context)
      stag_only = children.empty? && !etag &&
                  stag.element_name.namespace_uri == 'http://www.w3.org/1999/xhtml' &&
                  HTree::ElementContent[stag.element_name.local_name] == :EMPTY
      if stag_only
        Elem.new!(stag)
      else
        child_nodes = children.map {|c| build_node(c, is_xml, is_html, stag.context) }
        Elem.new!(stag, child_nodes, etag)
      end
    when :emptytag
      empty_tag = STag.parse(raw, false, is_xml, is_html, inherited_context)
      Elem.new!(empty_tag)
    when :bogus_etag then BogusETag.parse(raw, is_xml, is_html)
    when :xmldecl then XMLDecl.parse(raw)
    when :doctype then DocType.parse(raw, is_xml, is_html)
    when :procins then ProcIns.parse(raw)
    when :comment then Comment.parse(raw)
    when :text_cdata_content then Text.parse_cdata_content(raw)
    when :text_cdata_section then Text.parse_cdata_section(raw)
    else
      raise Exception, "[bug] unknown structure: #{structure.inspect}"
    end
  end

  # Builds an STag from the raw text of a start tag (is_stag true) or an
  # empty-element tag (is_stag false).
  #
  # The tag is first matched against the valid pattern and, failing that,
  # against the invalid one; HTree::Error is raised if neither matches.
  # The element name and the attribute names are downcased when the input is
  # HTML and not XML.  An attribute given as a bare value gets its name from
  # OmittedAttrName for the element when there is an entry (falling back to
  # the downcased value), otherwise the value itself is used as the name.
  def STag.parse(raw_string, is_stag, is_xml, is_html, inherited_context=DefaultContext)
    fold_case = !is_xml && is_html
    raw_attrs = []
    if (is_stag ? /\A#{Pat::ValidStartTag_C}\z/o : /\A#{Pat::ValidEmptyTag_C}\z/o) =~ raw_string
      # Keep the captures before scan replaces the match data.
      qname, attr_source = $1, $2
      attr_source.scan(Pat::ValidAttr_C) {
        raw_attrs << ($5 ? [nil, $5] : [$1, $2 || $3 || $4])
      }
    elsif (is_stag ? /\A#{Pat::InvalidStartTag_C}\z/o : /\A#{Pat::InvalidEmptyTag_C}\z/o) =~ raw_string
      qname, attr_source, last_attr = $1, $2, $3
      attr_source.scan(Pat::InvalidAttr1_C) {
        raw_attrs << ($5 ? [nil, $5] : [$1, $2 || $3 || $4])
      }
      if last_attr
        /#{Pat::InvalidAttr1End_C}/o =~ last_attr
        raw_attrs << [$1, $2 || $3]
      end
    else
      raise HTree::Error, "cannot recognize as start tag or empty tag: #{raw_string.inspect}"
    end

    qname = qname.downcase if fold_case

    attrs = raw_attrs.map {|aname, aval|
      if aname
        [fold_case ? aname.downcase : aname, Text.parse_pcdata(aval)]
      elsif (val2name = OmittedAttrName[qname])
        # Bare value of an element with known omissible attribute names.
        aval_downcase = aval.downcase
        [val2name.fetch(aval_downcase, aval_downcase), Text.new(aval)]
      else
        [aval, Text.new(aval)]
      end
    }

    stag = STag.new(qname, attrs, inherited_context)
    stag.raw_string = raw_string
    stag
  end

  # Builds an ETag from the raw text of an end tag.
  #
  # The name is downcased when the input is HTML and not XML.
  # Raises HTree::Error unless raw_string as a whole matches Pat::EndTag_C.
  def ETag.parse(raw_string, is_xml, is_html)
    matched = /\A#{Pat::EndTag_C}\z/o.match(raw_string)
    unless matched
      raise HTree::Error, "cannot recognize as end tag: #{raw_string.inspect}"
    end

    qname = matched[1]
    qname = qname.downcase if !is_xml && is_html

    etag = new(qname)
    etag.raw_string = raw_string
    etag
  end

  # Builds a BogusETag from the raw text of an end tag.
  #
  # The name is downcased when the input is HTML and not XML.
  # Raises HTree::Error unless raw_string as a whole matches Pat::EndTag_C.
  def BogusETag.parse(raw_string, is_xml, is_html)
    matched = /\A#{Pat::EndTag_C}\z/o.match(raw_string)
    unless matched
      raise HTree::Error, "cannot recognize as end tag: #{raw_string.inspect}"
    end

    qname = matched[1]
    qname = qname.downcase if !is_xml && is_html

    bogus_etag = new(qname)
    bogus_etag.raw_string = raw_string
    bogus_etag
  end

  # Builds a Text from raw PCDATA, repairing malformed references.
  #
  # * A reference which already ends with ";" is kept as is.
  # * A numeric reference without ";" gets the ";" appended.
  # * A lone "&" becomes "&amp;".
  # * A name without ";" gets the ";" appended if NamedCharactersPattern
  #   matches the name; otherwise its "&" becomes "&amp;".
  #
  # The unmodified raw_string is recorded on the result.
  def Text.parse_pcdata(raw_string)
    repaired = raw_string.gsub(/&(?:(?:#[0-9]+|#x[0-9a-fA-F]+|([A-Za-z][A-Za-z0-9]*));?)?/o) {|ref|
      # Take the capture before the matches below replace the match data.
      entity_name = $1
      if /;\z/ =~ ref
        ref
      elsif /\A&#/ =~ ref
        "#{ref};"
      elsif ref == '&'
        '&amp;'
      elsif NamedCharactersPattern =~ entity_name
        "&#{entity_name};"
      else
        "&amp;#{entity_name}"
      end
    }
    # When nothing was repaired, use the raw_string object itself.
    repaired = raw_string if repaired == raw_string
    text = Text.new_internal(repaired)
    text.raw_string = raw_string
    text
  end

  # Builds a Text whose content is raw_string taken literally, and records
  # raw_string on it.
  def Text.parse_cdata_content(raw_string)
    text = Text.new(raw_string)
    text.raw_string = raw_string
    text
  end

  # Builds a Text from the raw text of a CDATA section, using the first
  # capture of Pat::CDATA_C as the content.
  #
  # Raises HTree::Error unless raw_string as a whole matches Pat::CDATA_C.
  def Text.parse_cdata_section(raw_string)
    matched = /\A#{Pat::CDATA_C}\z/o.match(raw_string)
    unless matched
      raise HTree::Error, "cannot recognize as CDATA section: #{raw_string.inspect}"
    end

    text = Text.new(matched[1])
    text.raw_string = raw_string
    text
  end

  # Builds an XMLDecl from the raw text of an XML declaration.
  #
  # The version, encoding and standalone values are each taken from
  # whichever of their two alternative captures of Pat::XmlDecl_C matched.
  # standalone is true for 'yes', false for 'no' and nil otherwise.
  #
  # Raises HTree::Error unless raw_string as a whole matches Pat::XmlDecl_C.
  def XMLDecl.parse(raw_string)
    unless /\A#{Pat::XmlDecl_C}\z/o =~ raw_string
      raise HTree::Error, "cannot recognize as XML declaration: #{raw_string.inspect}"
    end

    version = $1 || $2
    encoding = $3 || $4
    # Any other value, including a missing one, looks up as nil.
    standalone = { 'yes' => true, 'no' => false }[$5 || $6]

    xmldecl = XMLDecl.new(version, encoding, standalone)
    xmldecl.raw_string = raw_string
    xmldecl
  end

  # Builds a DocType from the raw text of a document type declaration.
  #
  # The root element name is the first capture of Pat::DocType_C; the public
  # and system identifiers are each taken from whichever of their two
  # alternative captures matched.  The root element name is downcased when
  # the input is HTML and not XML.
  #
  # Raises HTree::Error unless raw_string as a whole matches Pat::DocType_C.
  def DocType.parse(raw_string, is_xml, is_html)
    unless /\A#{Pat::DocType_C}\z/o =~ raw_string
      # This is a DOCTYPE failure, so the message must not claim an XML
      # declaration was expected.
      raise HTree::Error, "cannot recognize as document type declaration: #{raw_string.inspect}"
    end

    root_element_name = $1
    public_identifier = $2 || $3
    system_identifier = $4 || $5

    root_element_name = root_element_name.downcase if !is_xml && is_html

    result = DocType.new(root_element_name, public_identifier, system_identifier)
    result.raw_string = raw_string
    result
  end

  # Builds a ProcIns from the raw text of a processing instruction, using
  # the first capture of Pat::XmlProcIns_C as the target and the second as
  # the content.
  #
  # Raises HTree::Error unless raw_string as a whole matches
  # Pat::XmlProcIns_C.
  def ProcIns.parse(raw_string)
    matched = /\A#{Pat::XmlProcIns_C}\z/o.match(raw_string)
    unless matched
      raise HTree::Error, "cannot recognize as processing instruction: #{raw_string.inspect}"
    end

    procins = ProcIns.new(matched[1], matched[2])
    procins.raw_string = raw_string
    procins
  end

  # Builds a Comment from the raw text of a comment, using the first capture
  # of Pat::Comment_C as the content.
  #
  # Raises HTree::Error unless raw_string as a whole matches Pat::Comment_C.
  def Comment.parse(raw_string)
    matched = /\A#{Pat::Comment_C}\z/o.match(raw_string)
    unless matched
      raise HTree::Error, "cannot recognize as comment: #{raw_string.inspect}"
    end

    comment = Comment.new(matched[1])
    comment.raw_string = raw_string
    comment
  end

  # :startdoc:
end