This file is indexed.

/usr/lib/ruby/vendor_ruby/action_controller/vendor/html-scanner/html/tokenizer.rb is in ruby-actionpack-2.3 2.3.14-5.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
require 'strscan'

module HTML #:nodoc:
  
  # A simple HTML tokenizer. It simply breaks a stream of text into tokens, where each
  # token is a string. Each string represents either "text", or an HTML element.
  #
  # This currently assumes valid XHTML, which means no free < or > characters.
  #
  # Usage:
  #
  #   tokenizer = HTML::Tokenizer.new(text)
  #   while token = tokenizer.next
  #     p token
  #   end
  class Tokenizer #:nodoc:
    
    # The current (byte) position in the text
    attr_reader :position
    
    # The current line number
    attr_reader :line
    
    # Create a new Tokenizer for the given text.
    def initialize(text)
      @scanner = StringScanner.new(text)
      @position = 0
      @line = 0
      @current_line = 1
    end

    # Return the next token in the sequence, or +nil+ if there are no more tokens in
    # the stream.
    def next
      return nil if @scanner.eos?
      @position = @scanner.pos
      @line = @current_line
      if @scanner.check(/<\S/)
        update_current_line(scan_tag)
      else
        update_current_line(scan_text)
      end
    end
  
    private

      # Treat the text at the current position as a tag, and scan it. Supports
      # comments, doctype tags, and regular tags, and ignores less-than and
      # greater-than characters within quoted strings.
      def scan_tag
        tag = @scanner.getch
        if @scanner.scan(/!--/) # comment
          tag << @scanner.matched
          tag << (@scanner.scan_until(/--\s*>/) || @scanner.scan_until(/\Z/))
        elsif @scanner.scan(/!\[CDATA\[/)
          tag << @scanner.matched
          tag << (@scanner.scan_until(/\]\]>/) || @scanner.scan_until(/\Z/))
        elsif @scanner.scan(/!/) # doctype
          tag << @scanner.matched
          tag << consume_quoted_regions
        else
          tag << consume_quoted_regions
        end
        tag
      end

      # Scan all text up to the next < character and return it.
      def scan_text
        "#{@scanner.getch}#{@scanner.scan(/[^<]*/)}"
      end
      
      # Counts the number of newlines in the text and updates the current line
      # accordingly.
      def update_current_line(text)
        text.scan(/\r?\n/) { @current_line += 1 }
      end
      
      # Skips over quoted strings, so that less-than and greater-than characters
      # within the strings are ignored.
      def consume_quoted_regions
        text = ""
        loop do
          match = @scanner.scan_until(/['"<>]/) or break

          delim = @scanner.matched
          if delim == "<"
            match = match.chop
            @scanner.pos -= 1
          end

          text << match
          break if delim == "<" || delim == ">"

          # consume the quoted region
          while match = @scanner.scan_until(/[\\#{delim}]/)
            text << match
            break if @scanner.matched == delim
            text << @scanner.getch # skip the escaped character
          end
        end
        text
      end
  end
  
end