/usr/lib/ruby/vendor_ruby/rspec/support/encoded_string.rb is in ruby-rspec-support 3.5.0c3e0m0s0-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
module RSpec
module Support
# @private
class EncodedString
# Encoding names kept as constants so the same String objects are reused
# instead of being allocated again on every use.
UTF_8 = "UTF-8"
US_ASCII = "US-ASCII"

# Text substituted for bytes that cannot be represented.
#
# Ruby's own default replacement is U+FFFD ("\xEF\xBF\xBD") for Unicode
# encoding forms and "?" ("\x3F") for everything else; a plain "?" is used
# here in all cases.
#
# Note on ':invalid => :replace': since MRI 2.1 it also replaces an invalid
# byte sequence.
#   see https://github.com/ruby/ruby/blob/v2_1_0/NEWS#L176
#       https://www.ruby-forum.com/topic/6861247
#       https://twitter.com/nalsh/status/553413844685438976
#
# For example, given:
#   "\x80".force_encoding("Emacs-Mule").encode(:invalid => :replace).bytes.to_a
#
#   On MRI 2.1 or above: 63  # '?'
#   else               : 128 # "\x80"
REPLACE = "?"

# String#encode options: replace both invalid and undefined bytes.
ENCODE_UNCONVERTABLE_BYTES = { :invalid => :replace, :undef => :replace, :replace => REPLACE }

# String#encode options: replace invalid bytes only.
ENCODE_NO_CONVERTER = { :invalid => :replace, :replace => REPLACE }
# Wraps +string+, converting it to +encoding+.
#
# @param string the text to wrap
# @param encoding the target encoding handed to #matching_encoding
#
# @encoding must be assigned first because #matching_encoding reads it.
def initialize(string, encoding=nil)
  @encoding        = encoding
  @source_encoding = detect_source_encoding(string)
  @string          = matching_encoding(string)
end
# Value computed by #detect_source_encoding when the object was built.
attr_reader :source_encoding

# Forward a fixed set of String methods to the wrapped string, skipping any
# that String does not define on the running Ruby.
%w[eql? lines == encoding empty?].each do |name|
  next unless String.method_defined?(name)

  define_method(name) { |*args, &block| @string.__send__(name, *args, &block) }
end
# Appends +string+ to the wrapped string after passing it through
# #matching_encoding. Returns the wrapped String (not the wrapper).
def <<(string)
  addition = matching_encoding(string)
  @string << addition
end
if Ruby.jruby?
# Splits the wrapped string on +regex_or_string+ (first passed through
# #matching_encoding).
#
# JRuby raises an ArgumentError when splitting a source string that contains
# invalid bytes; in that case the invalid bytes are replaced and the split is
# retried with the separator exactly as given.
def split(regex_or_string)
  begin
    separator = matching_encoding(regex_or_string)
    @string.split(separator)
  rescue ArgumentError
    remove_invalid_bytes(@string).split(regex_or_string)
  end
end
else
# Splits the wrapped string on +regex_or_string+ after passing the separator
# through #matching_encoding.
def split(regex_or_string)
  separator = matching_encoding(regex_or_string)
  @string.split(separator)
end
end
# Returns the wrapped String object itself (not a copy).
def to_s
  @string
end
# to_str is the hook Ruby uses for implicit conversion to String.
alias to_str to_s
if String.method_defined?(:encoding)
private
# Encoding Exceptions:
#
# Raised by Encoding and String methods:
#   Encoding::UndefinedConversionError:
#     when a transcoding operation fails
#     if the String contains characters invalid for the target encoding
#     e.g. "\x80".encode('UTF-8','ASCII-8BIT')
#     vs "\x80".encode('UTF-8','ASCII-8BIT', undef: :replace, replace: '<undef>')
#     # => '<undef>'
#   Encoding::CompatibilityError
#     when Encoding.compatible?(str1, str2) is nil
#     e.g. utf_16le_emoji_string.split("\n")
#     e.g. valid_unicode_string.encode(utf8_encoding) << ascii_string
#   Encoding::InvalidByteSequenceError:
#     when the string being transcoded contains a byte invalid for
#     either the source or target encoding
#     e.g. "\x80".encode('UTF-8','US-ASCII')
#     vs "\x80".encode('UTF-8','US-ASCII', invalid: :replace, replace: '<byte>')
#     # => '<byte>'
#   ArgumentError
#     when operating on a string with invalid bytes
#     e.g. "\x80".split("\n")
#   TypeError
#     when a symbol is passed as an encoding
#     Encoding.find(:"UTF-8")
#     when calling force_encoding on an object
#     that doesn't respond to #to_str
#
# Raised by transcoding methods:
#   Encoding::ConverterNotFoundError:
#     when a named encoding does not correspond with a known converter
#     e.g. 'abc'.force_encoding('UTF-8').encode('foo')
#     or a converter path cannot be found
#     e.g. "\x80".force_encoding('ASCII-8BIT').encode('Emacs-Mule')
#
# Raised by byte <-> char conversions
#   RangeError: out of char range
#     e.g. the UTF-16LE emoji: 128169.chr
#
# Returns a copy of +string+ converted to @encoding.
#
# Invalid bytes are replaced first, then a plain transcode is attempted.
# If that raises because a character is undefined or invalid in the target,
# the transcode is retried with replacement enabled. If no converter exists,
# the bytes are relabelled as @encoding and invalid sequences are replaced.
#
# The encode options are written inline rather than passed as a Hash object:
# Ruby 3.0+ no longer treats a trailing positional Hash as keyword options,
# so String#encode would take the Hash as an encoding argument and raise
# TypeError (Ruby 2.7 emits a deprecation warning). The hash-rocket form
# keeps the file parseable by older Rubies.
def matching_encoding(string)
  string = remove_invalid_bytes(string)
  string.encode(@encoding)
rescue Encoding::UndefinedConversionError, Encoding::InvalidByteSequenceError
  string.encode(@encoding, :invalid => :replace, :undef => :replace, :replace => REPLACE)
rescue Encoding::ConverterNotFoundError
  string.dup.force_encoding(@encoding).encode(:invalid => :replace, :replace => REPLACE)
end
# Replaces invalid bytes in +string+ with REPLACE and returns the result, so
# that later string operations do not raise ArgumentError.
if String.method_defined?(:scrub)
  # Native implementation where String#scrub exists.
  #   https://github.com/ruby/ruby/blob/eeb05e8c11/doc/NEWS-2.1.0#L120-L123
  #   https://github.com/ruby/ruby/blob/v2_1_0/string.c#L8242
  #   https://github.com/hsbt/string-scrub
  #   https://github.com/rubinius/rubinius/blob/v2.5.2/kernel/common/string.rb#L1913-L1972
  def remove_invalid_bytes(string)
    string.scrub(REPLACE)
  end
else
  # Fallback, see http://stackoverflow.com/a/8711118/879854
  # Walks the string char by char and swaps out every char whose encoding is
  # invalid; that is a pretty good proxy for the invalid byte sequence that
  # causes an ArgumentError.
  def remove_invalid_bytes(string)
    cleaned = string.chars.map { |char| char.valid_encoding? ? char : REPLACE }
    cleaned.join
  end
end
# Returns whatever +string+ reports from its #encoding method.
def detect_source_encoding(string)
string.encoding
end
# Chooses an encoding for working with both sources: the result of
# Encoding.compatible? when it is truthy, otherwise Encoding.default_external.
def self.pick_encoding(source_a, source_b)
  shared = Encoding.compatible?(source_a, source_b)
  return shared if shared

  Encoding.default_external
end
else
# Variant defined when String has no #encoding method: both arguments are
# ignored and, because the body is empty, nil is returned.
def self.pick_encoding(_source_a, _source_b)
end
private
# Variant defined when String has no #encoding method: returns +string+
# unchanged (the same object, no conversion).
def matching_encoding(string)
string
end
# Variant defined when String has no #encoding method: the argument is
# ignored and the US_ASCII constant is always returned.
def detect_source_encoding(_string)
US_ASCII
end
end
end
end
end
|