/usr/lib/ruby/vendor_ruby/chef_zero/solr/solr_parser.rb is in chef-zero 4.5.0-2.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 | require 'chef_zero/solr/query/binary_operator'
require 'chef_zero/solr/query/unary_operator'
require 'chef_zero/solr/query/term'
require 'chef_zero/solr/query/phrase'
require 'chef_zero/solr/query/range_query'
require 'chef_zero/solr/query/subquery'
module ChefZero
module Solr
class SolrParser
def initialize(query_string)
@query_string = query_string
@index = 0
end
def parse
read_expression
end
#
# Tokenization
#
def peek_token
@next_token ||= parse_token
end
def next_token
result = peek_token
@next_token = nil
result
end
def parse_token
# Skip whitespace
skip_whitespace
return nil if eof?
# Operators
operator = peek_operator_token
if operator
@index+=operator.length
operator
else
# Everything that isn't whitespace or an operator, is part of a term
# (characters plus backslashed escaped characters)
start_index = @index
begin
if @query_string[@index] == '\\'
@index+=1
end
@index+=1 if !eof?
end while !eof? && peek_term_token
@query_string[start_index..@index-1]
end
end
def skip_whitespace
if @query_string[@index] =~ /\s/
whitespace = /\s+/.match(@query_string, @index) || peek
@index += whitespace[0].length
end
end
def peek_term_token
return nil if @query_string[@index] =~ /\s/
op = peek_operator_token
return !op || op == '-'
end
def peek_operator_token
if ['"', '+', '-', '!', '(', ')', '{', '}', '[', ']', '^', ':'].include?(@query_string[@index])
return @query_string[@index]
else
result = @query_string[@index..@index+1]
if ['&&', '||'].include?(result)
return result
end
end
nil
end
def eof?
!@next_token && @index >= @query_string.length
end
# Parse tree creation
def read_expression
result = read_single_expression
# Expression is over when we hit a close paren or eof
# (peek_token has the side effect of skipping whitespace for us, so we
# really know if we're at eof or not)
until peek_token == ')' || eof?
operator = peek_token
if binary_operator?(operator)
next_token
else
# If 2 terms are next to each other, the default operator is OR
operator = 'OR'
end
next_expression = read_single_expression
# Build the operator, taking precedence into account
if result.is_a?(Query::BinaryOperator) &&
binary_operator_precedence(operator) > binary_operator_precedence(result.operator)
# a+b*c -> a+(b*c)
new_right = Query::BinaryOperator.new(result.right, operator, next_expression)
result = Query::BinaryOperator.new(result.left, result.operator, new_right)
else
# a*b+c -> (a*b)+c
result = Query::BinaryOperator.new(result, operator, next_expression)
end
end
result
end
def parse_error(token, str)
raise "Error on token '#{token}' at #{@index} of '#{@query_string}': #{str}"
end
def read_single_expression
token = next_token
# If EOF, we have a problem Houston
if !token
parse_error(nil, "Expected expression!")
# If it's an unary operand, build that
elsif unary_operator?(token)
operand = read_single_expression
# TODO We rely on all unary operators having higher precedence than all
# binary operators. Check if this is the case.
Query::UnaryOperator.new(token, operand)
# If it's the start of a phrase, read the terms in the phrase
elsif token == '"'
# Read terms until close "
phrase_terms = []
until (term = next_token) == '"'
phrase_terms << Query::Term.new(term)
end
Query::Phrase.new(phrase_terms)
# If it's the start of a range query, build that
elsif token == '{' || token == '['
left = next_token
parse_error(left, "Expected left term in range query") if !left
to = next_token
parse_error(left, "Expected TO in range query") if to != "TO"
right = next_token
parse_error(right, "Expected left term in range query") if !right
end_range = next_token
parse_error(right, "Expected end range '#{end_range}") if !['}', ']'].include?(end_range)
Query::RangeQuery.new(left, right, token == '[', end_range == ']')
elsif token == '('
subquery = read_expression
close_paren = next_token
parse_error(close_paren, "Expected ')'") if close_paren != ')'
Query::Subquery.new(subquery)
# If it's the end of a closure, raise an exception
elsif ['}',']',')'].include?(token)
parse_error(token, "Unexpected end paren")
# If it's a binary operator, raise an exception
elsif binary_operator?(token)
parse_error(token, "Unexpected binary operator")
# Otherwise it's a term.
else
term = Query::Term.new(token)
if peek_token == ':'
Query::BinaryOperator.new(term, next_token, read_single_expression)
else
term
end
end
end
def unary_operator?(token)
[ 'NOT', '+', '-' ].include?(token)
end
def binary_operator?(token)
[ 'AND', 'OR', '^', ':'].include?(token)
end
def binary_operator_precedence(token)
case token
when '^'
4
when ':'
3
when 'AND'
2
when 'OR'
1
end
end
DEFAULT_FIELD = 'text'
end
end
end
|