This file is indexed.

/usr/share/spamassassin/20_html_tests.cf is in spamassassin 3.4.1-8build1.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
# SpamAssassin rules file: HTML tests
#
# Please don't modify this file as your changes will be overwritten with
# the next update. Use @@LOCAL_RULES_DIR@@/local.cf instead.
# See 'perldoc Mail::SpamAssassin::Conf' for details.
#
# <@LICENSE>
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to you under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at:
# 
#     http://www.apache.org/licenses/LICENSE-2.0
# 
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# </@LICENSE>
#
###########################################################################

require_version @@VERSION@@

# HTML parser tests
#
# please sort these by eval type then name

# Short HTML that also contains a linked image, split into three length
# bands.  The __HTML_LENGTH_* and __HTML_LINK_IMAGE subrules are declared
# in the HTMLEval section further down in this file.
meta     HTML_SHORT_LINK_IMG_1  (__HTML_LENGTH_0000_1024 && __HTML_LINK_IMAGE)
describe HTML_SHORT_LINK_IMG_1  HTML is very short with a linked image

meta     HTML_SHORT_LINK_IMG_2  (__HTML_LENGTH_1024_1536 && __HTML_LINK_IMAGE)
describe HTML_SHORT_LINK_IMG_2  HTML is very short with a linked image

meta     HTML_SHORT_LINK_IMG_3  (__HTML_LENGTH_1536_2048 && __HTML_LINK_IMAGE)
describe HTML_SHORT_LINK_IMG_3  HTML is very short with a linked image


# Hits when the HTML 'length' value is below 384 (__HTML_LENGTH_384) and a
# CENTER tag exists (__TAG_EXISTS_CENTER).
meta     HTML_SHORT_CENTER  __HTML_LENGTH_384 && __TAG_EXISTS_CENTER
describe HTML_SHORT_CENTER  HTML is very short with CENTER tag


# HTML title versus Subject comparison: hits when __HTML_TITLE_SUBJ_DIFF
# (eval html_title_subject_ratio with threshold 3.5) fires and
# __MIME_ATTACHMENT does not.
meta HTML_TITLE_SUBJ_DIFF	__HTML_TITLE_SUBJ_DIFF && !__MIME_ATTACHMENT
# Every other scored rule in this file carries a description; without one
# a hit on this rule is listed in reports with no explanatory text.
describe HTML_TITLE_SUBJ_DIFF	HTML title and Subject differ significantly

# Needs both the faraway-charset eval subrule and __HIGHBITS (a body regex
# declared later in this file).  The rule is flagged 'userconf'; see
# Mail::SpamAssassin::Conf for what that tflag implies.
meta     HTML_CHARSET_FARAWAY  __HTML_CHARSET_FARAWAY && __HIGHBITS
describe HTML_CHARSET_FARAWAY  A foreign language charset used in HTML markup
tflags   HTML_CHARSET_FARAWAY  userconf

# MIME_HTML_ONLY hit, but __TAG_EXISTS_HTML did not.  Neither operand is
# declared in the rules shown in this file.
meta     HTML_MIME_NO_HTML_TAG  (MIME_HTML_ONLY && !__TAG_EXISTS_HTML)
describe HTML_MIME_NO_HTML_TAG  HTML-only message, but there is no HTML tag

# HTML_MESSAGE hit while __MIME_HTML did not.
meta     HTML_MISSING_CTYPE  !__MIME_HTML && HTML_MESSAGE
describe HTML_MISSING_CTYPE  Message is HTML without HTML Content-Type

###########################################################################
# rawbody HTML tests

# Case-insensitive match for a tag whose onMouseOver attribute is followed,
# within the same tag, by an assignment to window.status.
rawbody  HIDE_WIN_STATUS  /<[^>]+onMouseOver=[^>]+window\.status=/i
describe HIDE_WIN_STATUS  Javascript to hide URLs in browser

# One or more <!...> constructs wedged directly inside text:
#   _A: between two word characters
#   _B: between a character that is neither whitespace nor '>' and a
#       character that is neither whitespace nor '<'
rawbody __OBFUSCATING_COMMENT_A  /\w(?:<![^>]*>)+\w/
rawbody __OBFUSCATING_COMMENT_B  /[^\s>](?:<![^>]*>)+[^\s<]/

# The meta is only defined when both HTMLEval and MIMEEval are loaded.
# HTML_MESSAGE is declared in the HTMLEval section of this file;
# MIME_HTML_ONLY presumably comes from the MIMEEval rules (not shown here).
ifplugin Mail::SpamAssassin::Plugin::HTMLEval
  ifplugin Mail::SpamAssassin::Plugin::MIMEEval
    meta     OBFUSCATING_COMMENT  ((__OBFUSCATING_COMMENT_A && HTML_MESSAGE) || (__OBFUSCATING_COMMENT_B && MIME_HTML_ONLY)) && !__ISO_2022_JP_DELIM
    describe OBFUSCATING_COMMENT  HTML comments which obfuscate text
  endif
endif

# Spam that is assembled from a Javascript array.
# __JS_FROMCHARCODE matches String.fromCharCode( applied to an indexed
# element that is followed by the XOR operator; __JS_DOCWRITE matches a
# document.write reference.  Both must hit.
rawbody  __JS_FROMCHARCODE  /String\.fromCharCode\s*\(\s*\S+\s*\[\s*\S+\s*\]\s*\^/
rawbody  __JS_DOCWRITE      /document\.write/
meta     JS_FROMCHARCODE    __JS_FROMCHARCODE && __JS_DOCWRITE
describe JS_FROMCHARCODE    Document is built from a Javascript charcode array

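# A potentially useful rule, currently disabled, that may be reinstated.
# The decimal entities it matches decode to these punctuation characters:
# ! $ % ' ( ) , - . / : ; = ? @ _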
#rawbody ENTITY_DEC_OTHER	/\&\#0*(?:3[3679]|4[014567]|5[89]|6[134]|95)\;/
#describe ENTITY_DEC_OTHER	HTML contains needlessly encoded punctuation

# Helper for HTML_CHARSET_FARAWAY: four consecutive repetitions of a
# character in the range \x80-\xff, each optionally followed by one
# arbitrary character.
body __HIGHBITS  /(?:[\x80-\xff].?){4}/

###########################################################################

ifplugin Mail::SpamAssassin::Plugin::HTMLEval

# Control rule for HTML: every HTML spam rule is expected to achieve a
# better S/O ratio than this one.
body     HTML_MESSAGE  eval:html_test('html')
describe HTML_MESSAGE  HTML included in message

# Tests against the text collected for HTML comments.
# HTML_COMMENT_SHORT: a <!...> construct not starting with '-' and holding
# at most 6 characters.
body     HTML_COMMENT_SHORT  eval:html_text_match('comment', '<!(?!-).{0,6}>')
describe HTML_COMMENT_SHORT  HTML comment is very short

# HTML_COMMENT_SAVED_URL: a "saved from url=(NNNN)" comment with a
# four-digit number in the parentheses.
body     HTML_COMMENT_SAVED_URL  eval:html_text_match('comment', '<!-- saved from url=\(\d{4}\)')
describe HTML_COMMENT_SAVED_URL  HTML message is a saved web page

# Flag test on the parser's 'embeds' result.
body     HTML_EMBEDS  eval:html_test('embeds')
describe HTML_EMBEDS  HTML with embedded plugin object

# Range test on 'closed_extra_ratio', from 0.09 upwards with no upper bound.
body     HTML_EXTRA_CLOSE  eval:html_range('closed_extra_ratio', '0.09', 'inf')
describe HTML_EXTRA_CLOSE  HTML contains far too many close tags



# Font size bands on the parser's 'max_size' value: 5..6 is "large",
# 6..inf is "huge".
body     HTML_FONT_SIZE_LARGE  eval:html_range('max_size', '5', '6')
describe HTML_FONT_SIZE_LARGE  HTML font size is large

body     HTML_FONT_SIZE_HUGE  eval:html_range('max_size', '6', 'inf')
describe HTML_FONT_SIZE_HUGE  HTML font size is huge

# Flag tests on font colour contrast and font face.
body     HTML_FONT_LOW_CONTRAST  eval:html_test('font_low_contrast')
describe HTML_FONT_LOW_CONTRAST  HTML font color similar or identical to background

body     HTML_FONT_FACE_BAD  eval:html_test('font_face_bad')
describe HTML_FONT_FACE_BAD  HTML font face is not a word


# Flag test on the parser's 'form_action_mailto' result.
body     HTML_FORMACTION_MAILTO  eval:html_test('form_action_mailto')
describe HTML_FORMACTION_MAILTO  HTML includes a form which sends mail

# HTML_IMAGE_ONLY - images accompanied by little raw HTML (absolute
# amounts).  Eight consecutive bands, each 400 wide, covering 0 to 3200.
body     HTML_IMAGE_ONLY_04  eval:html_image_only('0000','0400')
describe HTML_IMAGE_ONLY_04  HTML: images with 0-400 bytes of words

body     HTML_IMAGE_ONLY_08  eval:html_image_only('0400','0800')
describe HTML_IMAGE_ONLY_08  HTML: images with 400-800 bytes of words

body     HTML_IMAGE_ONLY_12  eval:html_image_only('0800','1200')
describe HTML_IMAGE_ONLY_12  HTML: images with 800-1200 bytes of words

body     HTML_IMAGE_ONLY_16  eval:html_image_only('1200','1600')
describe HTML_IMAGE_ONLY_16  HTML: images with 1200-1600 bytes of words

body     HTML_IMAGE_ONLY_20  eval:html_image_only('1600','2000')
describe HTML_IMAGE_ONLY_20  HTML: images with 1600-2000 bytes of words

body     HTML_IMAGE_ONLY_24  eval:html_image_only('2000','2400')
describe HTML_IMAGE_ONLY_24  HTML: images with 2000-2400 bytes of words

body     HTML_IMAGE_ONLY_28  eval:html_image_only('2400','2800')
describe HTML_IMAGE_ONLY_28  HTML: images with 2400-2800 bytes of words

body     HTML_IMAGE_ONLY_32  eval:html_image_only('2800','3200')
describe HTML_IMAGE_ONLY_32  HTML: images with 2800-3200 bytes of words

# HTML_IMAGE_RATIO - more image area than text (ratio).  Four consecutive
# bands, each 0.002 wide, covering 0.000 to 0.008.
body     HTML_IMAGE_RATIO_02  eval:html_image_ratio('0.000','0.002')
describe HTML_IMAGE_RATIO_02  HTML has a low ratio of text to image area

body     HTML_IMAGE_RATIO_04  eval:html_image_ratio('0.002','0.004')
describe HTML_IMAGE_RATIO_04  HTML has a low ratio of text to image area

body     HTML_IMAGE_RATIO_06  eval:html_image_ratio('0.004','0.006')
describe HTML_IMAGE_RATIO_06  HTML has a low ratio of text to image area

body     HTML_IMAGE_RATIO_08  eval:html_image_ratio('0.006','0.008')
describe HTML_IMAGE_RATIO_08  HTML has a low ratio of text to image area

# HTML obfuscation: range tests on 'obfuscation_ratio'.
# Only the bands listed here are defined; 40-50, 60-70 and 80-90 have no
# rule in this file.
body     HTML_OBFUSCATE_05_10  eval:html_range('obfuscation_ratio','.05','.1')
describe HTML_OBFUSCATE_05_10  Message is 5% to 10% HTML obfuscation

body     HTML_OBFUSCATE_10_20  eval:html_range('obfuscation_ratio','.1','.2')
describe HTML_OBFUSCATE_10_20  Message is 10% to 20% HTML obfuscation

body     HTML_OBFUSCATE_20_30  eval:html_range('obfuscation_ratio','.2','.3')
describe HTML_OBFUSCATE_20_30  Message is 20% to 30% HTML obfuscation

body     HTML_OBFUSCATE_30_40  eval:html_range('obfuscation_ratio','.3','.4')
describe HTML_OBFUSCATE_30_40  Message is 30% to 40% HTML obfuscation

body     HTML_OBFUSCATE_50_60  eval:html_range('obfuscation_ratio','.5','.6')
describe HTML_OBFUSCATE_50_60  Message is 50% to 60% HTML obfuscation

body     HTML_OBFUSCATE_70_80  eval:html_range('obfuscation_ratio','.7','.8')
describe HTML_OBFUSCATE_70_80  Message is 70% to 80% HTML obfuscation

body     HTML_OBFUSCATE_90_100  eval:html_range('obfuscation_ratio','.9','1.0')
describe HTML_OBFUSCATE_90_100  Message is 90% to 100% HTML obfuscation

# Tag balance tests: hit when the balance for the named tag is not zero.
body     HTML_TAG_BALANCE_BODY  eval:html_tag_balance('body', '!= 0')
describe HTML_TAG_BALANCE_BODY  HTML has unbalanced "body" tags

body     HTML_TAG_BALANCE_HEAD  eval:html_tag_balance('head', '!= 0')
describe HTML_TAG_BALANCE_HEAD  HTML has unbalanced "head" tags

# Tag existence test for "bgsound".
body     HTML_TAG_EXIST_BGSOUND  eval:html_tag_exists('bgsound')
describe HTML_TAG_EXIST_BGSOUND  HTML has "bgsound" tag

# Percentage of tags that are not legal elements in HTML: range tests on
# 'bad_tag_ratio'.  Only the bands listed here are defined.
body     HTML_BADTAG_40_50  eval:html_range('bad_tag_ratio','0.40','0.50')
describe HTML_BADTAG_40_50  HTML message is 40% to 50% bad tags

body     HTML_BADTAG_50_60  eval:html_range('bad_tag_ratio','0.50','0.60')
describe HTML_BADTAG_50_60  HTML message is 50% to 60% bad tags

body     HTML_BADTAG_60_70  eval:html_range('bad_tag_ratio','0.60','0.70')
describe HTML_BADTAG_60_70  HTML message is 60% to 70% bad tags

body     HTML_BADTAG_90_100  eval:html_range('bad_tag_ratio','0.90','1.00')
describe HTML_BADTAG_90_100  HTML message is 90% to 100% bad tags

# Percentage of unique non-elements in HTML: range tests on
# 'non_element_ratio'.  Only the bands listed here are defined.
body     HTML_NONELEMENT_30_40  eval:html_range('non_element_ratio','0.30','0.40')
describe HTML_NONELEMENT_30_40  30% to 40% of HTML elements are non-standard

body     HTML_NONELEMENT_40_50  eval:html_range('non_element_ratio','0.40','0.50')
describe HTML_NONELEMENT_40_50  40% to 50% of HTML elements are non-standard

body     HTML_NONELEMENT_60_70  eval:html_range('non_element_ratio','0.60','0.70')
describe HTML_NONELEMENT_60_70  60% to 70% of HTML elements are non-standard

body     HTML_NONELEMENT_80_90  eval:html_range('non_element_ratio','0.80','0.90')
describe HTML_NONELEMENT_80_90  80% to 90% of HTML elements are non-standard

# Subrules for short HTML messages with certain attributes.  These feed
# the HTML_SHORT_LINK_IMG_* metas near the top of this file.
body __HTML_LINK_IMAGE        eval:html_text_match('anchor', '<img>')
body __HTML_LENGTH_0000_1024  eval:html_range('length', '0', '1024')
body __HTML_LENGTH_1024_1536  eval:html_range('length', '1024', '1536')
body __HTML_LENGTH_1536_2048  eval:html_range('length', '1536', '2048')

# Not referenced by any rule visible in this file.
body __HTML_LENGTH_512  eval:html_eval('length', '< 512')
body __COMMENT_EXISTS   eval:html_text_match('comment', '<!.*?>')

# Operands of HTML_SHORT_CENTER.
body __HTML_LENGTH_384    eval:html_eval('length', '< 384')
body __TAG_EXISTS_CENTER  eval:html_tag_exists('center')

# Title text containing a run of 120 characters; not referenced by any
# rule visible in this file.
body __HTML_TITLE_120  eval:html_text_match('title', '.{120}')

# Operand of HTML_TITLE_SUBJ_DIFF.
body __HTML_TITLE_SUBJ_DIFF  eval:html_title_subject_ratio('3.5')

# Operand of HTML_CHARSET_FARAWAY.
body __HTML_CHARSET_FARAWAY  eval:html_charset_faraway()

# Eval test check_iframe_src(), called with no arguments.
body     HTML_IFRAME_SRC  eval:check_iframe_src()
describe HTML_IFRAME_SRC  Message has HTML IFRAME tag with SRC URI

else

# Fallback for when the HTMLEval plugin is not loaded: define these two
# subrules as constant-false metas.  Presumably this keeps rules that
# reference them (here or in other rule files) resolvable -- confirm
# against the referencing rules.
meta __COMMENT_EXISTS		0
meta __TAG_EXISTS_CENTER	0

endif

###########################################################################

ifplugin Mail::SpamAssassin::Plugin::MIMEEval

  # Operand of HTML_TITLE_SUBJ_DIFF in this file.
  # __MIME_ATTACHMENT also used in 20_meta_tests.cf
  body __MIME_ATTACHMENT  eval:check_for_mime('mime_attachment')

endif