/usr/share/spamassassin/20_html_tests.cf is in spamassassin 3.4.1-8build1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
# SpamAssassin rules file: HTML tests
#
# Please don't modify this file as your changes will be overwritten with
# the next update. Use @@LOCAL_RULES_DIR@@/local.cf instead.
# See 'perldoc Mail::SpamAssassin::Conf' for details.
#
# <@LICENSE>
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to you under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at:
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# </@LICENSE>
#
###########################################################################
require_version @@VERSION@@
# HTML parser tests
#
# please sort these by eval type then name
# Short HTML combined with a linked image, bucketed by HTML length.
# __HTML_LENGTH_* and __HTML_LINK_IMAGE are the eval sub-rules defined in
# the HTMLEval section of this file.
meta HTML_SHORT_LINK_IMG_1 __HTML_LENGTH_0000_1024 && __HTML_LINK_IMAGE
meta HTML_SHORT_LINK_IMG_2 __HTML_LENGTH_1024_1536 && __HTML_LINK_IMAGE
meta HTML_SHORT_LINK_IMG_3 __HTML_LENGTH_1536_2048 && __HTML_LINK_IMAGE
describe HTML_SHORT_LINK_IMG_1 HTML is very short with a linked image
describe HTML_SHORT_LINK_IMG_2 HTML is very short with a linked image
describe HTML_SHORT_LINK_IMG_3 HTML is very short with a linked image
# HTML length below 384 (see __HTML_LENGTH_384) plus a CENTER tag.
meta HTML_SHORT_CENTER (__HTML_LENGTH_384 && __TAG_EXISTS_CENTER)
describe HTML_SHORT_CENTER HTML is very short with CENTER tag
# __HTML_TITLE_SUBJ_DIFF is the html_title_subject_ratio('3.5') eval;
# the hit is suppressed when __MIME_ATTACHMENT fires.
meta HTML_TITLE_SUBJ_DIFF __HTML_TITLE_SUBJ_DIFF && !__MIME_ATTACHMENT
describe HTML_TITLE_SUBJ_DIFF HTML title differs from the Subject header
# Faraway charset only counts when the body also has high-bit characters
# (__HIGHBITS, defined with the rawbody/body tests in this file).
meta HTML_CHARSET_FARAWAY (__HTML_CHARSET_FARAWAY && __HIGHBITS)
describe HTML_CHARSET_FARAWAY A foreign language charset used in HTML markup
# userconf: per the Mail::SpamAssassin::Conf tflags documentation, the rule
# requires user configuration before it is useful.
tflags HTML_CHARSET_FARAWAY userconf
# MIME_HTML_ONLY, __TAG_EXISTS_HTML and __MIME_HTML are not defined in the
# rules visible in this file; they are expected to come from other rule files.
meta HTML_MIME_NO_HTML_TAG MIME_HTML_ONLY && !__TAG_EXISTS_HTML
describe HTML_MIME_NO_HTML_TAG HTML-only message, but there is no HTML tag
meta HTML_MISSING_CTYPE (!__MIME_HTML && HTML_MESSAGE)
describe HTML_MISSING_CTYPE Message is HTML without HTML Content-Type
###########################################################################
# rawbody HTML tests
# --- status-bar spoofing -------------------------------------------------
# A tag carrying an onMouseOver attribute that assigns window.status.
rawbody   HIDE_WIN_STATUS          /<[^>]+onMouseOver=[^>]+window\.status=/i
describe  HIDE_WIN_STATUS          Javascript to hide URLs in browser

# --- text broken up by "<!...>" markup -----------------------------------
# _A: one or more "<!...>" runs with a word character on each side.
# _B: same, but the neighbours only need to be non-whitespace
#     (and not ">" before / not "<" after).
rawbody   __OBFUSCATING_COMMENT_A  /\w(?:<![^>]*>)+\w/
rawbody   __OBFUSCATING_COMMENT_B  /[^\s>](?:<![^>]*>)+[^\s<]/

# The combined rule is only declared when both plugins are loaded.
ifplugin Mail::SpamAssassin::Plugin::MIMEEval
  ifplugin Mail::SpamAssassin::Plugin::HTMLEval
    meta      OBFUSCATING_COMMENT  ((__OBFUSCATING_COMMENT_A && HTML_MESSAGE) || (__OBFUSCATING_COMMENT_B && MIME_HTML_ONLY)) && !__ISO_2022_JP_DELIM
    describe  OBFUSCATING_COMMENT  HTML comments which obfuscate text
  endif
endif

# --- Javascript charcode arrays ------------------------------------------
# Spam text assembled from a Javascript array: String.fromCharCode(a[i] ^ ...
# (the trailing "^" in the pattern is the XOR operator) plus document.write.
rawbody   __JS_DOCWRITE            /document\.write/
rawbody   __JS_FROMCHARCODE        /String\.fromCharCode\s*\(\s*\S+\s*\[\s*\S+\s*\]\s*\^/
meta      JS_FROMCHARCODE          (__JS_FROMCHARCODE && __JS_DOCWRITE)
describe  JS_FROMCHARCODE          Document is built from a Javascript charcode array

# --- disabled: needlessly encoded punctuation -----------------------------
# Kept for reference; a good possible rule that may resurface.
# Entities covered: ! $ % ' ( ) , - . / : ; = ? @ _
#rawbody ENTITY_DEC_OTHER /\&\#0*(?:3[3679]|4[014567]|5[89]|6[134]|95)\;/
#describe ENTITY_DEC_OTHER HTML contains needlessly encoded punctuation

# --- high-bit characters ---------------------------------------------------
# Four characters in the \x80-\xff range, each optionally followed by one
# arbitrary character.  Consumed by the HTML_CHARSET_FARAWAY meta.
body      __HIGHBITS               /(?:[\x80-\xff].?){4}/
###########################################################################
# Eval rules provided by the HTMLEval plugin.  Everything up to the "else"
# is only defined when the plugin is loaded; the "else" branch supplies
# constant-false stand-ins for the sub-rules that metas outside this block
# depend on.
ifplugin Mail::SpamAssassin::Plugin::HTMLEval
# HTML control test, HTML spam rules should all have better S/O than this
body HTML_MESSAGE eval:html_test('html')
describe HTML_MESSAGE HTML included in message
# HTML comment tests
body HTML_COMMENT_SHORT eval:html_text_match('comment', '<!(?!-).{0,6}>')
describe HTML_COMMENT_SHORT HTML comment is very short
body HTML_COMMENT_SAVED_URL eval:html_text_match('comment', '<!-- saved from url=\(\d{4}\)')
describe HTML_COMMENT_SAVED_URL HTML message is a saved web page
body HTML_EMBEDS eval:html_test('embeds')
describe HTML_EMBEDS HTML with embedded plugin object
body HTML_EXTRA_CLOSE eval:html_range('closed_extra_ratio', '0.09', 'inf')
describe HTML_EXTRA_CLOSE HTML contains far too many close tags
body HTML_FONT_SIZE_LARGE eval:html_range('max_size', '5', '6')
describe HTML_FONT_SIZE_LARGE HTML font size is large
body HTML_FONT_SIZE_HUGE eval:html_range('max_size', '6', 'inf')
describe HTML_FONT_SIZE_HUGE HTML font size is huge
body HTML_FONT_LOW_CONTRAST eval:html_test('font_low_contrast')
describe HTML_FONT_LOW_CONTRAST HTML font color similar or identical to background
body HTML_FONT_FACE_BAD eval:html_test('font_face_bad')
describe HTML_FONT_FACE_BAD HTML font face is not a word
body HTML_FORMACTION_MAILTO eval:html_test('form_action_mailto')
describe HTML_FORMACTION_MAILTO HTML includes a form which sends mail
# HTML_IMAGE_ONLY - not much raw HTML with images (absolute)
# Eight contiguous 400-byte buckets from 0 to 3200.
body HTML_IMAGE_ONLY_04 eval:html_image_only('0000','0400')
body HTML_IMAGE_ONLY_08 eval:html_image_only('0400','0800')
body HTML_IMAGE_ONLY_12 eval:html_image_only('0800','1200')
body HTML_IMAGE_ONLY_16 eval:html_image_only('1200','1600')
body HTML_IMAGE_ONLY_20 eval:html_image_only('1600','2000')
body HTML_IMAGE_ONLY_24 eval:html_image_only('2000','2400')
body HTML_IMAGE_ONLY_28 eval:html_image_only('2400','2800')
body HTML_IMAGE_ONLY_32 eval:html_image_only('2800','3200')
describe HTML_IMAGE_ONLY_04 HTML: images with 0-400 bytes of words
describe HTML_IMAGE_ONLY_08 HTML: images with 400-800 bytes of words
describe HTML_IMAGE_ONLY_12 HTML: images with 800-1200 bytes of words
describe HTML_IMAGE_ONLY_16 HTML: images with 1200-1600 bytes of words
describe HTML_IMAGE_ONLY_20 HTML: images with 1600-2000 bytes of words
describe HTML_IMAGE_ONLY_24 HTML: images with 2000-2400 bytes of words
describe HTML_IMAGE_ONLY_28 HTML: images with 2400-2800 bytes of words
describe HTML_IMAGE_ONLY_32 HTML: images with 2800-3200 bytes of words
# HTML_IMAGE_RATIO - more image area than text (ratio)
body HTML_IMAGE_RATIO_02 eval:html_image_ratio('0.000','0.002')
body HTML_IMAGE_RATIO_04 eval:html_image_ratio('0.002','0.004')
body HTML_IMAGE_RATIO_06 eval:html_image_ratio('0.004','0.006')
body HTML_IMAGE_RATIO_08 eval:html_image_ratio('0.006','0.008')
describe HTML_IMAGE_RATIO_02 HTML has a low ratio of text to image area
describe HTML_IMAGE_RATIO_04 HTML has a low ratio of text to image area
describe HTML_IMAGE_RATIO_06 HTML has a low ratio of text to image area
describe HTML_IMAGE_RATIO_08 HTML has a low ratio of text to image area
# HTML obfuscation
# NOTE(review): the 40-50, 60-70 and 80-90 buckets are absent here;
# presumably intentional -- confirm before adding them.
body HTML_OBFUSCATE_05_10 eval:html_range('obfuscation_ratio','.05','.1')
body HTML_OBFUSCATE_10_20 eval:html_range('obfuscation_ratio','.1','.2')
body HTML_OBFUSCATE_20_30 eval:html_range('obfuscation_ratio','.2','.3')
body HTML_OBFUSCATE_30_40 eval:html_range('obfuscation_ratio','.3','.4')
body HTML_OBFUSCATE_50_60 eval:html_range('obfuscation_ratio','.5','.6')
body HTML_OBFUSCATE_70_80 eval:html_range('obfuscation_ratio','.7','.8')
body HTML_OBFUSCATE_90_100 eval:html_range('obfuscation_ratio','.9','1.0')
describe HTML_OBFUSCATE_05_10 Message is 5% to 10% HTML obfuscation
describe HTML_OBFUSCATE_10_20 Message is 10% to 20% HTML obfuscation
describe HTML_OBFUSCATE_20_30 Message is 20% to 30% HTML obfuscation
describe HTML_OBFUSCATE_30_40 Message is 30% to 40% HTML obfuscation
describe HTML_OBFUSCATE_50_60 Message is 50% to 60% HTML obfuscation
describe HTML_OBFUSCATE_70_80 Message is 70% to 80% HTML obfuscation
describe HTML_OBFUSCATE_90_100 Message is 90% to 100% HTML obfuscation
# Tag balance / tag presence
body HTML_TAG_BALANCE_BODY eval:html_tag_balance('body', '!= 0')
describe HTML_TAG_BALANCE_BODY HTML has unbalanced "body" tags
body HTML_TAG_BALANCE_HEAD eval:html_tag_balance('head', '!= 0')
describe HTML_TAG_BALANCE_HEAD HTML has unbalanced "head" tags
body HTML_TAG_EXIST_BGSOUND eval:html_tag_exists('bgsound')
describe HTML_TAG_EXIST_BGSOUND HTML has "bgsound" tag
# percentage of tags that are not legal elements in HTML
body HTML_BADTAG_40_50 eval:html_range('bad_tag_ratio','0.40','0.50')
body HTML_BADTAG_50_60 eval:html_range('bad_tag_ratio','0.50','0.60')
body HTML_BADTAG_60_70 eval:html_range('bad_tag_ratio','0.60','0.70')
body HTML_BADTAG_90_100 eval:html_range('bad_tag_ratio','0.90','1.00')
describe HTML_BADTAG_40_50 HTML message is 40% to 50% bad tags
describe HTML_BADTAG_50_60 HTML message is 50% to 60% bad tags
describe HTML_BADTAG_60_70 HTML message is 60% to 70% bad tags
describe HTML_BADTAG_90_100 HTML message is 90% to 100% bad tags
# percentage of unique non-elements in HTML
body HTML_NONELEMENT_30_40 eval:html_range('non_element_ratio','0.30','0.40')
body HTML_NONELEMENT_40_50 eval:html_range('non_element_ratio','0.40','0.50')
body HTML_NONELEMENT_60_70 eval:html_range('non_element_ratio','0.60','0.70')
body HTML_NONELEMENT_80_90 eval:html_range('non_element_ratio','0.80','0.90')
describe HTML_NONELEMENT_30_40 30% to 40% of HTML elements are non-standard
describe HTML_NONELEMENT_40_50 40% to 50% of HTML elements are non-standard
describe HTML_NONELEMENT_60_70 60% to 70% of HTML elements are non-standard
describe HTML_NONELEMENT_80_90 80% to 90% of HTML elements are non-standard
# short HTML messages with certain attributes
# These "__" sub-rules feed the meta rules near the top of this file
# (HTML_SHORT_LINK_IMG_*, HTML_SHORT_CENTER, HTML_TITLE_SUBJ_DIFF,
# HTML_CHARSET_FARAWAY); the rest are presumably consumed by other rule files.
body __HTML_LINK_IMAGE eval:html_text_match('anchor', '<img>')
body __HTML_LENGTH_0000_1024 eval:html_range('length', '0', '1024')
body __HTML_LENGTH_1024_1536 eval:html_range('length', '1024', '1536')
body __HTML_LENGTH_1536_2048 eval:html_range('length', '1536', '2048')
body __HTML_LENGTH_512 eval:html_eval('length', '< 512')
body __COMMENT_EXISTS eval:html_text_match('comment', '<!.*?>')
body __HTML_LENGTH_384 eval:html_eval('length', '< 384')
body __TAG_EXISTS_CENTER eval:html_tag_exists('center')
body __HTML_TITLE_120 eval:html_text_match('title', '.{120}')
body __HTML_TITLE_SUBJ_DIFF eval:html_title_subject_ratio('3.5')
body __HTML_CHARSET_FARAWAY eval:html_charset_faraway()
body HTML_IFRAME_SRC eval:check_iframe_src()
describe HTML_IFRAME_SRC Message has HTML IFRAME tag with SRC URI
else
# HTMLEval is not loaded: define the sub-rules that meta rules outside this
# ifplugin block depend on as constant-false, so those metas never reference
# an undefined rule.  Only "__" sub-rules are stubbed; HTML_MESSAGE is left
# alone because it is a scored, user-visible rule.
meta __COMMENT_EXISTS 0
meta __TAG_EXISTS_CENTER 0
meta __HTML_LINK_IMAGE 0
meta __HTML_LENGTH_0000_1024 0
meta __HTML_LENGTH_1024_1536 0
meta __HTML_LENGTH_1536_2048 0
meta __HTML_LENGTH_384 0
meta __HTML_TITLE_SUBJ_DIFF 0
meta __HTML_CHARSET_FARAWAY 0
endif
###########################################################################
# MIME attachment detection, provided by the MIMEEval plugin.
ifplugin Mail::SpamAssassin::Plugin::MIMEEval
# __MIME_ATTACHMENT also used in 20_meta_tests.cf
body __MIME_ATTACHMENT eval:check_for_mime('mime_attachment')
else
# MIMEEval is not loaded: keep __MIME_ATTACHMENT defined (constant-false) so
# metas that reference it outside this block, such as HTML_TITLE_SUBJ_DIFF
# in this file, do not depend on an undefined rule.
meta __MIME_ATTACHMENT 0
endif
|