This file is indexed.

/usr/share/spamassassin/20_body_tests.cf is in spamassassin 3.4.1-8build1.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
# SpamAssassin rules file: body tests
#
# Please don't modify this file as your changes will be overwritten with
# the next update. Use @@LOCAL_RULES_DIR@@/local.cf instead.
# See 'perldoc Mail::SpamAssassin::Conf' for details.
#
# Note: body tests are run with long lines, so be sure to limit the
# size of searches; use /.{0,30}/ instead of /.*/ to avoid huge
# search times.
#
# Note: If you are adding a rule which looks for a phrase in the body
# (as most of them do), please add it to rules/20_phrases.cf instead.
#
# <@LICENSE>
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to you under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at:
# 
#     http://www.apache.org/licenses/LICENSE-2.0
# 
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# </@LICENSE>
#
###########################################################################

require_version @@VERSION@@

###########################################################################
# GTUBE test - the generic test for UBE.
body GTUBE		/XJS\*C4JDBQADN1\.NSBN3\*2IDNEN\*GTUBE-STANDARD-ANTI-UBE-TEST-EMAIL\*C\.34X/
describe GTUBE		Generic Test for Unsolicited Bulk Email
tflags GTUBE		userconf noautolearn

###########################################################################

# this seems to be the new fashion (as of Jul 5 2002).  base64-encoded
# parts need to be stripped before this match
body TRACKER_ID		/^[a-z0-9]{6,24}[-_a-z0-9]{12,36}[a-z0-9]{6,24}\s*\z/is
describe TRACKER_ID	Incorporates a tracking ID number

body WEIRD_QUOTING	/[\042\223\224\262\263\271]{2}\S{0,16}[\042\223\224\262\263\271]{2}/
describe WEIRD_QUOTING	Weird repeated double-quotation marks

###########################################################################
# multipart/alternative has very good accuracy, other multipart types are
# similar to MIME_HTML_ONLY so they don't need a separate rule
header __CTYPE_MULTIPART_ALT	Content-Type =~ /multipart\/alternative/i
meta MIME_HTML_ONLY_MULTI	(__CTYPE_MULTIPART_ALT && MIME_HTML_ONLY)
describe MIME_HTML_ONLY_MULTI	Multipart message only has text/html MIME parts

# note: __HIGHBITS is used in rules/20_html_tests.cf, HTML_CHARSET_FARAWAY
meta MIME_CHARSET_FARAWAY	(__MIME_CHARSET_FARAWAY && __HIGHBITS)
describe MIME_CHARSET_FARAWAY	MIME character set indicates foreign language
tflags MIME_CHARSET_FARAWAY	userconf

###########################################################################

# duncf
body EMAIL_ROT13     /\b[a-z(\]-]+\^[a-z-]+\([a-z]{2,3}\b/
describe EMAIL_ROT13 Body contains a ROT13-encoded email address
test EMAIL_ROT13 ok  qhabs^ebtref(pbz
test EMAIL_ROT13 ok  zxrggyre^riv-vap(pbz
test EMAIL_ROT13 fail	duncf-nospam@rogers.com

# this could use more work
body __LONGWORDS_A	/\b(?:[a-z]{8,}[\s\.]+){6}/
body __LONGWORDS_B	/\b(?:[a-z]{6,}[\s\.]+){9}/
body __LONGWORDS_C	/\b(?:[a-z]{5,}[\s\.]+){10}/
meta LONGWORDS		(__LONGWORDS_A + __LONGWORDS_B + __LONGWORDS_C > 1)
describe LONGWORDS	Long string of long words


###########################################################################

ifplugin Mail::SpamAssassin::Plugin::BodyEval


# This rule uses a simple algorithm to determine if the text and html
# parts of an multipart/alternative message are different.
body MPART_ALT_DIFF	eval:multipart_alternative_difference('99', '100')
describe MPART_ALT_DIFF	HTML and text parts are different

body MPART_ALT_DIFF_COUNT	eval:multipart_alternative_difference_count('3', '1')
describe MPART_ALT_DIFF_COUNT	HTML and text parts are different

body BLANK_LINES_80_90	eval:check_blank_line_ratio('80','90','4')
describe BLANK_LINES_80_90  Message body has 80-90% blank lines

# it's the ratio of spaces to non-spaces in each paragraph.  apparently
# messages where generally there are lots of spaces mean the message is spam.
# 8.532  10.6051   0.1897    0.982   0.75    0.01  T_VERTICAL_WORDS_TVD_1
# bug 6149: avoid common .jp false positives
header __SUBJECT_UTF8_B_ENCODED     Subject:raw =~ /=\?UTF-?8\?B\?/i
body __TVD_SPACE_RATIO	eval:tvd_vertical_words('0','10')
meta TVD_SPACE_RATIO    (__TVD_SPACE_RATIO && !__ISO_2022_JP_DELIM && !__SUBJECT_UTF8_B_ENCODED && !__HIGHBITS)

endif

###########################################################################

ifplugin Mail::SpamAssassin::Plugin::MIMEEval

# 0.767   0.9097   0.0000    1.000   0.84    1.00  MULTIPART_ALT_NON_TEXT
body MULTIPART_ALT_NON_TEXT	eval:check_ma_non_text()

body CHARSET_FARAWAY		eval:check_for_faraway_charset()
describe CHARSET_FARAWAY	Character set indicates a foreign language
tflags CHARSET_FARAWAY		userconf

# these tests doesn't actually use rawbody since rawbody isn't raw enough;
# they must be written very carefully to avoid modifying the original content

# MIME Content-Transfer-Encoding control rules
rawbody __MIME_BASE64		eval:check_for_mime('mime_base64_count')
describe __MIME_BASE64		Includes a base64 attachment

rawbody __MIME_QP		eval:check_for_mime('mime_qp_count')
describe __MIME_QP		Includes a quoted-printable attachment

rawbody MIME_BASE64_BLANKS	eval:check_for_mime('mime_base64_blanks')
describe MIME_BASE64_BLANKS	Extra blank lines in base64 encoding


rawbody MIME_BASE64_TEXT	eval:check_for_mime('mime_base64_encoded_text')
describe MIME_BASE64_TEXT	Message text disguised using base64 encoding


body MISSING_MIME_HB_SEP	eval:check_msg_parse_flags('missing_mime_head_body_separator')
describe MISSING_MIME_HB_SEP	Missing blank line between MIME header and body

body MIME_HTML_MOSTLY		eval:check_mime_multipart_ratio('0.00','0.01')
describe MIME_HTML_MOSTLY	Multipart message mostly text/html MIME

# Steve Linford via Charlie Watts: good test!
body MIME_HTML_ONLY		eval:check_for_mime_html_only()
describe MIME_HTML_ONLY		Message only has text/html MIME parts

rawbody  MIME_QP_LONG_LINE	eval:check_for_mime('mime_qp_long_line')
describe MIME_QP_LONG_LINE	Quoted-printable line longer than 76 chars

rawbody __MIME_CHARSET_FARAWAY	eval:check_for_mime('mime_faraway_charset')

body MIME_BAD_ISO_CHARSET	eval:check_for_mime('mime_bad_iso_charset')
describe MIME_BAD_ISO_CHARSET	MIME character set is an unknown ISO charset

endif

###########################################################################

ifplugin Mail::SpamAssassin::Plugin::URIEval

body HTTPS_IP_MISMATCH	eval:check_https_ip_mismatch()
describe HTTPS_IP_MISMATCH	IP to HTTPS link found in HTML

body URI_TRUNCATED	eval:check_uri_truncated()
describe URI_TRUNCATED	Message contained a URI which was truncated

endif