/usr/lib/python3/dist-packages/diffoscope/comparators/text.py is in diffoscope 93ubuntu1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
# -*- coding: utf-8 -*-
#
# diffoscope: in-depth comparison of files, archives, and directories
#
# Copyright © 2014-2015 Jérémy Bobbio <lunar@debian.org>
#
# diffoscope is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# diffoscope is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with diffoscope. If not, see <https://www.gnu.org/licenses/>.
import re
import codecs
from diffoscope.difference import Difference
from .utils.file import File
def order_only_difference(unified_diff):
    """Tell whether a unified diff merely reorders lines.

    Every line starting with ``+`` is collected as an addition and every
    line starting with ``-`` as a removal; the leading marker character is
    dropped before comparison.

    Returns True only when the additions and removals contain exactly the
    same lines (duplicates included) but appear in a different sequence.
    Returns False otherwise, including when there are no such lines at all.
    """
    additions = []
    removals = []
    for entry in unified_diff.splitlines():
        if entry.startswith('+'):
            additions.append(entry[1:])
        elif entry.startswith('-'):
            removals.append(entry[1:])

    # Cheap rejection before sorting: a pure reordering keeps the line count.
    if len(additions) != len(removals):
        return False
    # Identical sequences mean nothing was actually moved.
    if additions == removals:
        return False
    return sorted(additions) == sorted(removals)
class TextFile(File):
    """Comparator for files whose detected type matches ``FILE_TYPE_RE``."""

    DESCRIPTION = "text files"
    FILE_TYPE_RE = re.compile(r'\btext\b')

    @property
    def encoding(self):
        """Encoding guessed for ``self.path``; computed once, then reused."""
        if hasattr(self, '_encoding'):
            return self._encoding
        self._encoding = File.guess_encoding(self.path)
        return self._encoding

    def compare(self, other, source=None):
        """Compare this file with *other* as decoded text.

        Each file is opened with its own guessed encoding, defaulting to
        UTF-8 when the guess is falsy.  A diff that only reorders lines gets
        an explanatory comment, and a mismatch between the two encodings is
        attached as an extra detail (creating a Difference if the text diff
        produced none).

        Returns whatever ``Difference.from_text_readers`` produced, possibly
        augmented as described above.  If a codec cannot be looked up or the
        content fails to decode, the result of ``self.compare_bytes`` is
        returned instead.
        """
        left_enc = self.encoding or 'utf-8'
        right_enc = other.encoding or 'utf-8'
        try:
            with codecs.open(self.path, 'r', encoding=left_enc) as left_reader:
                with codecs.open(other.path, 'r', encoding=right_enc) as right_reader:
                    result = Difference.from_text_readers(
                        left_reader, right_reader, self.name, other.name, source)

                    # Flag diffs where the same lines merely moved around.
                    if result:
                        if order_only_difference(result.unified_diff):
                            result.add_comment("ordering differences only")

                    if left_enc != right_enc:
                        # The encodings differ even if the decoded text does
                        # not, so make sure there is something to attach to.
                        if result is None:
                            result = Difference(None, self.path, other.path, source)
                        encoding_detail = Difference.from_text(
                            left_enc, right_enc, None, None, source='encoding')
                        result.add_details([encoding_detail])

                    return result
        except (LookupError, UnicodeDecodeError):
            # unknown or misdetected encoding
            return self.compare_bytes(other, source)
|