Source code for coalib.results.Diff

import copy
import difflib
import logging

from unidiff import PatchSet

from coalib.results.LineDiff import LineDiff, ConflictError
from coalib.results.SourceRange import SourceRange
from coalib.results.TextRange import TextRange
from coala_utils.decorators import enforce_signature, generate_eq

[docs]@generate_eq('_file', 'modified', 'rename', 'delete') class Diff: """ A Diff result represents a difference for one file. """ def __init__(self, file_list, rename=False, delete=False): """ Creates an empty diff for the given file. :param file_list: The original (unmodified) file as a list of its lines. :param rename: False or str containing new name of file. :param delete: True if file is set to be deleted. """ self._changes = {} self._file = list(file_list) self._original = self._generate_linebreaks(self._file) self.rename = rename self.delete = delete
[docs] @classmethod def from_string_arrays(cls, file_array_1, file_array_2, rename=False): """ Creates a Diff object from two arrays containing strings. If this Diff is applied to the original array, the second array will be created. :param file_array_1: Original array :param file_array_2: Array to compare :param rename: False or str containing new name of file. """ result = cls(file_array_1, rename=rename) matcher = difflib.SequenceMatcher(None, file_array_1, file_array_2) # We use this because its faster (generator) and doesn't yield as much # useless information as get_opcodes. for change_group in matcher.get_grouped_opcodes(1): for (tag, a_index_1, a_index_2, b_index_1, b_index_2) in change_group: if tag == 'delete': for index in range(a_index_1+1, a_index_2+1): result.delete_line(index) elif tag == 'insert': # We add after line, they add before, so dont add 1 here result.add_lines(a_index_1, file_array_2[b_index_1:b_index_2]) elif tag == 'replace': result.modify_line(a_index_1+1, file_array_2[b_index_1]) result.add_lines(a_index_1+1, file_array_2[b_index_1+1:b_index_2]) for index in range(a_index_1+2, a_index_2+1): result.delete_line(index) return result
[docs] @classmethod def from_unified_diff(cls, unified_diff, original_file): """ Creates a ``Diff`` object from given unified diff. If the provided unified diff does not contain any patch, the ``Diff`` object initialized from the original file is returned. :param unified_diff: Unified diff string. :param original_file: The contents of the original file (line-splitted). :raises RuntimeError: Raised when the context lines or the lines to be removed do not match in the original file and the unified diff. """ patch_set = PatchSet(unified_diff.splitlines()) diff = Diff(original_file) if not patch_set: return diff # FIXME Handle patches consisting of changes in more than one file file = patch_set[0] for hunk in file: file_line = hunk.source_start hunk_iterator = iter(hunk) try: while True: line = next(hunk_iterator) source_code = str(line)[1:] if line.is_added: add_set = [] # As ``Diff`` does not allow line additions to a # position more than one time, add all the # consecutive '+' lines at once. try: while line.is_added: add_set.append(source_code) line = next(hunk_iterator) source_code = str(line)[1:] diff.add_lines(file_line-1, add_set) except StopIteration: diff.add_lines(file_line-1, add_set) break original_line = original_file[file_line-1].rstrip('\n') if line.is_removed: if source_code != original_line: raise RuntimeError( 'The line to delete does not match with ' 'the line in the original file. ' 'Line to delete: {!r}, ' 'Original line #{!r}: {!r}'.format( source_code, file_line, original_line) ) diff.delete_line(file_line) else: if source_code != original_line: raise RuntimeError( 'Context lines do not match. ' 'Line from unified diff: {!r}, ' 'Original line #{!r}: {!r}'.format( source_code, file_line, original_line) ) file_line += 1 except StopIteration: pass return diff
def _get_change(self, line_nr, min_line=1): if not isinstance(line_nr, int): raise TypeError('line_nr needs to be an integer.') if line_nr < min_line: raise IndexError('The given line number is not allowed.') return self._changes.get(line_nr, LineDiff())
[docs] def stats(self): """ Returns tuple containing number of additions and deletions in the diff. """ additions = 0 deletions = 0 for line_diff in self._changes.values(): if line_diff.change: additions += 1 deletions += 1 elif line_diff.delete: deletions += 1 if line_diff.add_after: additions += len(line_diff.add_after) return additions, deletions
def __len__(self): """ Returns total number of additions and deletions in diff. """ return sum(self.stats()) @property def rename(self): """ :return: string containing new name of the file. """ return self._rename @rename.setter @enforce_signature def rename(self, rename: (str, False)): """ :param rename: False or string containing new name of file. """ self._rename = rename @property def delete(self): """ :return: True if file is set to be deleted. """ return self._delete @delete.setter @enforce_signature def delete(self, delete: bool): """ :param delete: True if file is set to be deleted, False otherwise. """ self._delete = delete @property def original(self): """ Retrieves the original file. """ return self._original def _raw_modified(self): """ Calculates the modified file, after applying the Diff to the original. """ result = [] if self.delete: return result current_line = 0 # Note that line_nr counts from _1_ although 0 is possible when # inserting lines before everything for line_nr in sorted(self._changes): result.extend(self._file[current_line:max(line_nr-1, 0)]) linediff = self._changes[line_nr] if not linediff.delete and not linediff.change and line_nr > 0: result.append(self._file[line_nr-1]) elif linediff.change: result.append(linediff.change[1]) if linediff.add_after: result.extend(linediff.add_after) current_line = line_nr result.extend(self._file[current_line:]) return result @property def modified(self): """ Calculates the modified file, after applying the Diff to the original. This property also adds linebreaks at the end of each line. If no newline was present at the end of file before, this state will be preserved, except if the last line is deleted. """ return self._generate_linebreaks(self._raw_modified()) @property def unified_diff(self): """ Generates a unified diff corresponding to this patch. Each change will be displayed on its own line. Additionally, the unified diff preserves the EOF-state of the original file. This means that the ``Diff`` will only have a linebreak on the last line, if that was also present in the original file. Note that the unified diff is not deterministic and thus not suitable for equality comparison. """ list_unified_diff = list(difflib.unified_diff( self._file, self._raw_modified(), tofile=self.rename if isinstance(self.rename, str) else '')) return ''.join(self._generate_linebreaks(list_unified_diff)) def __json__(self): """ Override JSON export, using the unified diff is the easiest thing for the users. """ return self.unified_diff
[docs] def affected_code(self, filename): """ Creates a list of SourceRange objects which point to the related code. Changes on continuous lines will be put into one SourceRange. :param filename: The filename to associate the SourceRange's to. :return: A list of all related SourceRange objects. """ return list(diff.range(filename) for diff in self.split_diff(distance=0))
[docs] def split_diff(self, distance=1): """ Splits this diff into small pieces, such that several continuously altered lines are still together in one diff. All subdiffs will be yielded. A diff like this with changes being together closely won't be splitted: >>> diff = Diff.from_string_arrays([ 'b', 'c', 'e'], ... ['a', 'b', 'd', 'f']) >>> len(list(diff.split_diff())) 1 If we set the distance to 0, it will be splitted: >>> len(list(diff.split_diff(distance=0))) 2 If a negative distance is given, every change will be yielded as an own diff, even if they are right beneath each other: >>> len(list(diff.split_diff(distance=-1))) 3 If a file gets renamed or deleted only, it will be yielded as is: >>> len(list(Diff([], rename='test').split_diff())) 1 An empty diff will not yield any diffs: >>> len(list(Diff([]).split_diff())) 0 :param distance: Number of unchanged lines that are allowed in between two changed lines so they get yielded as one diff. """ if not self: return last_line = -1 this_diff = Diff(self._file, rename=self.rename, delete=self.delete) for line in sorted(self._changes.keys()): if line > last_line + distance + 1 and len(this_diff._changes) > 0: yield this_diff this_diff = Diff(self._file, rename=self.rename, delete=self.delete) last_line = line this_diff._changes[line] = self._changes[line] # If the diff contains no line changes, the loop above will not be run # else, this_diff will never be empty and thus this has to be yielded # always. yield this_diff
[docs] def range(self, filename): """ Calculates a SourceRange spanning over the whole Diff. If something is added after the 0th line (i.e. before the first line) the first line will be included in the SourceRange. The range of an empty diff will only affect the filename: >>> range = Diff([]).range("file") >>> range.file is None False >>> print(range.start.line) None :param filename: The filename to associate the SourceRange with. :return: A SourceRange object. """ if len(self._changes) == 0: return SourceRange.from_values(filename) start = min(self._changes.keys()) end = max(self._changes.keys()) return SourceRange.from_values(filename, start_line=max(1, start), end_line=max(1, end))
def __add__(self, other): """ Adds another diff to this one. Will throw an exception if this is not possible. (This will *not* be done in place.) """ if not isinstance(other, Diff): raise TypeError('Only diffs can be added to a diff.') if self.rename != other.rename and False not in (self.rename, other.rename): raise ConflictError('Diffs contain conflicting renamings.') result = copy.deepcopy(self) result.rename = self.rename or other.rename result.delete = self.delete or other.delete for line_nr in other._changes: change = other._changes[line_nr] if change.delete is True: result.delete_line(line_nr) if change.add_after is not False: result.add_lines(line_nr, change.add_after) if change.change is not False: result.modify_line(line_nr, change.change[1]) return result def __bool__(self): """ >>> bool(Diff([])) False >>> bool(Diff([], rename="some")) True >>> bool(Diff([], delete=True)) True >>> bool(Diff.from_string_arrays(['1'], [])) True :return: False if the patch has no effect at all when applied. """ return (self.rename is not False or self.delete is True or self.modified != self.original)
[docs] def delete_line(self, line_nr): """ Mark the given line nr as deleted. The first line is line number 1. Raises an exception if line number doesn't exist in the diff. """ if line_nr > len(self._file): raise IndexError('The given line number is out of bounds.') linediff = self._get_change(line_nr) linediff.delete = True self._changes[line_nr] = linediff
[docs] def delete_lines(self, line_nr_start, line_nr_end): """ Delete lines in a specified range, inclusively. The range must be valid, i.e. lines must exist in diff, else an exception is raised. """ for line_nr in range(line_nr_start, line_nr_end + 1): self.delete_line(line_nr)
[docs] def add_lines(self, line_nr_before, lines): """ Adds lines after the given line number. :param line_nr_before: Line number of the line before the additions. Use 0 for insert lines before everything. :param lines: A list of lines to add. """ if lines == []: return # No action linediff = self._get_change(line_nr_before, min_line=0) if linediff.add_after is not False: raise ConflictError('Cannot add lines after the given line since ' 'there are already lines.') linediff.add_after = lines self._changes[line_nr_before] = linediff
[docs] def add_line(self, line_nr_before, line): """ Adds line after the given line number. :param line_nr_before: Line number of the line before the addition. Use 0 to insert line before everything. :param line: Line to add. """ return self.add_lines(line_nr_before, [line])
[docs] def modify_line(self, line_nr, replacement): r""" Changes the given line with the given line number. The replacement will be there instead. Given an empty diff object: >>> diff = Diff(['Hey there! Gorgeous.\n', ... "It's nice that we're here.\n"]) We can change a line easily: >>> diff.modify_line(1, ... 'Hey there! This is sad.\n') >>> diff.modified ['Hey there! This is sad.\n', "It's nice that we're here.\n"] We can even merge changes within one line: >>> diff.modify_line(1, ... 'Hello. :( Gorgeous.\n') >>> diff.modified ['Hello. :( This is sad.\n', "It's nice that we're here.\n"] However, if we change something that has been changed before, we'll get a conflict: >>> diff.modify_line(1, 'Hello. This is not ok. Gorgeous.\n') Traceback (most recent call last): ... coalib.results.LineDiff.ConflictError: ... """ linediff = self._get_change(line_nr) if linediff.change is not False and linediff.change[1] != replacement: if len(replacement) == len(linediff.change[1]) == 1: raise ConflictError('Cannot merge the given line changes.') # The following diffs are created from strings, instead of lists. orig_diff = Diff.from_string_arrays(linediff.change[0], linediff.change[1]) new_diff = Diff.from_string_arrays(linediff.change[0], replacement) replacement = ''.join((orig_diff + new_diff)._raw_modified()) linediff.change = (self._file[line_nr-1], replacement) self._changes[line_nr] = linediff
[docs] def change_line(self, line_nr, original_line, replacement): logging.debug('Use of change_line method is deprecated. Instead ' 'use modify_line method, without the original_line ' 'argument') self.modify_line(line_nr, replacement)
[docs] def replace(self, range, replacement): r""" Replaces a part of text. Allows to span multiple lines. This function uses ``add_lines`` and ``delete_lines`` accordingly, so calls of those functions on lines given ``range`` affects after usage or vice versa lead to ``ConflictError``. >>> from coalib.results.TextRange import TextRange >>> test_text = ['hello\n', 'world\n', '4lines\n', 'done\n'] >>> def replace(range, text): ... diff = Diff(test_text) ... diff.replace(range, text) ... return diff.modified >>> replace(TextRange.from_values(1, 5, 4, 3), '\nyeah\ncool\nno') ['hell\n', 'yeah\n', 'cool\n', 'none\n'] >>> replace(TextRange.from_values(2, 1, 3, 5), 'b') ['hello\n', 'bes\n', 'done\n'] >>> replace(TextRange.from_values(1, 6, 4, 3), '') ['hellone\n'] :param range: The ``TextRange`` that gets replaced. :param replacement: The replacement string. Can be multiline. """ # Remaining parts of the lines not affected by the replace. first_part = ( self._file[range.start.line - 1][:range.start.column - 1]) last_part = self._file[range.end.line - 1][range.end.column - 1:] self.delete_lines(range.start.line, range.end.line) self.add_lines(range.start.line - 1, (first_part + replacement + last_part).splitlines(True))
[docs] def insert(self, position, text): r""" Inserts (multiline) text at arbitrary position. >>> from coalib.results.TextPosition import TextPosition >>> test_text = ['123\n', '456\n', '789\n'] >>> def insert(position, text): ... diff = Diff(test_text) ... diff.insert(position, text) ... return diff.modified >>> insert(TextPosition(2, 3), 'woopy doopy') ['123\n', '45woopy doopy6\n', '789\n'] >>> insert(TextPosition(1, 1), 'woopy\ndoopy') ['woopy\n', 'doopy123\n', '456\n', '789\n'] >>> insert(TextPosition(2, 4), '\nwoopy\ndoopy\n') ['123\n', '456\n', 'woopy\n', 'doopy\n', '\n', '789\n'] :param position: The ``TextPosition`` where to insert text. :param text: The text to insert. """ self.replace(TextRange(position, position), text)
[docs] def remove(self, range): r""" Removes a piece of text in a given range. >>> from coalib.results.TextRange import TextRange >>> test_text = ['nice\n', 'try\n', 'bro\n'] >>> def remove(range): ... diff = Diff(test_text) ... diff.remove(range) ... return diff.modified >>> remove(TextRange.from_values(1, 1, 1, 4)) ['e\n', 'try\n', 'bro\n'] >>> remove(TextRange.from_values(1, 5, 2, 1)) ['nicetry\n', 'bro\n'] >>> remove(TextRange.from_values(1, 3, 3, 2)) ['niro\n'] >>> remove(TextRange.from_values(2, 1, 2, 1)) ['nice\n', 'try\n', 'bro\n'] :param range: The range to delete. """ self.replace(range, '')
@staticmethod def _add_linebreaks(lines): """ Validate that each line in lines ends with a newline character and appends one if that is not the case. :param lines: A list of strings, representing lines. """ return [line if line.endswith('\n') else line + '\n' for line in lines] @staticmethod def _generate_linebreaks(lines): """ Validate that each line in lines ends with a newline character and appends one if that is not the case. Exception is the last line in the list. :param lines: A list of strings, representing lines. """ if lines == []: return [] return Diff._add_linebreaks(lines[:-1]) + [lines[-1]]