Source code for coalib.results.ResultFilter

import copy
from difflib import SequenceMatcher

from coalib.results.Diff import ConflictError, Diff
from coalib.results.SourceRange import SourceRange


[docs]def filter_results(original_file_dict,
                   modified_file_dict,
                   original_results,
                   modified_results):
    """
    Filters results for such ones that are unique across file changes

    :param original_file_dict: Dict of lists of file contents before  changes
    :param modified_file_dict: Dict of lists of file contents after changes
    :param original_results:   List of results of the old files
    :param modified_results:   List of results of the new files
    :return:                   List of results from new files that are unique
                               from all those that existed in the old changes
    """

    renamed_files = ensure_files_present(original_file_dict,
                                         modified_file_dict)
    # diffs_dict[file] is a diff between the original and modified file
    diffs_dict = {}
    for file in original_file_dict:
        diffs_dict[file] = Diff.from_string_arrays(
            original_file_dict[file],
            modified_file_dict[renamed_files.get(file, file)])

    orig_result_diff_dict_dict = remove_result_ranges_diffs(original_results,
                                                            original_file_dict)

    mod_result_diff_dict_dict = remove_result_ranges_diffs(modified_results,
                                                           modified_file_dict)

    unique_results = []

    for m_r in reversed(modified_results):
        unique = True

        for o_r in original_results:

            if basics_match(o_r, m_r):
                if source_ranges_match(original_file_dict,
                                       diffs_dict,
                                       orig_result_diff_dict_dict[o_r],
                                       mod_result_diff_dict_dict[m_r],
                                       renamed_files):

                    # at least one original result matches completely
                    unique = False
                    break
        if unique:
            unique_results.append(m_r)

    return unique_results


[docs]def basics_match(original_result,
                 modified_result):
    """
    Checks whether the following properties of two results match:
    * origin
    * message
    * severity
    * debug_msg

    :param original_result: A result of the old files
    :param modified_result: A result of the new files
    :return:                Boolean value whether or not the properties match
    """

    return all(getattr(original_result, member) ==
               getattr(modified_result, member)
               for member in ['origin', 'message', 'severity', 'debug_msg'])


[docs]def source_ranges_match(original_file_dict,
                        diff_dict,
                        original_result_diff_dict,
                        modified_result_diff_dict,
                        renamed_files):
    """
    Checks whether the SourceRanges of two results match

    :param original_file_dict: Dict of lists of file contents before changes
    :param diff_dict:          Dict of diffs describing the changes per file
    :param original_result_diff_dict: diff for each file for this result
    :param modified_result_diff_dict: guess
    :param renamed_files:   A dictionary containing file renamings across runs
    :return:                     Boolean value whether the SourceRanges match
    """
    for file_name in original_file_dict:

        try:  # fails if the affected range of the result get's modified
            original_total_diff = (diff_dict[file_name] +
                                   original_result_diff_dict[file_name])
        except ConflictError:
            return False

        # original file with file_diff and original_diff applied
        original_total_file = original_total_diff.modified
        # modified file with modified_diff applied
        modified_total_file = modified_result_diff_dict[
            renamed_files.get(file_name, file_name)].modified
        if original_total_file != modified_total_file:
            return False
    return True


[docs]def remove_range(file_contents, source_range):
    """
    removes the chars covered by the sourceRange from the file

    :param file_contents: list of lines in the file
    :param source_range:  Source Range
    :return:              list of file contents without specified chars removed
    """
    if not file_contents:
        return []

    newfile = list(file_contents)
    # attention: line numbers in the SourceRange are human-readable,
    # list indices start with 0

    source_range = source_range.expand(file_contents)

    if source_range.start.line == source_range.end.line:
        # if it's all in one line, replace the line by it's beginning and end
        newfile[source_range.start.line - 1] = (
            newfile[source_range.start.line - 1][:source_range.start.column-1]
            + newfile[source_range.start.line - 1][source_range.end.column:])
        if newfile[source_range.start.line - 1] == '':
            del newfile[source_range.start.line - 1]
    else:
        # cut away after start
        newfile[source_range.start.line - 1] = (
            newfile[source_range.start.line - 1][:source_range.start.column-1])

        # cut away before end
        newfile[source_range.end.line - 1] = (
            newfile[source_range.end.line - 1][source_range.end.column:])

        # start: index = first line number ==> line after first line
        # end: index = last line -2 ==> line before last line

        for i in reversed(range(
                source_range.start.line, source_range.end.line - 1)):
            del newfile[i]

        # remove leftover empty lines
        # the first line here is actually the former `source_range.end.line -1`
        if newfile[source_range.start.line] == '':
            del newfile[source_range.start.line]
        if newfile[source_range.start.line - 1] == '':
            del newfile[source_range.start.line - 1]

    return newfile


[docs]def remove_result_ranges_diffs(result_list, file_dict):
    """
    Calculates the diffs to all files in file_dict that describe the removal of
    each respective result's affected code.

    :param result_list: list of results
    :param file_dict:   dict of file contents
    :return:            returnvalue[result][file] is a diff of the changes the
                        removal of this result's affected code would cause for
                        the file.
    """
    result_diff_dict_dict = {}
    for original_result in result_list:
        mod_file_dict = copy.deepcopy(file_dict)

        # gather all source ranges from this result
        source_ranges = []

        # SourceRanges must be sorted backwards and overlaps must be eliminated
        # this way, the deletion based on sourceRanges is not offset by
        # previous deletions in the same line that invalidate the indices.
        previous = None

        for source_range in sorted(original_result.affected_code, reverse=True):
            # previous exists and overlaps
            if previous is not None and source_range.overlaps(previous):
                combined_sr = SourceRange.join(previous, source_range)
                previous = combined_sr
            elif previous is None:
                previous = source_range
            # previous exists but it doesn't overlap
            else:
                source_ranges.append(previous)
                previous = source_range
        # don't forget last entry if there were any:
        if previous:
            source_ranges.append(previous)

        for source_range in source_ranges:
            file_name = source_range.file
            new_file = remove_range(mod_file_dict[file_name],
                                    source_range)
            mod_file_dict[file_name] = new_file

        diff_dict = {}
        for file_name in file_dict:
            diff_dict[file_name] = Diff.from_string_arrays(
                file_dict[file_name],
                mod_file_dict[file_name])

        result_diff_dict_dict[original_result] = diff_dict

    return result_diff_dict_dict


[docs]def ensure_files_present(original_file_dict, modified_file_dict):
    """
    Ensures that all files are available as keys in both dicts.

    :param original_file_dict: Dict of lists of file contents before  changes
    :param modified_file_dict: Dict of lists of file contents after changes
    :return:                   Return a dictionary of renamed files.
    """
    original_files = set(original_file_dict.keys())
    modified_files = set(modified_file_dict.keys())
    affected_files = original_files | modified_files
    original_unique_files = affected_files - modified_files
    renamed_files_dict = {}
    for file in filter(
            lambda filter_file: filter_file not in original_files,
            affected_files):
        for comparable_file in original_unique_files:
            s = SequenceMatcher(
                None,
                ''.join(modified_file_dict[file]),
                ''.join(original_file_dict[comparable_file]))
            if s.real_quick_ratio() >= 0.5 and s.ratio() > 0.5:
                renamed_files_dict[comparable_file] = file
                break
        else:
            original_file_dict[file] = []
    for file in filter(
            lambda filter_file: filter_file not in modified_files,
            affected_files):
        modified_file_dict[file] = []
    return renamed_files_dict
Source code for coalib.results.ResultFilter

coala

Navigation

Related Topics