Source code for coalib.results.ResultFilter
import copy
from difflib import SequenceMatcher
from coalib.results.Diff import ConflictError, Diff
from coalib.results.SourceRange import SourceRange
def filter_results(original_file_dict,
                   modified_file_dict,
                   original_results,
                   modified_results):
    """
    Filters results for such ones that are unique across file changes.

    A result of the new files is dropped when at least one result of the
    old files has the same basic properties (see ``basics_match``) and
    affects the same code once the file changes are taken into account
    (see ``source_ranges_match``).

    Note that both file dicts are handed to ``ensure_files_present``, which
    may add keys to them in place.

    :param original_file_dict: Dict of lists of file contents before changes
    :param modified_file_dict: Dict of lists of file contents after changes
    :param original_results:   List of results of the old files
    :param modified_results:   List of results of the new files
    :return:                   List of results from new files that are unique
                               from all those that existed in the old
                               changes. The list is built by walking
                               ``modified_results`` back to front.
    """
    renamed_files = ensure_files_present(original_file_dict,
                                         modified_file_dict)

    # diffs_dict[file] is a diff between the original and modified file;
    # renamed files are compared against their new name.
    diffs_dict = {
        name: Diff.from_string_arrays(
            contents,
            modified_file_dict[renamed_files.get(name, name)])
        for name, contents in original_file_dict.items()
    }

    orig_result_diff_dict_dict = remove_result_ranges_diffs(
        original_results, original_file_dict)
    mod_result_diff_dict_dict = remove_result_ranges_diffs(
        modified_results, modified_file_dict)

    def existed_before(new_result):
        # True as soon as one original result matches completely.
        return any(
            basics_match(old_result, new_result) and
            source_ranges_match(original_file_dict,
                                diffs_dict,
                                orig_result_diff_dict_dict[old_result],
                                mod_result_diff_dict_dict[new_result],
                                renamed_files)
            for old_result in original_results)

    return [new_result
            for new_result in reversed(modified_results)
            if not existed_before(new_result)]
def basics_match(original_result,
                 modified_result):
    """
    Checks whether the following properties of two results match:

    * origin
    * message
    * severity
    * debug_msg

    Any other attribute of the results is ignored.

    :param original_result: A result of the old files
    :param modified_result: A result of the new files
    :return:                Boolean value whether or not the properties match
    """
    for attribute in ('origin', 'message', 'severity', 'debug_msg'):
        old_value = getattr(original_result, attribute)
        new_value = getattr(modified_result, attribute)
        if not (old_value == new_value):
            # first differing property decides the outcome
            return False

    return True
def source_ranges_match(original_file_dict,
                        diff_dict,
                        original_result_diff_dict,
                        modified_result_diff_dict,
                        renamed_files):
    """
    Checks whether the SourceRanges of two results match.

    For every file of the original run, the file diff and the original
    result's removal diff are combined and the outcome is compared with the
    modified file after applying the modified result's removal diff.

    :param original_file_dict:        Dict of lists of file contents before
                                      changes
    :param diff_dict:                 Dict of diffs describing the changes
                                      per file
    :param original_result_diff_dict: Diff for each file for the original
                                      result
    :param modified_result_diff_dict: Diff for each file for the modified
                                      result, keyed by the file's name in
                                      the modified run
    :param renamed_files:             A dictionary containing file renamings
                                      across runs
    :return:                          Boolean value whether the SourceRanges
                                      match
    """
    for original_name in original_file_dict:
        file_diff = diff_dict[original_name]
        result_diff = original_result_diff_dict[original_name]

        try:
            # fails if the affected range of the result gets modified
            combined_diff = file_diff + result_diff
        except ConflictError:
            return False

        # original file with file_diff and original_diff applied
        expected_contents = combined_diff.modified

        # modified file with modified_diff applied
        current_name = renamed_files.get(original_name, original_name)
        actual_contents = modified_result_diff_dict[current_name].modified

        if expected_contents != actual_contents:
            return False

    return True
def remove_range(file_contents, source_range):
    """
    Removes the chars covered by the source range from the file.

    The work is done on a copy of ``file_contents``. If the first or the
    last affected line ends up as an empty string, that line is dropped
    from the result.

    :param file_contents: list of lines in the file
    :param source_range:  Source range to cut out; it is expanded against
                          ``file_contents`` before use
    :return:              list of file contents with the specified chars
                          removed (an empty list for empty input)
    """
    if not file_contents:
        return []

    lines = list(file_contents)
    expanded = source_range.expand(file_contents)

    # attention: line numbers in the SourceRange are human-readable,
    # list indices start with 0
    first = expanded.start.line - 1
    last = expanded.end.line - 1
    keep_until = expanded.start.column - 1
    keep_from = expanded.end.column

    if first == last:
        # all in one line: keep only what precedes and follows the range
        lines[first] = lines[first][:keep_until] + lines[first][keep_from:]
        if lines[first] == '':
            del lines[first]
        return lines

    # cut away after start
    lines[first] = lines[first][:keep_until]
    # cut away before end
    lines[last] = lines[last][keep_from:]

    # drop the lines strictly between first and last, back to front so the
    # remaining indices stay valid
    for index in range(last - 1, first, -1):
        del lines[index]

    # remove leftover empty lines
    # the former last line now directly follows the first one
    if lines[first + 1] == '':
        del lines[first + 1]
    if lines[first] == '':
        del lines[first]

    return lines
def remove_result_ranges_diffs(result_list, file_dict):
    """
    Calculates the diffs to all files in file_dict that describe the removal
    of each respective result's affected code.

    :param result_list: list of results
    :param file_dict:   dict of file contents
    :return:            returnvalue[result][file] is a diff of the changes
                        the removal of this result's affected code would
                        cause for the file.
    """
    diffs_by_result = {}

    for result in result_list:
        # work on a private copy so file_dict itself stays untouched
        stripped_files = copy.deepcopy(file_dict)

        # SourceRanges must be sorted backwards and overlaps must be
        # eliminated. This way, the deletion based on SourceRanges is not
        # offset by previous deletions in the same line that invalidate the
        # indices.
        merged_ranges = []
        pending = None
        for current in sorted(result.affected_code, reverse=True):
            if pending is None:
                # very first range, nothing to compare against yet
                pending = current
            elif current.overlaps(pending):
                # grow the pending range instead of storing two overlapping
                pending = SourceRange.join(pending, current)
            else:
                # no overlap: the pending range is final
                merged_ranges.append(pending)
                pending = current

        # don't forget last entry if there were any:
        if pending:
            merged_ranges.append(pending)

        for merged in merged_ranges:
            affected_file = merged.file
            stripped_files[affected_file] = remove_range(
                stripped_files[affected_file], merged)

        diffs_by_result[result] = {
            name: Diff.from_string_arrays(contents, stripped_files[name])
            for name, contents in file_dict.items()
        }

    return diffs_by_result
def ensure_files_present(original_file_dict, modified_file_dict):
    """
    Ensures that all files are available as keys in both dicts.

    Both dicts are modified in place:

    * A file that only exists in ``modified_file_dict`` is either recognised
      as the renamed version of a file that only exists in
      ``original_file_dict`` (their joined contents are more than 50%
      similar according to ``difflib.SequenceMatcher``), or it is added to
      ``original_file_dict`` with empty contents.
    * A file that only exists in ``original_file_dict`` is added to
      ``modified_file_dict`` with empty contents (this includes the old
      names of renamed files).

    Each vanished original file is used as the source of at most one rename,
    so every new file ends up either in the returned mapping or as a key of
    ``original_file_dict``.

    :param original_file_dict: Dict of lists of file contents before changes
    :param modified_file_dict: Dict of lists of file contents after changes
    :return:                   Return a dictionary of renamed files, mapping
                               the old file name to the new file name.
    """
    original_files = set(original_file_dict)
    modified_files = set(modified_file_dict)

    # Snapshot the names first, both dicts are mutated further down.
    # Iterating the dicts (instead of a set union) keeps the processing
    # order tied to the order of the dicts themselves.
    added_files = [name for name in modified_file_dict
                   if name not in original_files]
    removed_files = [name for name in original_file_dict
                     if name not in modified_files]

    # originals that have not been identified as a rename source yet
    unclaimed_originals = list(removed_files)

    renamed_files_dict = {}
    for added_file in added_files:
        added_text = ''.join(modified_file_dict[added_file])

        for candidate in unclaimed_originals:
            matcher = SequenceMatcher(
                None,
                added_text,
                ''.join(original_file_dict[candidate]))
            # real_quick_ratio is a cheap upper bound, so the expensive
            # ratio() is only computed for plausible candidates
            if matcher.real_quick_ratio() >= 0.5 and matcher.ratio() > 0.5:
                renamed_files_dict[candidate] = added_file
                # an original can only be renamed once; without this a
                # second similar file would overwrite the mapping and the
                # first one would be lost from both structures
                unclaimed_originals.remove(candidate)
                break
        else:
            # no rename source found: the file is genuinely new
            original_file_dict[added_file] = []

    for removed_file in removed_files:
        modified_file_dict[removed_file] = []

    return renamed_files_dict