Source code for coalib.parsing.LineParser

from coala_utils.string_processing.StringConverter import StringConverter
from coala_utils.string_processing import (unescape, convert_to_raw,
                                           position_is_escaped,
                                           unescaped_rstrip)


class LineParser:
    """
    Parses single configuration lines into a section name, keys, a value
    and a comment, using configurable delimiters.
    """

    def __init__(self,
                 key_value_delimiters=('=',),
                 comment_separators=('#',),
                 key_delimiters=(',', ' '),
                 section_name_surroundings=None,
                 section_override_delimiters=('.',)):
        """
        Creates a new line parser. Please note that no delimiter or separator
        may be an "o" or you may encounter undefined behaviour with the
        escapes.

        :param key_value_delimiters:        Delimiters that delimit a key from
                                            a value.
        :param comment_separators:          Used to initiate a comment.
        :param key_delimiters:              Delimiters between several keys.
        :param section_name_surroundings:   Dictionary, e.g. {"[": "]"} means
                                            a section name is surrounded by
                                            []. If None, {"[": "]"} is used as
                                            default.
        :param section_override_delimiters: Delimiter for a section override.
                                            E.g. "." would mean that
                                            section.key is a possible key that
                                            puts the key into the section
                                            "section" regardless of the
                                            current section.
        """
        # The default is created per instance so that no dict object is
        # shared between parsers through the signature.
        if section_name_surroundings is None:
            section_name_surroundings = {'[': ']'}

        self.key_value_delimiters = key_value_delimiters
        self.comment_separators = comment_separators
        self.key_delimiters = key_delimiters
        self.section_name_surroundings = section_name_surroundings
        self.section_override_delimiters = section_override_delimiters

    def parse(self, line):
        """
        Note that every value in the returned tuple *besides the value* is
        unescaped. This is so since the value is meant to be put into a
        Setting later thus the escapes may be needed there.

        :param line: The line to parse.
        :return:     section_name (empty string if it's no section name),
                     [(section_override, key), ...], value, comment
        """
        line, comment = self.__separate_by_first_occurrence(
            line,
            self.comment_separators)
        comment = unescape(comment)
        if line == '':
            # Nothing but a comment (or nothing at all) on this line.
            return '', [], '', comment

        section_name = unescape(self.__get_section_name(line))
        if section_name != '':
            return section_name, [], '', comment

        # Escapes in value might be needed by the bears
        keys, value = self.__extract_keys_and_value(line)

        all_delimiters = self.__collect_delimiters()
        value = convert_to_raw(value, all_delimiters)

        key_tuples = []
        for key in keys:
            key = convert_to_raw(key, all_delimiters)
            section, key = self.__separate_by_first_occurrence(
                key,
                self.section_override_delimiters,
                strip_delim=True,
                return_second_part_nonempty=True)
            key_tuples.append((unescape(section), unescape(key)))

        return '', key_tuples, value, comment

    def __collect_delimiters(self):
        """
        Concatenates every configured delimiter into one string.

        The string is built from scratch on each call. Accumulating with
        ``+=`` on ``self.key_value_delimiters`` would extend that attribute
        in place whenever the delimiters were given as lists, so the parser
        configuration would change with every parsed line.

        :return: All key-value delimiters, key delimiters, comment
                 separators, section override delimiters and section name
                 surroundings (opening ones, then closing ones), joined in
                 that order.
        """
        delimiter_groups = (
            self.key_value_delimiters,
            self.key_delimiters,
            self.comment_separators,
            self.section_override_delimiters,
            self.section_name_surroundings.keys(),
            self.section_name_surroundings.values(),
        )
        return ''.join(''.join(group) for group in delimiter_groups)

    @staticmethod
    def __separate_by_first_occurrence(string,
                                       delimiters,
                                       strip_delim=False,
                                       return_second_part_nonempty=False):
        """
        Separates a string by the first of all given delimiters. Any
        whitespace characters will be stripped away from the parts.

        :param string:                      The string to separate.
        :param delimiters:                  The delimiters.
        :param strip_delim:                 Strips the delimiter from the
                                            result if true.
        :param return_second_part_nonempty: If no delimiter is found and this
                                            is true the contents of the string
                                            will be returned in the second part
                                            of the tuple instead of the first
                                            one.
        :return:                            (first_part, second_part)
        """
        # Mask escape sequences with the placeholder "oo" so that an escaped
        # delimiter is not found by the search below. Positions of all
        # characters in front of a masked sequence stay the same as in
        # ``string``. This is why no delimiter may be an "o".
        temp_string = string.replace('\\\\', 'oo')
        i = temp_string.find('\\')
        while i != -1:
            temp_string = temp_string[:i] + 'oo' + temp_string[i+2:]
            i = temp_string.find('\\', i+2)

        # Pick the delimiter that occurs earliest; len(string) means that
        # none was found.
        delim_pos = len(string)
        used_delim = ''
        for delim in delimiters:
            pos = temp_string.find(delim)
            if 0 <= pos < delim_pos:
                delim_pos = pos
                used_delim = delim

        if return_second_part_nonempty and delim_pos == len(string):
            return '', string.strip(' \n')

        first_part = string[:delim_pos]
        second_part = string[delim_pos + (
            len(used_delim) if strip_delim else 0):]

        # Trailing whitespace is only removed when the last character of the
        # second part is not reported as escaped.
        if not position_is_escaped(second_part, len(second_part) - 1):
            first_part = unescaped_rstrip(first_part)
            second_part = unescaped_rstrip(second_part)

        return (first_part.lstrip().rstrip('\n'),
                second_part.lstrip().rstrip('\n'))

    def __get_section_name(self, line):
        """
        Extracts the section name from a line that is enclosed by one of the
        configured surroundings.

        :param line: The line to inspect.
        :return:     The text between the first matching pair of
                     surroundings with spaces and newlines stripped, or an
                     empty string if no pair encloses the line.
        """
        for begin, end in self.section_name_surroundings.items():
            if line.startswith(begin) and line.endswith(end):
                return line[len(begin):len(line) - len(end)].strip(' \n')

        return ''

    def __extract_keys_and_value(self, line):
        """
        Splits a line at the first key-value delimiter and splits the key part
        into the individual keys.

        :param line: The line to split.
        :return:     (keys, value) where keys is a list. If the line holds no
                     key-value delimiter, the whole content ends up in value.
        """
        key_part, value = self.__separate_by_first_occurrence(
            line,
            self.key_value_delimiters,
            strip_delim=True,
            return_second_part_nonempty=True)
        # Backslashes are kept here; parse() unescapes the keys later on.
        keys = list(StringConverter(
            key_part,
            list_delimiters=self.key_delimiters).__iter__(
            remove_backslashes=False))

        return keys, value