Source code for coalib.parsing.LineParser

import logging
import re

from coala_utils.string_processing.StringConverter import StringConverter
from coala_utils.string_processing import (unescape, convert_to_raw,
                                           position_is_escaped,
                                           unescaped_rstrip)


class LineParser:
    """
    Parses a single configuration line into section name, keys, value and
    comment, using configurable delimiters.
    """

    def __init__(self,
                 key_value_delimiters=('=',),
                 comment_separators=('#',),
                 key_delimiters=(',', ' '),
                 section_name_surroundings=None,
                 section_override_delimiters=('.',),
                 key_value_append_delimiters=('+=',)):
        """
        Creates a new line parser. Please note that no delimiter or separator
        may be an "o" or you may encounter undefined behaviour with the
        escapes.

        :param key_value_delimiters:
            Delimiters that delimit a key from a value.
        :param comment_separators:
            Used to initiate a comment.
        :param key_delimiters:
            Delimiters between several keys.
        :param section_name_surroundings:
            Dictionary, e.g. {"[": "]"} means a section name is surrounded by
            []. If None, {"[": "]"} is used as default.
        :param section_override_delimiters:
            Delimiter for a section override. E.g. "." would mean that
            section.key is a possible key that puts the key into the section
            "section" regardless of the current section.
        :param key_value_append_delimiters:
            Delimiters to separate key and value in setting arguments where
            settings are being appended.
        """
        if section_name_surroundings is None:
            section_name_surroundings = {'[': ']'}

        self.key_value_delimiters = key_value_delimiters
        self.key_value_append_delimiters = key_value_append_delimiters
        self.comment_separators = comment_separators
        self.key_delimiters = key_delimiters
        self.section_name_surroundings = section_name_surroundings
        self.section_override_delimiters = section_override_delimiters

    def parse(self, line):
        """
        Deprecated wrapper around ``_parse`` that drops the ``to_append``
        element of the result and logs a deprecation warning on every call.

        Note that every value in the returned tuple *besides the value* is
        unescaped. This is so since the value is meant to be put into a
        Setting later thus the escapes may be needed there.

        :param line: The line to parse.
        :return:     section_name (empty string if it's no section name),
                     [(section_override, key), ...], value, comment
        """
        logging.warning('The parse method of LineParser is deprecated and will'
                        ' be removed. Please use `_parse` which has a new '
                        'return type, a tuple containing 5 values instead of '
                        '4. Refer to the method documentation for further '
                        'information.')

        section_name, key_tuples, value, _, comment = self._parse(line)
        return section_name, key_tuples, value, comment

    def _parse(self, line):
        """
        Note that every value in the returned tuple *besides the value* is
        unescaped. This is so since the value is meant to be put into a
        Setting later thus the escapes may be needed there.

        :param line: The line to parse.
        :return:     section_name (empty string if it's no section name),
                     [(section_override, key), ...], value, to_append (True if
                     append delimiter is found else False), comment
        """
        self._warn_on_unspaced_comment(line)

        line, comment = self.__separate_by_first_occurrence(
            line, self.comment_separators)
        comment = unescape(comment)
        if line == '':
            # Nothing but a comment (or nothing at all) on this line.
            return '', [], '', False, comment

        section_name = unescape(self.__get_section_name(line))
        if section_name != '':
            return section_name, [], '', False, comment

        # Escapes in value might be needed by the bears.
        # Try the append delimiters first; if that yields no value, fall back
        # to the plain key/value delimiters, where a line without any
        # delimiter is treated entirely as the value.
        append = True
        keys, value = self.__extract_keys_and_value(
            line, self.key_value_append_delimiters)
        if not value:
            keys, value = self.__extract_keys_and_value(
                line, self.key_value_delimiters, True)
            append = False

        all_delimiters = self._all_delimiters()
        value = convert_to_raw(value, all_delimiters)

        key_tuples = []
        for key in keys:
            key = convert_to_raw(key, all_delimiters)
            section, key = self.__separate_by_first_occurrence(
                key, self.section_override_delimiters, True, True)
            key_tuples.append((unescape(section), unescape(key)))

        return '', key_tuples, value, append, comment

    def _warn_on_unspaced_comment(self, line):
        """
        Logs a warning for each comment separator that sits at the very
        beginning of the line without whitespace next to it.

        ``re.match`` anchors at the start of the line, so only two shapes are
        detected: a non-space character directly followed by the separator,
        and the separator directly followed by a non-space character.

        :param line: The raw line to inspect.
        """
        for separator in self.comment_separators:
            # Separators are literal strings everywhere else in this class,
            # so escape them before embedding them into a regex. Without
            # this a separator such as "(" raises re.error and one such as
            # "*" changes the meaning of the pattern.
            literal = re.escape(separator)
            if (re.match('[^ ]' + literal, line)
                    or re.match(literal + '[^ ]', line)):
                logging.warning('This comment does not have whitespace' +
                                ' before or after ' + separator + ' in: ' +
                                repr(line.replace('\n', '')) + '. If you ' +
                                'didn\'t mean to make a comment, use a ' +
                                'backslash for escaping.')

    def _all_delimiters(self):
        """
        Concatenates every configured delimiter into one string.

        Each group is joined on its own rather than accumulated with ``+=``
        on an alias of ``self.key_value_delimiters``: with list arguments
        ``+=`` would extend the parser's own attribute on every call, and a
        mix of tuples and lists would raise ``TypeError``.

        :return: Key/value, append, key, comment and section override
                 delimiters followed by the section name surroundings (keys,
                 then values), joined into a single string.
        """
        groups = (
            self.key_value_delimiters,
            self.key_value_append_delimiters,
            self.key_delimiters,
            self.comment_separators,
            self.section_override_delimiters,
            # The surroundings are stored as a dict: opening parts first,
            # closing parts afterwards.
            self.section_name_surroundings.keys(),
            self.section_name_surroundings.values(),
        )
        return ''.join(''.join(group) for group in groups)

    @staticmethod
    def __separate_by_first_occurrence(string,
                                       delimiters,
                                       strip_delim=False,
                                       return_second_part_nonempty=False):
        """
        Separates a string by the first of all given delimiters. Any
        whitespace characters will be stripped away from the parts.

        :param string:
            The string to separate.
        :param delimiters:
            The delimiters.
        :param strip_delim:
            Strips the delimiter from the result if true.
        :param return_second_part_nonempty:
            If no delimiter is found and this is true the contents of the
            string will be returned in the second part of the tuple instead
            of the first one.
        :return:
            (first_part, second_part)
        """
        # Mask every backslash together with the character it escapes by
        # "oo" so that escaped delimiters are not found below. This is why
        # no delimiter may be an "o".
        temp_string = string.replace('\\\\', 'oo')
        i = temp_string.find('\\')
        while i != -1:
            temp_string = temp_string[:i] + 'oo' + temp_string[i+2:]
            i = temp_string.find('\\', i+2)

        # Pick the delimiter occurring earliest; len(string) means "none".
        delim_pos = len(string)
        used_delim = ''
        for delim in delimiters:
            pos = temp_string.find(delim)
            if 0 <= pos < delim_pos:
                delim_pos = pos
                used_delim = delim

        if return_second_part_nonempty and delim_pos == len(string):
            return '', string.strip(' \n')

        first_part = string[:delim_pos]
        second_part = string[delim_pos + (
            len(used_delim) if strip_delim else 0):]

        # Only strip trailing whitespace when the last character of the
        # second part is not escaped.
        if not position_is_escaped(second_part, len(second_part) - 1):
            first_part = unescaped_rstrip(first_part)
            second_part = unescaped_rstrip(second_part)

        return (first_part.lstrip().rstrip('\n'),
                second_part.lstrip().rstrip('\n'))

    def __get_section_name(self, line):
        """
        Extracts the section name if the line is wrapped in one of the
        configured section name surroundings.

        :param line: The line to inspect.
        :return:     The text between the surroundings with spaces and
                     newlines stripped, or an empty string if no pair of
                     surroundings encloses the line.
        """
        for begin, end in self.section_name_surroundings.items():
            if line.startswith(begin) and line.endswith(end):
                return line[len(begin):len(line) - len(end)].strip(' \n')

        return ''

    def __extract_keys_and_value(self,
                                 line,
                                 delimiters,
                                 return_second_part_nonempty=False):
        """
        This method extracts the keys and values from the given string by
        splitting them based on the delimiters provided.

        :param line:
            The input string.
        :param delimiters:
            A list of delimiters to split the strings on.
        :param return_second_part_nonempty:
            If no delimiter is found and this is true the contents of the
            string will be returned as value
        :return:
            The parsed keys and values from a line.
        """
        key_part, value = self.__separate_by_first_occurrence(
            line,
            delimiters,
            True,
            return_second_part_nonempty)
        # The key part may hold several keys separated by key_delimiters;
        # backslashes are kept here and handled by the caller.
        keys = list(StringConverter(
            key_part,
            list_delimiters=self.key_delimiters).__iter__(
            remove_backslashes=False))

        return keys, value