# Source code for coalib.parsing.LineParser

from coala_utils.string_processing.StringConverter import StringConverter
from coala_utils.string_processing import (unescape, convert_to_raw,
                                           position_is_escaped,
                                           unescaped_rstrip)


class LineParser:
    """
    Splits a single configuration line into a section name, a list of
    (section_override, key) tuples, a value and a comment, using
    configurable delimiters.
    """

    def __init__(self,
                 key_value_delimiters=('=',),
                 comment_separators=('#',),
                 key_delimiters=(',', ' '),
                 section_name_surroundings=None,
                 section_override_delimiters=('.',)):
        """
        Creates a new line parser. Please note that no delimiter or separator
        may be an "o" or you may encounter undefined behaviour with the
        escapes.

        :param key_value_delimiters:        Delimiters that delimit a key from
                                            a value.
        :param comment_separators:          Used to initiate a comment.
        :param key_delimiters:              Delimiters between several keys.
        :param section_name_surroundings:   Dictionary, e.g. {"[": "]"} means a
                                            section name is surrounded by [].
                                            If None, {"[": "]"} is used as
                                            default.
        :param section_override_delimiters: Delimiter for a section override.
                                            E.g. "." would mean that
                                            section.key is a possible key that
                                            puts the key into the section
                                            "section" regardless of the current
                                            section.
        """
        # A fresh dict is created per instance so that no mutable default
        # is shared between parsers.
        section_name_surroundings = (
            {'[': ']'} if section_name_surroundings is None
            else section_name_surroundings)

        self.key_value_delimiters = key_value_delimiters
        self.comment_separators = comment_separators
        self.key_delimiters = key_delimiters
        self.section_name_surroundings = section_name_surroundings
        self.section_override_delimiters = section_override_delimiters

    def parse(self, line):
        """
        Parses one line into its components.

        Note that every value in the returned tuple *besides the value* is
        unescaped. This is so since the value is meant to be put into a Setting
        later thus the escapes may be needed there.

        :param line: The line to parse.
        :return:     section_name (empty string if it's no section name),
                     [(section_override, key), ...], value, comment
        """
        line, comment = self.__separate_by_first_occurrence(
            line,
            self.comment_separators)
        comment = unescape(comment)
        if line == '':
            return '', [], '', comment

        section_name = unescape(self.__get_section_name(line))
        if section_name != '':
            return section_name, [], '', comment

        # Escapes in value might be needed by the bears
        keys, value = self.__extract_keys_and_value(line)

        # Built from scratch on every call: concatenating onto the instance
        # attributes with ``+=`` would extend them in place whenever they
        # are lists and corrupt the parser for all later lines.
        all_delimiters = self.__join_delimiters()

        value = convert_to_raw(value, all_delimiters)

        key_tuples = []
        for key in keys:
            key = convert_to_raw(key, all_delimiters)
            section, key = self.__separate_by_first_occurrence(
                key,
                self.section_override_delimiters,
                strip_delim=True,
                return_second_part_nonempty=True)
            key_tuples.append((unescape(section), unescape(key)))

        return '', key_tuples, value, comment

    def __join_delimiters(self):
        """
        Concatenates every configured delimiter into one string without
        modifying any of the delimiter collections stored on the instance.

        :return: A string consisting of the key-value delimiters, key
                 delimiters, comment separators, section override delimiters
                 and the opening and closing section name surroundings, in
                 that order.
        """
        delimiter_groups = (
            self.key_value_delimiters,
            self.key_delimiters,
            self.comment_separators,
            self.section_override_delimiters,
            self.section_name_surroundings.keys(),
            self.section_name_surroundings.values())

        return ''.join(delimiter
                       for group in delimiter_groups
                       for delimiter in group)

    @staticmethod
    def __separate_by_first_occurrence(string,
                                       delimiters,
                                       strip_delim=False,
                                       return_second_part_nonempty=False):
        """
        Separates a string by the first of all given delimiters. Any whitespace
        characters will be stripped away from the parts.

        :param string:                      The string to separate.
        :param delimiters:                  The delimiters.
        :param strip_delim:                 Strips the delimiter from the
                                            result if true.
        :param return_second_part_nonempty: If no delimiter is found and this
                                            is true the contents of the string
                                            will be returned in the second part
                                            of the tuple instead of the first
                                            one.
        :return:                            (first_part, second_part)
        """
        # Mask every backslash together with the character following it by
        # "oo" so that escaped delimiters are not found below while all other
        # positions stay aligned with the original string. This is the reason
        # why "o" must not be used as a delimiter.
        temp_string = string.replace('\\\\', 'oo')
        i = temp_string.find('\\')
        while i != -1:
            temp_string = temp_string[:i] + 'oo' + temp_string[i+2:]
            i = temp_string.find('\\', i+2)

        # Pick the delimiter with the leftmost unescaped occurrence;
        # len(string) stands for "no delimiter found".
        delim_pos = len(string)
        used_delim = ''
        for delim in delimiters:
            pos = temp_string.find(delim)
            if 0 <= pos < delim_pos:
                delim_pos = pos
                used_delim = delim

        if return_second_part_nonempty and delim_pos == len(string):
            return '', string.strip(' \n')

        first_part = string[:delim_pos]
        second_part = string[delim_pos + (
            len(used_delim) if strip_delim else 0):]

        # Trailing characters are only stripped when position_is_escaped
        # does not report the last character of the second part as escaped.
        if not position_is_escaped(second_part, len(second_part) - 1):
            first_part = unescaped_rstrip(first_part)
            second_part = unescaped_rstrip(second_part)

        return (first_part.lstrip().rstrip('\n'),
                second_part.lstrip().rstrip('\n'))

    def __get_section_name(self, line):
        """
        Extracts the section name if the line is wrapped in one of the
        configured section name surroundings.

        :param line: The line to inspect.
        :return:     The text between the surroundings with spaces and
                     newlines stripped from both ends, or an empty string if
                     no pair of surroundings encloses the line.
        """
        for begin, end in self.section_name_surroundings.items():
            if line.startswith(begin) and line.endswith(end):
                return line[len(begin):len(line) - len(end)].strip(' \n')

        return ''

    def __extract_keys_and_value(self, line):
        """
        Splits a line at the first key-value delimiter and breaks the part
        before it into individual keys using the key delimiters.

        :param line: The line to split.
        :return:     (keys, value) where keys is a list of key strings whose
                     backslashes have not been removed. If no key-value
                     delimiter is found the whole line becomes the value.
        """
        key_part, value = self.__separate_by_first_occurrence(
            line,
            self.key_value_delimiters,
            strip_delim=True,
            return_second_part_nonempty=True)
        keys = list(StringConverter(
            key_part,
            list_delimiters=self.key_delimiters).__iter__(
            remove_backslashes=False))

        return keys, value