Module kiwipiepy.template

Expand source code
import re
import string
from typing import List, Optional, Tuple, Union

# Matches, as three alternative groups: an escaped opening brace ``{{``, an
# escaped closing brace ``}}``, or a brace-delimited field ``{...}`` that
# contains no ``}``.
# NOTE(review): nothing in the visible part of this module references this
# pattern -- confirm whether it is still needed.
format_pattern = re.compile(r'(\{\{)|(\}\})|(\{[^}]*\})')

def _to_kiwi_tokens(token):
    """Normalise *token* into a list of token-like items, or return ``None``.

    An item is accepted when it is one of:

    * a ``kiwipiepy.Token`` instance,
    * a ``(str, str)`` tuple,
    * a ``(str, str, bool)`` tuple.

    A single accepted item is wrapped in a one-element list.  A ``list`` made
    up exclusively of accepted items is returned as a new list (an empty list
    therefore yields ``[]``).  Every other input -- including a list holding
    at least one unaccepted element -- yields ``None``.
    """
    from kiwipiepy import Token

    def is_morph_tuple(item):
        # Only 2-tuples of strings and 3-tuples of (str, str, bool) qualify.
        if not isinstance(item, tuple):
            return False
        size = len(item)
        if size not in (2, 3):
            return False
        if not (isinstance(item[0], str) and isinstance(item[1], str)):
            return False
        return size == 2 or isinstance(item[2], bool)

    def is_acceptable(item):
        return isinstance(item, Token) or is_morph_tuple(item)

    if is_acceptable(token):
        return [token]

    if isinstance(token, list) and all(is_acceptable(entry) for entry in token):
        return list(token)

    return None

class Template:
    """A ``str.format``-style template whose literal text is tokenized once.

    The format string is parsed with ``string.Formatter``.  At construction
    the literal text is tokenized a single time through ``kiwi.tokenize``,
    with every replacement field temporarily represented by a two-character
    ``{}`` placeholder.  :meth:`format` later splices the supplied values in
    between the cached tokens and hands the result to ``kiwi.join``.
    """

    def __init__(self,
        kiwi: 'Kiwi',
        format_str: str,
    ):
        """Parse *format_str* and pre-tokenize its literal parts.

        Args:
            kiwi: Object providing ``tokenize(text, pretokenized=...)`` and
                ``join(tokens)``; it is kept and reused by :meth:`format`.
            format_str: Template text using ``str.format`` field syntax.

        Raises:
            ValueError: If the format string is malformed, or if it mixes
                automatic (``{}``) and manual (``{0}``) field numbering.
        """
        self._kiwi = kiwi
        self._format_str = format_str
        self._formatter = string.Formatter()

        # Validate and number the fields before doing anything else, so an
        # invalid format string is rejected up front.
        parsed_fields = self._number_fields(self._formatter.parse(format_str))

        # Project-local import kept inside the constructor, as in the
        # original layout of this module.
        from kiwipiepy._wrap import _convert_consonant

        chunks = []
        offset = 0
        pretokenized_lists = []
        # Layout: one ``(tokens_before_field, field, format_spec, conversion)``
        # entry per replacement field, followed by exactly one final entry
        # whose field is ``None`` and which holds the tokens after the last
        # field.  ``Formatter.parse`` may emit extra field-less items (it
        # splits literals at escaped braces), so those never get an entry of
        # their own -- their text is still part of ``chunks``.
        self._parsed_format = []
        for literal, field, format_spec, conversion in parsed_fields:
            literal = _convert_consonant(literal)
            chunks.append(literal)
            offset += len(literal)
            if field is None:
                continue
            # Stand-in text for the field, registered as a pre-tokenized
            # span so it can be recognised again in the tokenizer output.
            chunks.append('{}')
            pretokenized_lists.append((offset, offset + 2, 'SSC'))
            offset += 2
            self._parsed_format.append(([], field, format_spec, conversion))
        # Always present, so that a template ending in a field (or an empty
        # template) still has a slot to advance to.
        self._parsed_format.append(([], None, None, None))

        tokens = kiwi.tokenize(''.join(chunks), pretokenized=pretokenized_lists)
        placeholder_iter = iter(pretokenized_lists)
        next_placeholder = next(placeholder_iter, None)
        slot = 0
        for token in tokens:
            if next_placeholder and token.span == next_placeholder[:2]:
                # Placeholder reached: drop it and start collecting the
                # tokens that follow this field.  ``slot`` advances at most
                # once per field, so it never runs past the final entry.
                slot += 1
                next_placeholder = next(placeholder_iter, None)
            else:
                self._parsed_format[slot][0].append(token)

    @staticmethod
    def _number_fields(parsed):
        """Resolve automatic field numbering in ``Formatter.parse`` output.

        Returns a list of ``(literal, field, format_spec, conversion)`` tuples
        in which every empty field name has been replaced by its positional
        index.  Raises ``ValueError`` when automatic and manual numbering are
        mixed, using the same wording as ``str.format``.
        """
        resolved = []
        next_auto_index = 0
        used_manual_index = False
        for literal, field, format_spec, conversion in parsed:
            if field is not None:
                if field.isdigit():
                    if next_auto_index:
                        raise ValueError('cannot switch from automatic field numbering to manual field specification')
                    used_manual_index = True
                elif field == '':
                    if used_manual_index:
                        raise ValueError('cannot switch from manual field specification to automatic field numbering')
                    field = str(next_auto_index)
                    next_auto_index += 1
            resolved.append((literal, field, format_spec, conversion))
        return resolved

    def format(self,
        *args,
        **kwargs,
    ):
        """Fill the template with *args* / *kwargs* and join the result.

        A value that ``_to_kiwi_tokens`` accepts is spliced in as tokens,
        provided the field has no conversion; every other value is rendered
        with the standard conversion / format-spec machinery and inserted as
        a ``(text, 'SW')`` item.

        Returns:
            Whatever ``kiwi.join`` returns for the assembled token list.

        Raises:
            ValueError: If a format specifier is given for a field whose
                value is inserted as tokens.
        """
        all_tokens = []
        for tokens, field, format_spec, conversion in self._parsed_format:
            all_tokens += tokens
            if field is None:
                continue

            value, _ = self._formatter.get_field(field, args, kwargs)
            tokens = _to_kiwi_tokens(value)
            # An empty list is falsy here and therefore takes the string path.
            if tokens and not conversion:
                if format_spec:
                    raise ValueError('cannot specify format specifier for Kiwi Token')
                all_tokens += tokens
            else:
                value = self._formatter.convert_field(value, conversion)
                value = self._formatter.format_field(value, format_spec)
                all_tokens.append((value, 'SW'))

        return self._kiwi.join(all_tokens)

Classes

class Template (kiwi: Kiwi, format_str: str)
Expand source code
class Template:
    """A ``str.format``-style template whose literal text is tokenized once.

    The format string is parsed with ``string.Formatter``.  At construction
    the literal text is tokenized a single time through ``kiwi.tokenize``,
    with every replacement field temporarily represented by a two-character
    ``{}`` placeholder.  :meth:`format` later splices the supplied values in
    between the cached tokens and hands the result to ``kiwi.join``.
    """

    def __init__(self,
        kiwi: 'Kiwi',
        format_str: str,
    ):
        """Parse *format_str* and pre-tokenize its literal parts.

        Args:
            kiwi: Object providing ``tokenize(text, pretokenized=...)`` and
                ``join(tokens)``; it is kept and reused by :meth:`format`.
            format_str: Template text using ``str.format`` field syntax.

        Raises:
            ValueError: If the format string is malformed, or if it mixes
                automatic (``{}``) and manual (``{0}``) field numbering.
        """
        self._kiwi = kiwi
        self._format_str = format_str
        self._formatter = string.Formatter()

        # Validate and number the fields before doing anything else, so an
        # invalid format string is rejected up front.
        parsed_fields = self._number_fields(self._formatter.parse(format_str))

        # Project-local import kept inside the constructor, as in the
        # original layout of this module.
        from kiwipiepy._wrap import _convert_consonant

        chunks = []
        offset = 0
        pretokenized_lists = []
        # Layout: one ``(tokens_before_field, field, format_spec, conversion)``
        # entry per replacement field, followed by exactly one final entry
        # whose field is ``None`` and which holds the tokens after the last
        # field.  ``Formatter.parse`` may emit extra field-less items (it
        # splits literals at escaped braces), so those never get an entry of
        # their own -- their text is still part of ``chunks``.
        self._parsed_format = []
        for literal, field, format_spec, conversion in parsed_fields:
            literal = _convert_consonant(literal)
            chunks.append(literal)
            offset += len(literal)
            if field is None:
                continue
            # Stand-in text for the field, registered as a pre-tokenized
            # span so it can be recognised again in the tokenizer output.
            chunks.append('{}')
            pretokenized_lists.append((offset, offset + 2, 'SSC'))
            offset += 2
            self._parsed_format.append(([], field, format_spec, conversion))
        # Always present, so that a template ending in a field (or an empty
        # template) still has a slot to advance to.
        self._parsed_format.append(([], None, None, None))

        tokens = kiwi.tokenize(''.join(chunks), pretokenized=pretokenized_lists)
        placeholder_iter = iter(pretokenized_lists)
        next_placeholder = next(placeholder_iter, None)
        slot = 0
        for token in tokens:
            if next_placeholder and token.span == next_placeholder[:2]:
                # Placeholder reached: drop it and start collecting the
                # tokens that follow this field.  ``slot`` advances at most
                # once per field, so it never runs past the final entry.
                slot += 1
                next_placeholder = next(placeholder_iter, None)
            else:
                self._parsed_format[slot][0].append(token)

    @staticmethod
    def _number_fields(parsed):
        """Resolve automatic field numbering in ``Formatter.parse`` output.

        Returns a list of ``(literal, field, format_spec, conversion)`` tuples
        in which every empty field name has been replaced by its positional
        index.  Raises ``ValueError`` when automatic and manual numbering are
        mixed, using the same wording as ``str.format``.
        """
        resolved = []
        next_auto_index = 0
        used_manual_index = False
        for literal, field, format_spec, conversion in parsed:
            if field is not None:
                if field.isdigit():
                    if next_auto_index:
                        raise ValueError('cannot switch from automatic field numbering to manual field specification')
                    used_manual_index = True
                elif field == '':
                    if used_manual_index:
                        raise ValueError('cannot switch from manual field specification to automatic field numbering')
                    field = str(next_auto_index)
                    next_auto_index += 1
            resolved.append((literal, field, format_spec, conversion))
        return resolved

    def format(self,
        *args,
        **kwargs,
    ):
        """Fill the template with *args* / *kwargs* and join the result.

        A value that ``_to_kiwi_tokens`` accepts is spliced in as tokens,
        provided the field has no conversion; every other value is rendered
        with the standard conversion / format-spec machinery and inserted as
        a ``(text, 'SW')`` item.

        Returns:
            Whatever ``kiwi.join`` returns for the assembled token list.

        Raises:
            ValueError: If a format specifier is given for a field whose
                value is inserted as tokens.
        """
        all_tokens = []
        for tokens, field, format_spec, conversion in self._parsed_format:
            all_tokens += tokens
            if field is None:
                continue

            value, _ = self._formatter.get_field(field, args, kwargs)
            tokens = _to_kiwi_tokens(value)
            # An empty list is falsy here and therefore takes the string path.
            if tokens and not conversion:
                if format_spec:
                    raise ValueError('cannot specify format specifier for Kiwi Token')
                all_tokens += tokens
            else:
                value = self._formatter.convert_field(value, conversion)
                value = self._formatter.format_field(value, format_spec)
                all_tokens.append((value, 'SW'))

        return self._kiwi.join(all_tokens)

Methods

def format(self, *args, **kwargs)
Expand source code
def format(self,
    *args,
    **kwargs,
):
    """Fill the template with *args* / *kwargs* and join the result.

    Walks ``self._parsed_format``: the cached tokens of each entry are
    emitted first, then (when the entry has a field) the value looked up for
    that field.  A value accepted by ``_to_kiwi_tokens`` is spliced in as
    tokens unless a conversion was requested; any other value is converted,
    formatted and inserted as a ``(text, 'SW')`` item.

    Raises ``ValueError`` when a format specifier accompanies a value that is
    inserted as tokens.  Returns the result of ``self._kiwi.join``.
    """
    formatter = self._formatter
    pieces = []
    for literal_tokens, field_name, spec, conv in self._parsed_format:
        pieces.extend(literal_tokens)
        if field_name is not None:
            obj, _ = formatter.get_field(field_name, args, kwargs)
            kiwi_tokens = _to_kiwi_tokens(obj)
            if conv or not kiwi_tokens:
                # Plain value (or explicit conversion): render it as text.
                converted = formatter.convert_field(obj, conv)
                rendered = formatter.format_field(converted, spec)
                pieces.append((rendered, 'SW'))
            elif spec:
                raise ValueError('cannot specify format specifier for Kiwi Token')
            else:
                pieces.extend(kiwi_tokens)

    return self._kiwi.join(pieces)