# header.py

import re
import linetypes
import datetime

class Header:
    '''
        Result type of header processing, this contains all the header
        information.

        All fields start out empty (None, '' or []) and are filled in by
        the header parser as the corresponding header lines are found.
    '''
    def __init__(self):
        from collections import defaultdict
        self.description = None
        self.name = None
        self.author = None
        self.username = None
        self.filetype = None
        self.qualifiers = None
        self.license = None
        # multiple !HELP lines are joined together with newlines
        self.help = ''
        self.bfc = None
        self.category = None
        # multiple !KEYWORDS lines are joined together with newlines
        self.keywords = ''
        self.cmdline = None
        # list of HistoryEntry objects, in file order
        self.history = []
        # maps the kind of a header line (e.g. 'name') to the list of
        # line indices where such a line was found
        self.occurrences = defaultdict(list)
    @property
    def valid(self):
        '''
            A Header is always a valid result, see BadHeader for the
            counterpart.
        '''
        return True
    @property
    def effective_filetype(self):
        '''
            What's the effective file type? The "Unofficial_" prefix is
            left out. If no file type has been set, it is returned as is
            (None or an empty string).
        '''
        prefix = 'Unofficial_'
        filetype = self.filetype
        if filetype and filetype.startswith(prefix):
            # Cut off exactly one leading prefix. Splitting on the prefix
            # would also split on any later occurrence of it.
            return filetype[len(prefix):]
        return filetype
    @property
    def effective_category(self):
        '''
            Returns the category of the part. If no explicit category
            has been set, the first word of the description is used,
            with leading punctuation marks ignored. Returns an empty
            string if there is neither a category nor a description.
        '''
        if self.category:
            return self.category
        import string
        # A description may not have been set yet, treat that as empty.
        first_word = (self.description or '').split(' ', 1)[0]
        return first_word.lstrip(string.punctuation)

class BadHeader:
    '''
        Stand-in result for a header that could not be processed. It
        records the index at which processing gave up and the reason
        why the header could not be understood.
    '''
    def __init__(self, index, reason):
        self.index, self.reason = index, reason
    def __repr__(self):
        template = 'header.BadHeader(index = {index!r}, reason = {reason!r})'
        return template.format(index = self.index, reason = self.reason)
    @property
    def valid(self):
        # a bad header is never a valid result
        return False

def is_invertnext(entry):
    '''
        Is the given object a "BFC INVERTNEXT" metacommand?
    '''
    if not isinstance(entry, linetypes.MetaCommand):
        return False
    return entry.text == "BFC INVERTNEXT"

def is_suitable_header_object(entry):
    '''
        Tells whether the given object may be considered part of the
        header. BFC INVERTNEXT and the functional linetypes listed
        below are not header material, anything else is.
    '''
    # BFC INVERTNEXT is a metacommand, but it is not a header command.
    if is_invertnext(entry):
        return False
    # The linetypes that cannot appear in the header.
    non_header_linetypes = (
        linetypes.SubfileReference,
        linetypes.LineSegment,
        linetypes.Triangle,
        linetypes.Quadrilateral,
        linetypes.ConditionalLine,
        linetypes.Comment,
        linetypes.Error,
    )
    return not isinstance(entry, non_header_linetypes)

class HeaderError(Exception):
    '''
        An error raised during header parsing.

        index is the position in the model body where the problem was
        found and reason is a human-readable message describing it.
    '''
    def __init__(self, index, reason):
        # Hand the values to Exception as well so that args is populated
        # no matter whether we were constructed with keyword arguments.
        super().__init__(index, reason)
        self.index, self.reason = index, reason
    def __repr__(self):
        return str.format(
            'HeaderError({index!r}, {reason!r})',
            index = self.index,
            reason = self.reason,
        )
    def __str__(self):
        # __str__ must return a string, whatever reason happens to be
        return str(self.reason)

class HistoryEntry:
    '''
        One !HISTORY line of a header: when the change was made, who
        made it and what was changed.
    '''
    def __init__(self, date, user, text):
        self.date = date
        self.user = user
        self.text = text
    def __repr__(self):
        template = 'HistoryEntry({date!r}, {user!r}, {text!r})'
        return template.format(
            date = self.date,
            user = self.user,
            text = self.text,
        )

class HeaderParser:
    '''
        Parses the header at the beginning of a model body into a
        Header object.

        The parser walks over the model body with a cursor. The first
        three metacommands are taken to be the description, the name
        line and the author line. The metacommands after those are
        matched against the known header patterns until one of them
        matches none, or until the header ends.
    '''
    def __init__(self):
        self.model_body = None
        self.cursor = 0
        self.problems = []
        # Per-parse state: parse() resets result and order, and
        # try_to_match() fills in groups. They are defined here so that
        # the attributes exist on a freshly created parser as well.
        self.result = None
        self.order = []
        self.groups = ()
    def parse(self, model_body):
        '''
            Parses the header of the given model body, an indexable
            sequence of parsed line objects.

            Returns a dictionary with two keys: 'header' is the
            resulting Header and 'end-index' is the index following the
            final cursor position, i.e. where the header ended.

            Raises HeaderError if the description, name or author line
            is missing or malformed, if a history date is invalid or if
            there is no !LDRAW_ORG line.
        '''
        result = Header()
        self.result = result
        self.order = []
        # start just before the first line, skip_to_next advances first
        self.cursor = -1
        self.model_body = model_body
        self.skip_to_next()
        result.description = self.current()
        self.skip_to_next()
        result.name = self.parse_pattern(r'^Name: (.+)$', 'name')[0]
        self.skip_to_next()
        # Parse author line
        result.author, result.username = self.parse_pattern(r'^Author: (?:([^\[]+))?(?:\[([^\]]+)\])?', 'author')
        if isinstance(result.author, str):
            # clean leading spaces
            result.author = str.strip(result.author)
        if not result.author and not result.username:
            self.parse_error('author line does not contain a name nor username')
        # use more patterns to parse the rest of the header
        for _ in self.get_more_header_stuff():
            if self.try_to_match(
                r'^!LDRAW_ORG '
                r'((?:Unofficial_)?(?:'
                    r'Part|'
                    r'Subpart|'
                    r'Primitive|'
                    r'8_Primitive|'
                    r'48_Primitive|'
                    r'Shortcut'
                r'))\s?'
                r'(.*)$',
                'part type'):
                result.filetype = self.groups[0]
                result.qualifiers = re.findall(r'(?:Physical_Colour|Alias|ORIGINAL|UPDATE \d\d\d\d-\d\d)', self.groups[1])
            elif self.try_to_match(
                r'^!LICENSE (.+)$',
                'license'):
                result.license = self.groups[0]
            elif self.try_to_match(
                r'BFC (CERTIFY CW|CERTIFY CCW|NOCERTIFY)',
                'bfc'):
                result.bfc = self.groups[0]
            elif self.try_to_match(
                r'!HISTORY (\d{4}-\d{2}-\d{2}) ([\[{][^\]}]+[\]}]) (.+)$',
                'history'):
                try:
                    time_object = datetime.datetime.strptime(
                        self.groups[0],
                        '%Y-%m-%d',
                    )
                except ValueError:
                    # The pattern only checks the digit layout, so dates
                    # such as 2020-13-45 get this far. parse_error
                    # always raises, so time_object is bound below.
                    self.parse_error("invalid ISO date in history")
                result.history.append(HistoryEntry(
                    date = time_object.date(),
                    user = self.groups[1],
                    text = self.groups[2],
                ))
            elif self.try_to_match(
                r'!HELP (.+)',
                'help'):
                # several help lines are joined together with newlines
                if result.help:
                    result.help += '\n'
                result.help += self.groups[0]
            elif self.try_to_match(
                r'!CATEGORY (.+)',
                'category'):
                result.category = self.groups[0]
            elif self.try_to_match(
                r'!KEYWORDS (.+)',
                'keywords'):
                # several keyword lines are joined together with newlines
                if result.keywords:
                    result.keywords += '\n'
                result.keywords += self.groups[0]
            elif self.try_to_match(
                r'!CMDLINE (.+)',
                'cmdline'):
                result.cmdline = self.groups[0]
            else:
                # Not a header line we know of: step back so that this
                # line is not counted as part of the header, and stop.
                self.cursor -= 1
                break
        if not result.filetype:
            self.parse_error('LDRAW_ORG line is missing')
        return {
            'header': result,
            'end-index': self.cursor + 1, # record where the header ended
        }
    def parse_error(self, message):
        '''
            Raises a HeaderError with the given message, located at the
            current cursor position. Never returns.
        '''
        raise HeaderError(index = self.cursor, reason = message)
    def get_more_header_stuff(self):
        '''
            Iterates through the header and yields metacommand entries
            one after the other. The cursor is moved to each entry
            before it is yielded. Suitable header objects that are not
            metacommands are passed over, and iteration stops at the
            first object that is not suitable for a header or at the end
            of the model body.
        '''
        self.cursor += 1
        new_cursor = self.cursor
        while new_cursor < len(self.model_body):
            entry = self.model_body[new_cursor]
            if not is_suitable_header_object(entry):
                # looks like the header ended
                break
            if isinstance(entry, linetypes.MetaCommand):
                self.cursor = new_cursor
                yield entry
            new_cursor += 1
    def skip_to_next(self, *, spaces_expected = 0):
        '''
            Skip to the next header line, i.e. move the cursor forward
            to the next metacommand.

            Raises HeaderError if the model body ends first, or if an
            object that cannot be part of a header is encountered.

            spaces_expected is not used at the moment.
        '''
        while True:
            if self.cursor + 1 >= len(self.model_body):
                # wound up past the end of model
                self.parse_error('file does not have a proper header')
            self.cursor += 1
            entry = self.model_body[self.cursor]
            if not is_suitable_header_object(entry):
                self.parse_error('header is incomplete')
            if isinstance(entry, linetypes.MetaCommand):
                return
    def try_to_match(self, pattern, patterntype):
        '''
            Tries to parse the specified pattern and to store the groups in
            self.groups. Returns whether or not this succeeded.
        '''
        try:
            self.groups = self.parse_pattern(pattern, patterntype)
        except HeaderError:
            # Only a failed match is a soft failure. Anything else
            # (interrupts, programming errors) must not be swallowed.
            return False
        return True
    def current(self):
        '''
            Returns the text of the header line we're currently processing.
        '''
        entry = self.model_body[self.cursor]
        # the cursor is only ever left on metacommands
        assert isinstance(entry, linetypes.MetaCommand)
        return entry.text
    def parse_pattern(self, pattern, description):
        '''
            Matches the current header line against the specified pattern
            and returns the matched groups. A successful match is also
            recorded in self.order and in the occurrences of the result.
            If the line does not match, raises HeaderError. See
            try_to_match for a softer wrapper that does not raise
            exceptions.
        '''
        match = re.search(pattern, self.current())
        if match:
            self.order.append(description)
            self.result.occurrences[description].append(self.cursor)
            return match.groups()
        else:
            self.parse_error(str.format("couldn't parse {}", description))