header.py

import re
import linetypes
import datetime

class Header:
    """Holds the values read from the header of a file.

    A fresh instance is empty; ``HeaderParser.parse`` assigns the fields as
    it recognises the corresponding header lines.
    """
    def __init__(self):
        self.description = None
        self.name = None
        self.author = None
        self.username = None
        # Type given on the !LDRAW_ORG line, including a possible
        # 'Unofficial_' prefix.
        self.filetype = None
        self.qualifiers = None
        self.license = None
        # Multi-line fields: repeated !HELP / !KEYWORDS lines are joined
        # with newlines by the parser.
        self.help = ''
        self.bfc = None
        self.category = None
        self.keywords = ''
        self.cmdline = None
        # HistoryEntry objects in the order they were encountered.
        self.history = []
        # Maps a pattern description (e.g. 'name', 'license') to the cursor
        # index at which that pattern first matched.
        self.first_occurrence = dict()
    @property
    def valid(self):
        """Always True for a Header (``BadHeader.valid`` is always False)."""
        return True
    @property
    def effective_filetype(self):
        """The file type with a leading ``'Unofficial_'`` removed.

        Returns None while no file type has been set, instead of failing
        with AttributeError on the unset value.
        """
        prefix = 'Unofficial_'
        filetype = self.filetype
        if filetype is None:
            return None
        if filetype.startswith(prefix):
            # Slice rather than split so that only the leading prefix is
            # dropped, whatever the rest of the string contains.
            return filetype[len(prefix):]
        return filetype

class BadHeader:
    """Pairs an index with a reason; its ``valid`` property is always False."""
    def __init__(self, index, reason):
        self.index, self.reason = index, reason
    def __repr__(self):
        template = 'header.BadHeader(index = {index!r}, reason = {reason!r})'
        return template.format(index = self.index, reason = self.reason)
    @property
    def valid(self):
        """Always False, mirroring ``Header.valid`` which is always True."""
        return False

# Line types that describe geometry. is_suitable_header_object() rejects any
# entry that is an instance of one of these, so such an entry ends the header.
geometrical_types = [
    linetypes.LineSegment,
    linetypes.Triangle,
    linetypes.Quadrilateral,
    linetypes.ConditionalLine,
]

def is_invertnext(entry):
    """Tell whether *entry* is a meta command whose text is exactly
    ``BFC INVERTNEXT``."""
    if not isinstance(entry, linetypes.MetaCommand):
        return False
    return entry.text == "BFC INVERTNEXT"

def is_suitable_header_object(entry):
    """Tell whether *entry* may appear inside a header.

    An entry is rejected when it is a ``BFC INVERTNEXT`` command, one of the
    geometrical line types, a comment or an error; anything else is accepted.
    """
    # BFC INVERTNEXT is not a header command anymore, hence the first test.
    return not is_invertnext(entry) and not isinstance(
        entry,
        (*geometrical_types, linetypes.Comment, linetypes.Error),
    )

class HeaderError(Exception):
    """Raised when a header cannot be parsed.

    Attributes:
        index: the index passed by the raiser (``HeaderParser.parse_error``
            passes its current cursor position).
        reason: message describing the problem; also the ``str()`` of the
            exception.
    """
    def __init__(self, index, reason):
        # Hand the values to Exception as well so that ``args`` is populated
        # no matter whether the caller used positional or keyword arguments.
        super().__init__(index, reason)
        self.index, self.reason = index, reason
    def __repr__(self):
        return str.format(
            'HeaderError({index!r}, {reason!r})',
            index = self.index,
            reason = self.reason,
        )
    def __str__(self):
        # Must read the attribute: a bare ``reason`` is not defined in this
        # scope and made every str(error) fail with NameError.
        return str(self.reason)

class HistoryEntry:
    """A single history record made of a date, a user and a text."""
    def __init__(self, date, user, text):
        self.date = date
        self.user = user
        self.text = text
    def __repr__(self):
        template = 'HistoryEntry({date!r}, {user!r}, {text!r})'
        return template.format(
            date = self.date,
            user = self.user,
            text = self.text,
        )

class HeaderParser:
    """Reads the header from the beginning of a model body.

    ``parse`` walks ``model_body`` with ``self.cursor`` and fills in a
    ``Header``. Every problem is reported by raising ``HeaderError`` through
    ``parse_error``.
    """
    def __init__(self):
        self.model_body = None
        self.cursor = 0
        self.problems = []
        # Per-parse state. parse() resets ``result`` and ``order``;
        # try_to_match() stores the groups of the latest successful match in
        # ``groups``. They are created here so that the attributes exist on
        # a freshly constructed parser.
        self.result = None
        self.order = []
        self.groups = None
    def parse(self, model_body):
        """Parse the header found at the start of *model_body*.

        The first three meta commands must be, in this order, the
        description, the ``Name:`` line and the ``Author:`` line. Each meta
        command after that, up to the first entry that is not suitable for a
        header, has to match one of the known header patterns.

        Returns:
            A dict holding the filled-in ``Header`` under ``'header'`` and
            ``self.cursor + 1`` under ``'end-index'``.

        Raises:
            HeaderError: if the mandatory lines are missing or malformed, a
                meta command is not understood, a history date is invalid,
                or no ``!LDRAW_ORG`` line was found.
        """
        result = Header()
        self.result = result
        self.order = []
        # Start before the first entry; skip_to_next() advances first.
        self.cursor = -1
        self.model_body = model_body
        self.skip_to_next()
        result.description = self.current()
        self.skip_to_next()
        result.name = self.parse_pattern(r'^Name: (.+)$', 'name')[0]
        self.skip_to_next()
        result.author, result.username = self.parse_pattern(r'^Author: ([^ \[]*[^\[]+) (?:\[([^\]]+)\])?', 'author')
        for header_entry in self.get_more_header_stuff():
            # try_to_match() tests the entry under the cursor, which is the
            # one just yielded, and leaves the captures in self.groups.
            if self.try_to_match(
                r'^!LDRAW_ORG '
                r'((?:Unofficial_)?(?:'
                    r'Part|'
                    r'Subpart|'
                    r'Primitive|'
                    r'8_Primitive|'
                    r'48_Primitive|'
                    r'Shortcut'
                r'))\s?'
                r'(.*)$',
                'part type'):
                result.filetype = self.groups[0]
                result.qualifiers = re.findall(r'(?:Physical_Colour|Alias|ORIGINAL|UPDATE \d\d\d\d-\d\d)', self.groups[1])
            elif self.try_to_match(
                r'^!LICENSE (.+)$',
                'license'):
                result.license = self.groups[0]
            elif self.try_to_match(
                r'BFC (CERTIFY CW|CERTIFY CCW|NOCERTIFY)',
                'bfc'):
                result.bfc = self.groups[0]
            elif self.try_to_match(
                r'!HISTORY (\d{4}-\d{2}-\d{2}) ([\[{][^\]}]+[\]}]) (.+)$',
                'history'):
                try:
                    time_object = datetime.datetime.strptime(
                        self.groups[0],
                        '%Y-%m-%d',
                    )
                except ValueError:
                    # parse_error() always raises, so time_object is bound
                    # whenever execution continues past this block.
                    self.parse_error("invalid ISO date in history")
                result.history.append(HistoryEntry(
                    date = time_object.date(),
                    user = self.groups[1],
                    text = self.groups[2],
                ))
            elif self.try_to_match(
                r'!HELP (.+)',
                'help'):
                # Several !HELP lines accumulate, separated by newlines.
                if result.help:
                    result.help += '\n'
                result.help += self.groups[0]
            elif self.try_to_match(
                r'!CATEGORY (.+)',
                'category'):
                result.category = self.groups[0]
            elif self.try_to_match(
                r'!KEYWORDS (.+)',
                'keywords'):
                # Several !KEYWORDS lines accumulate, separated by newlines.
                if result.keywords:
                    result.keywords += '\n'
                result.keywords += self.groups[0]
            elif self.try_to_match(
                r'!CMDLINE (.+)',
                'cmdline'):
                result.cmdline = self.groups[0]
            else:
                self.parse_error("couldn't understand header syntax: " + repr(header_entry.text))
        if not result.filetype:
            self.parse_error('LDRAW_ORG line is missing')
        # NOTE(review): when the loop above finishes, the cursor already sits
        # on the first non-header entry (or equals len(model_body)), so this
        # value is one past that position - confirm this is what callers
        # expect.
        return {
            'header': result,
            'end-index': self.cursor + 1,
        }
    def parse_error(self, message):
        """Raise ``HeaderError`` for the current cursor position."""
        raise HeaderError(index = self.cursor, reason = message)
    def get_more_header_stuff(self):
        """Yield the meta commands that follow the cursor.

        The cursor is advanced one entry at a time. Iteration stops at the
        end of the body or at the first entry rejected by
        ``is_suitable_header_object``; accepted entries that are not meta
        commands are passed over without being yielded.
        """
        while True:
            self.cursor += 1
            if self.cursor >= len(self.model_body):
                break
            entry = self.model_body[self.cursor]
            if not is_suitable_header_object(entry):
                break
            if isinstance(entry, linetypes.MetaCommand):
                yield entry
    def skip_to_next(self, *, spaces_expected = 0):
        """Advance the cursor to the next meta command.

        ``spaces_expected`` is accepted but not used by this method.

        Raises:
            HeaderError: if the body ends first, or if an entry that is not
                suitable for a header is reached first.
        """
        while True:
            if self.cursor + 1 >= len(self.model_body):
                self.parse_error('file does not have a proper header')
            self.cursor += 1
            entry = self.model_body[self.cursor]
            if not is_suitable_header_object(entry):
                self.parse_error('header is incomplete')
            if isinstance(entry, linetypes.MetaCommand):
                return
    def try_to_match(self, pattern, patterntype):
        """Match *pattern* against the current entry without raising.

        On success the captured groups are stored in ``self.groups`` and
        True is returned; a failed match returns False.
        """
        try:
            self.groups = self.parse_pattern(pattern, patterntype)
        except HeaderError:
            # Only the "no match" signal from parse_pattern() is expected
            # here. Anything else (KeyboardInterrupt, programming errors)
            # must not be swallowed and mistaken for a failed match.
            return False
        return True
    def current(self):
        """Return the text of the meta command under the cursor."""
        entry = self.model_body[self.cursor]
        assert isinstance(entry, linetypes.MetaCommand)
        return entry.text
    def parse_pattern(self, pattern, description):
        """Search the current entry's text for *pattern*.

        A successful match is appended to ``self.order`` under
        *description*, and the cursor position of the first match for each
        description is remembered in ``result.first_occurrence``.

        Returns:
            The tuple of groups captured by the match.

        Raises:
            HeaderError: if the pattern does not match.
        """
        match = re.search(pattern, self.current())
        if not match:
            self.parse_error("couldn't parse {}".format(description))
        self.order.append(description)
        if description not in self.result.first_occurrence:
            self.result.first_occurrence[description] = self.cursor
        return match.groups()