Mon, 24 Jun 2019 17:34:30 +0300
don't check the category of '~'-files
# Parsing of the header section of an LDraw model file.
#
# The header consists of a description line, a ``Name:`` line, an
# ``Author:`` line and any number of further meta commands (``!LDRAW_ORG``,
# ``!LICENSE``, ``BFC ...``, ``!HISTORY``, ``!HELP``, ``!CATEGORY``,
# ``!KEYWORDS``, ``!CMDLINE``).  ``HeaderParser.parse`` turns those lines into
# a ``Header`` object.

import datetime
import re
import string

import linetypes


class Header:
    """Parsed header information of a model file."""

    def __init__(self):
        # Text of the first meta command in the file.
        self.description = None
        # Value of the "Name: " line.
        self.name = None
        # Real name and bracketed username from the "Author: " line.
        self.author = None
        self.username = None
        # Part type from the !LDRAW_ORG line, e.g. 'Unofficial_Part'.
        self.filetype = None
        # List of qualifiers found after the part type on the !LDRAW_ORG line.
        self.qualifiers = None
        self.license = None
        # All !HELP lines, joined with newlines.
        self.help = ''
        # One of 'CERTIFY CW', 'CERTIFY CCW' or 'NOCERTIFY'.
        self.bfc = None
        # Explicit !CATEGORY value, if any (see effective_category).
        self.category = None
        # All !KEYWORDS lines, joined with newlines.
        self.keywords = ''
        self.cmdline = None
        # List of HistoryEntry objects, in file order.
        self.history = []
        # Maps a header element description (e.g. 'license') to the index of
        # the model body entry where it was first matched.
        self.first_occurrence = dict()

    @property
    def valid(self):
        """Always True; the counterpart of ``BadHeader.valid``."""
        return True

    @property
    def effective_filetype(self):
        """Return the file type with any leading 'Unofficial_' removed."""
        prefix = 'Unofficial_'
        if self.filetype.startswith(prefix):
            # Slice the prefix off rather than splitting on it, so only the
            # leading occurrence is removed.
            return self.filetype[len(prefix):]
        return self.filetype

    @property
    def effective_category(self):
        """Return the explicit category, or derive one from the description.

        Without a !CATEGORY line the category is the first space-separated
        word of the description with leading punctuation stripped.
        """
        if self.category:
            return self.category
        first_word = self.description.split(' ', 1)[0]
        return first_word.lstrip(string.punctuation)


class BadHeader:
    """Placeholder result describing why a header could not be parsed."""

    def __init__(self, index, reason):
        self.index = index
        self.reason = reason

    def __repr__(self):
        return str.format(
            'header.BadHeader(index = {index!r}, reason = {reason!r})',
            index = self.index,
            reason = self.reason,
        )

    @property
    def valid(self):
        """Always False; the counterpart of ``Header.valid``."""
        return False


def is_invertnext(entry):
    """Return True if ``entry`` is the meta command "BFC INVERTNEXT"."""
    return isinstance(entry, linetypes.MetaCommand) \
        and entry.text == "BFC INVERTNEXT"


def is_suitable_header_object(entry):
    """Return True if ``entry`` may appear inside the header.

    Geometry, subfile references, comments, error entries and
    BFC INVERTNEXT all terminate the header.
    """
    if is_invertnext(entry):
        # BFC INVERTNEXT is not a header command anymore.
        return False
    # The tuple is built at call time, exactly like the original list, so
    # importing this module does not require every line type to exist.
    body_linetypes = (
        linetypes.SubfileReference,
        linetypes.LineSegment,
        linetypes.Triangle,
        linetypes.Quadrilateral,
        linetypes.ConditionalLine,
        linetypes.Comment,
        linetypes.Error,
    )
    return not isinstance(entry, body_linetypes)


class HeaderError(Exception):
    """Raised when the header cannot be parsed.

    ``index`` is the parser cursor at the time of the failure and ``reason``
    is a human-readable message.
    """

    def __init__(self, index, reason):
        self.index, self.reason = index, reason

    def __repr__(self):
        return str.format(
            'HeaderError({index!r}, {reason!r})',
            index = self.index,
            reason = self.reason,
        )

    def __str__(self):
        # Must read the attribute: a bare ``reason`` is an undefined name
        # here and made str(error) raise NameError.
        return self.reason


class HistoryEntry:
    """A single !HISTORY line: date, user and free-form text."""

    def __init__(self, date, user, text):
        self.date, self.user, self.text = date, user, text

    def __repr__(self):
        return str.format(
            'HistoryEntry({date!r}, {user!r}, {text!r})',
            date = self.date,
            user = self.user,
            text = self.text)


class HeaderParser:
    """Stateful parser that reads a ``Header`` out of a model body."""

    def __init__(self):
        self.model_body = None
        self.cursor = 0
        self.problems = []
        # Populated by parse(); initialised here so the attributes always
        # exist on a freshly constructed parser.
        self.result = None
        self.order = []
        self.groups = None

    def parse(self, model_body):
        """Parse the header at the start of ``model_body``.

        Returns a dict with the keys 'header' (the ``Header`` object) and
        'end-index' (the final cursor position plus one).

        Raises ``HeaderError`` if the description, name or author line is
        missing or malformed, if a !HISTORY date is invalid, or if no
        !LDRAW_ORG line was found.
        """
        result = Header()
        self.result = result
        self.order = []
        self.cursor = -1
        self.model_body = model_body
        # The first three meta commands are mandatory and positional.
        self.skip_to_next()
        result.description = self.current()
        self.skip_to_next()
        result.name = self.parse_pattern(r'^Name: (.+)$', 'name')[0]
        self.skip_to_next()
        result.author, result.username = self.parse_pattern(
            r'^Author: (?:([^ \[]*[^\[]+) )?(?:\[([^\]]+)\])?',
            'author',
        )
        if not result.author and not result.username:
            self.parse_error('author line does not contain a name nor username')
        # The remaining header commands may come in any order. The generator
        # moves self.cursor onto each candidate before yielding it, and the
        # try_to_match calls read the entry at the cursor.
        for _ in self.get_more_header_stuff():
            if self.try_to_match(
                r'^!LDRAW_ORG '
                r'((?:Unofficial_)?(?:'
                r'Part|'
                r'Subpart|'
                r'Primitive|'
                r'8_Primitive|'
                r'48_Primitive|'
                r'Shortcut'
                r'))\s?'
                r'(.*)$',
                'part type',
            ):
                result.filetype = self.groups[0]
                result.qualifiers = re.findall(
                    r'(?:Physical_Colour|Alias|ORIGINAL|UPDATE \d\d\d\d-\d\d)',
                    self.groups[1],
                )
            elif self.try_to_match(r'^!LICENSE (.+)$', 'license'):
                result.license = self.groups[0]
            elif self.try_to_match(
                r'BFC (CERTIFY CW|CERTIFY CCW|NOCERTIFY)',
                'bfc',
            ):
                result.bfc = self.groups[0]
            elif self.try_to_match(
                r'!HISTORY (\d{4}-\d{2}-\d{2}) ([\[{][^\]}]+[\]}]) (.+)$',
                'history',
            ):
                try:
                    time_object = datetime.datetime.strptime(
                        self.groups[0],
                        '%Y-%m-%d',
                    )
                except ValueError:
                    # parse_error always raises, so time_object is never
                    # read while unbound below.
                    self.parse_error("invalid ISO date in history")
                result.history.append(HistoryEntry(
                    date = time_object.date(),
                    user = self.groups[1],
                    text = self.groups[2],
                ))
            elif self.try_to_match(r'!HELP (.+)', 'help'):
                if result.help:
                    result.help += '\n'
                result.help += self.groups[0]
            elif self.try_to_match(r'!CATEGORY (.+)', 'category'):
                result.category = self.groups[0]
            elif self.try_to_match(r'!KEYWORDS (.+)', 'keywords'):
                if result.keywords:
                    result.keywords += '\n'
                result.keywords += self.groups[0]
            elif self.try_to_match(r'!CMDLINE (.+)', 'cmdline'):
                result.cmdline = self.groups[0]
            else:
                # Not a header command: step back so the cursor does not
                # include this entry, and stop reading the header.
                self.cursor -= 1
                break
        if not result.filetype:
            self.parse_error('LDRAW_ORG line is missing')
        return {
            'header': result,
            'end-index': self.cursor + 1,
        }

    def parse_error(self, message):
        """Raise ``HeaderError`` for the current cursor position."""
        raise HeaderError(index = self.cursor, reason = message)

    def get_more_header_stuff(self):
        """Yield the meta commands that follow the cursor.

        Iteration stops at the first entry that is not suitable for a header
        or at the end of the model body. ``self.cursor`` is set to the index
        of each meta command just before it is yielded; other suitable
        entries are skipped without being yielded.
        """
        self.cursor += 1
        new_cursor = self.cursor
        while new_cursor < len(self.model_body):
            entry = self.model_body[new_cursor]
            if not is_suitable_header_object(entry):
                break
            if isinstance(entry, linetypes.MetaCommand):
                self.cursor = new_cursor
                yield entry
            new_cursor += 1

    def skip_to_next(self, *, spaces_expected = 0):
        """Advance the cursor to the next meta command.

        ``spaces_expected`` is accepted for interface compatibility and is
        not used.

        Raises ``HeaderError`` if the body ends first or if an entry that is
        not suitable for a header is encountered.
        """
        while True:
            if self.cursor + 1 >= len(self.model_body):
                self.parse_error('file does not have a proper header')
            self.cursor += 1
            entry = self.model_body[self.cursor]
            if not is_suitable_header_object(entry):
                self.parse_error('header is incomplete')
            if isinstance(entry, linetypes.MetaCommand):
                return

    def try_to_match(self, pattern, patterntype):
        """Try ``pattern`` against the current entry without raising.

        On success the captured groups are stored in ``self.groups`` and True
        is returned; on a failed match False is returned.
        """
        try:
            self.groups = self.parse_pattern(pattern, patterntype)
        except HeaderError:
            # Only a failed match is expected here; anything else (for
            # example KeyboardInterrupt) must propagate.
            return False
        return True

    def current(self):
        """Return the text of the meta command at the cursor."""
        entry = self.model_body[self.cursor]
        assert isinstance(entry, linetypes.MetaCommand)
        return entry.text

    def parse_pattern(self, pattern, description):
        """Search the current entry for ``pattern`` and return its groups.

        A successful match appends ``description`` to ``self.order`` and
        records the cursor in ``result.first_occurrence`` if ``description``
        has not been seen before.

        Raises ``HeaderError`` if the pattern does not match.
        """
        match = re.search(pattern, self.current())
        if match:
            self.order.append(description)
            if description not in self.result.first_occurrence:
                self.result.first_occurrence[description] = self.cursor
            return match.groups()
        else:
            self.parse_error(str.format("couldn't parse {}", description))