diff -r 4da025d0b283 -r 38b0919c1934 header.py --- a/header.py Sat May 25 09:41:33 2019 +0200 +++ b/header.py Sat May 25 18:37:55 2019 +0200 @@ -1,5 +1,6 @@ import re import linetypes +import datetime class Header: def __init__(self): @@ -10,12 +11,16 @@ self.filetype = None self.qualifiers = None self.license = None - self.help = [] + self.help = '' self.bfc = None self.category = None - self.keywords = [] + self.keywords = '' self.cmdline = None self.history = [] + self.first_occurrence = dict() + @property + def valid(self): + return True class BadHeader: def __init__(self, index, reason): @@ -27,6 +32,9 @@ index = self.index, reason = self.reason, ) + @property + def valid(self): + return False geometrical_types = [ linetypes.LineSegment, @@ -57,6 +65,16 @@ def __str__(self): return reason +class HistoryEntry: + def __init__(self, date, user, text): + self.date, self.user, self.text = date, user, text + def __repr__(self): + return str.format( + 'HistoryEntry({date!r}, {user!r}, {text!r})', + date = self.date, + user = self.user, + text = self.text) + class HeaderParser: def __init__(self): self.model_body = None @@ -64,6 +82,7 @@ self.problems = [] def parse(self, model_body): result = Header() + self.result = result self.order = [] self.cursor = -1 self.model_body = model_body @@ -72,11 +91,11 @@ self.skip_to_next() result.name = self.parse_pattern('^Name: (.+)$', 'name')[0] self.skip_to_next() - result.author = self.parse_pattern('^Author: (.+)$', 'author')[0] + result.author, result.username = self.parse_pattern(r'^Author: ([^\[]+)\s*(?:\[(.+)\])?$', 'author') for header_entry in self.get_more_header_stuff(): if self.try_to_match( '^!LDRAW_ORG ' + - '(' \ + r'(' \ '(?:Unofficial_)?' \ 'Part|' \ 'Subpart|' \ @@ -85,15 +104,48 @@ '48_Primitive|' \ 'Shortcut' \ ')\s?' \ - '(ORIGINAL|UPDATE \d\d\d\d-\d\d)?$', + '(.*)$', 'part type'): - result.filetype, result.qualifiers = self.groups + result.filetype = self.groups[0] + result.qualifiers = re.findall(r'(?:Physical_Colour|Alias|ORIGINAL|UPDATE \d\d\d\d-\d\d)', self.groups[1]) elif self.try_to_match( '^!LICENSE (.+)$', 'license'): - result.license = self.groups() + result.license = self.groups[0] + elif self.try_to_match( + 'BFC (CERTIFY CW|CERTIFY CCW|NOCERTIFY)', + 'bfc'): + result.bfc = self.groups[0] + elif self.try_to_match( + r'!HISTORY (\d{4}-\d{2}-\d{2}) ([\[{][^\]}]+[\]}]) (.+)$', + 'history'): + result.history.append(HistoryEntry( + date = datetime.datetime.strptime(self.groups[0], '%Y-%m-%d').date(), + user = self.groups[1], + text = self.groups[2], + )) + elif self.try_to_match( + r'!HELP (.+)', + 'help'): + if result.help: + result.help += '\n' + result.help += self.groups[0] + elif self.try_to_match( + r'!CATEGORY (.+)', + 'category'): + result.category = self.groups[0] + elif self.try_to_match( + r'!KEYWORDS (.+)', + 'keywords'): + if result.keywords: + result.keywords += '\n' + result.keywords += self.groups[0] + elif self.try_to_match( + r'!CMDLINE (.+)', + 'cmdline'): + result.cmdline = self.groups[0] else: - self.parse_error("couldn't handle header metacommand: " + repr(header_entry.text)) + self.parse_error("couldn't understand header syntax: " + repr(header_entry.text)) return { 'header': result, 'end-index': self.cursor + 1, @@ -123,6 +175,7 @@ def try_to_match(self, pattern, patterntype): try: self.groups = self.parse_pattern(pattern, patterntype) + return True except: return False def current(self): @@ -133,6 +186,8 @@ match = re.search(pattern, self.current()) if match: self.order.append(description) + if description not in self.result.first_occurrence: + self.result.first_occurrence[description] = self.cursor return match.groups() else: self.parse_error(str.format("couldn't parse {}", description))