diff -r 3555679d276b -r bec55b021ae7 header.py --- a/header.py Thu Aug 26 19:16:25 2021 +0300 +++ b/header.py Thu Aug 26 19:36:44 2021 +0300 @@ -3,6 +3,10 @@ import datetime class Header: + ''' + Result type of header processing, this contains all the header + information. + ''' def __init__(self): self.description = None self.name = None @@ -17,18 +21,27 @@ self.keywords = '' self.cmdline = None self.history = [] - self.first_occurrence = dict() + from collections import defaultdict + self.occurrences = defaultdict(list) @property def valid(self): return True @property def effective_filetype(self): + ''' + What's the effective file type? The "Unofficial_" prefix is + left out. + ''' if self.filetype.startswith('Unofficial_'): return self.filetype.rsplit('Unofficial_')[1] else: return self.filetype @property def effective_category(self): + ''' + Returns the category of the part. Leading punctuation marks + are ignored. + ''' if self.category: return self.category else: @@ -39,6 +52,11 @@ return category class BadHeader: + ''' + If header processing fails this object is returned as the resulting + header instead. It contains the details of where the header could not + be understood and why. + ''' def __init__(self, index, reason): self.index = index self.reason = reason @@ -57,9 +75,14 @@ and entry.text == "BFC INVERTNEXT" def is_suitable_header_object(entry): + ''' + Is the given object something that we can consider to be + part of the header? + ''' if is_invertnext(entry): - # BFC INVERTNEXT is not a header command anymore. + # It's BFC INVERTNEXT, that's not a header command. 
return False + # Check if it's one of the functional linetypes return not any( isinstance(entry, linetype) for linetype in [ @@ -74,6 +97,9 @@ ) class HeaderError(Exception): + ''' + An error raised during header parsing + ''' def __init__(self, index, reason): self.index, self.reason = index, reason def __repr__(self): @@ -86,6 +112,9 @@ return reason class HistoryEntry: + ''' + Represents a single !HISTORY entry + ''' def __init__(self, date, user, text): self.date, self.user, self.text = date, user, text def __repr__(self): @@ -111,9 +140,14 @@ self.skip_to_next() result.name = self.parse_pattern(r'^Name: (.+)$', 'name')[0] self.skip_to_next() - result.author, result.username = self.parse_pattern(r'^Author: (?:([^ \[]*[^\[]+) )?(?:\[([^\]]+)\])?', 'author') + # Parse author line + result.author, result.username = self.parse_pattern(r'^Author: (?:([^\[]+))?(?:\[([^\]]+)\])?', 'author') + if isinstance(result.author, str): + # strip surrounding whitespace (the capture keeps the space before '[') + result.author = str.strip(result.author) if not result.author and not result.username: self.parse_error('author line does not contain a name nor username') + # use more patterns to parse the rest of the header for header_entry in self.get_more_header_stuff(): if self.try_to_match( r'^!LDRAW_ORG ' \ @@ -179,24 +213,33 @@ self.parse_error('LDRAW_ORG line is missing') return { 'header': result, - 'end-index': self.cursor + 1, + 'end-index': self.cursor + 1, # record where the header ended } def parse_error(self, message): raise HeaderError(index = self.cursor, reason = message) def get_more_header_stuff(self): + ''' + Iterates through the header and yields metacommand entries + one after the other. 
+ ''' self.cursor += 1 new_cursor = self.cursor while new_cursor < len(self.model_body): entry = self.model_body[new_cursor] if not is_suitable_header_object(entry): + # looks like the header ended break if isinstance(entry, linetypes.MetaCommand): self.cursor = new_cursor yield entry new_cursor += 1 def skip_to_next(self, *, spaces_expected = 0): + ''' + Skip to the next header line. + ''' while True: if self.cursor + 1 >= len(self.model_body): + # wound up past the end of model self.parse_error('file does not have a proper header') self.cursor += 1 entry = self.model_body[self.cursor] @@ -205,21 +248,32 @@ if isinstance(entry, linetypes.MetaCommand): return def try_to_match(self, pattern, patterntype): + ''' + Tries to parse the specified pattern and to store the groups in + self.groups. Returns whether or not this succeeded. + ''' try: self.groups = self.parse_pattern(pattern, patterntype) return True except: return False def current(self): + ''' + Returns the text of the header line we're currently processing. + ''' entry = self.model_body[self.cursor] assert isinstance(entry, linetypes.MetaCommand) return entry.text def parse_pattern(self, pattern, description): + ''' + Matches the current header line against the specified pattern. + If it does not match, raises a HeaderError. See try_to_match for a softer wrapper + that does not raise exceptions. + ''' match = re.search(pattern, self.current()) if match: self.order.append(description) - if description not in self.result.first_occurrence: - self.result.first_occurrence[description] = self.cursor + list.append(self.result.occurrences[description], self.cursor) return match.groups() else: self.parse_error(str.format("couldn't parse {}", description))