ldcheck: comparison header.py

-:3555679d276b
+:bec55b021ae7
 import re
 import linetypes
 import datetime
 class Header:
     '''
     Result type of header processing, this contains all the header
     information.
     '''
     def __init__(self):
         # Imported here so that the class does not depend on a
         # module-level import being present.
         from collections import defaultdict
         self.description = None
         self.name = None
         self.author = None
         self.username = None
         # Read by effective_filetype; initialised like the other fields
         # so that it always exists on a fresh instance.
         self.filetype = None
         self.bfc = None
         self.category = None
         self.keywords = ''
         self.cmdline = None
         self.history = []
         # Maps a header element description to the list of cursor
         # positions at which that element was matched.
         self.occurrences = defaultdict(list)
     @property
     def valid(self):
         '''
         Whether this object is a usable header. Always True here.
         '''
         return True
     @property
     def effective_filetype(self):
         '''
         What's the effective file type? The "Unofficial_" prefix is
         left out.
         '''
         prefix = 'Unofficial_'
         if self.filetype.startswith(prefix):
             # Slice the prefix off rather than splitting on it, so that
             # only the leading marker is removed.
             return self.filetype[len(prefix):]
         return self.filetype
     @property
     def effective_category(self):
         '''
         Returns the category of the part: the explicit category if one
         is set, otherwise the first word of the description with its
         leading punctuation marks removed.
         '''
         if self.category:
             return self.category
         import string
         first_word = self.description.split(' ', 1)[0]
         return first_word.lstrip(string.punctuation)
 class BadHeader:
+'''
+If header processing fails this object is returned as the resulting
+header instead. It contains the details of where the header could not
+be understood and why.
+'''
 def __init__(self, index, reason):
 self.index = index
 self.reason = reason
 def __repr__(self):
 return str.format(
 def is_invertnext(entry):
     '''
     Is the given object a metacommand whose text is exactly
     "BFC INVERTNEXT"?
     '''
     if not isinstance(entry, linetypes.MetaCommand):
         return False
     return entry.text == "BFC INVERTNEXT"
 def is_suitable_header_object(entry):
     '''
     Tells whether the given object is something that can be
     considered part of the header.
     '''
     if is_invertnext(entry):
         # BFC INVERTNEXT is not a header command.
         return False
     # Anything that is not one of the functional linetypes qualifies.
     functional_linetypes = (
         linetypes.SubfileReference,
         linetypes.LineSegment,
         linetypes.Error,
     )
     return not isinstance(entry, functional_linetypes)
 class HeaderError(Exception):
     '''
     An error raised during header parsing. It carries the index at
     which parsing failed and the reason for the failure.
     '''
     def __init__(self, index, reason):
         # Forward the values so that Exception.args is populated no
         # matter whether they were passed positionally or by keyword.
         super().__init__(index, reason)
         self.index, self.reason = index, reason
     def __repr__(self):
         return str.format(
             'HeaderError({index!r}, {reason!r})',
             index = self.index,
             reason = self.reason,
         )
     def __str__(self):
         # The reason lives on the instance; a bare name would be
         # undefined here.
         return self.reason
 class HistoryEntry:
+'''
+Represents a single !HISTORY entry
+'''
 def __init__(self, date, user, text):
 self.date, self.user, self.text = date, user, text
 def __repr__(self):
 return str.format(
 'HistoryEntry({date!r}, {user!r}, {text!r})',
 self.skip_to_next()
 result.description = self.current()
 self.skip_to_next()
 result.name = self.parse_pattern(r'^Name: (.+)$', 'name')[0]
 self.skip_to_next()
-result.author, result.username = self.parse_pattern(r'^Author: (?:([^ \[]*[^\[]+) )?(?:\[([^\]]+)\])?', 'author')
+# Parse author line
+result.author, result.username = self.parse_pattern(r'^Author: (?:([^\[]+))?(?:\[([^\]]+)\])?', 'author')
+if isinstance(result.author, str):
+# clean leading spaces
+result.author = str.strip(result.author)
 if not result.author and not result.username:
 self.parse_error('author line does not contain a name nor username')
+# use more patterns to parse the rest of the header
 for header_entry in self.get_more_header_stuff():
 if self.try_to_match(
 r'^!LDRAW_ORG ' \
 r'((?:Unofficial_)?(?:' \
 r'Part|' \
 break
 if not result.filetype:
 self.parse_error('LDRAW_ORG line is missing')
 return {
 'header': result,
-'end-index': self.cursor + 1,
+'end-index': self.cursor + 1, # record where the header ended
 }
 def parse_error(self, message):
     '''
     Raises a HeaderError that carries the current cursor position
     and the given message.
     '''
     raise HeaderError(reason = message, index = self.cursor)
 def get_more_header_stuff(self):
     '''
     Generator over the metacommand entries that follow the cursor.
     The cursor is first moved one step forward, and it is placed on
     each entry right before that entry is yielded. Iteration stops at
     the first entry that cannot be part of the header, or at the end
     of the model body.
     '''
     self.cursor += 1
     position = self.cursor
     while position < len(self.model_body):
         candidate = self.model_body[position]
         if not is_suitable_header_object(candidate):
             # this entry cannot belong to the header, so we're done
             return
         if isinstance(candidate, linetypes.MetaCommand):
             self.cursor = position
             yield candidate
         position += 1
 def skip_to_next(self, *, spaces_expected = 0):
     '''
     Moves the cursor forward to the next metacommand entry. A parse
     error is raised if the model body ends first, or if an entry is
     met that cannot be part of the header. spaces_expected is
     accepted but not used here.
     '''
     found = False
     while not found:
         upcoming = self.cursor + 1
         if upcoming >= len(self.model_body):
             # there is nothing left to move on to
             self.parse_error('file does not have a proper header')
         self.cursor = upcoming
         candidate = self.model_body[upcoming]
         if not is_suitable_header_object(candidate):
             self.parse_error('header is incomplete')
         found = isinstance(candidate, linetypes.MetaCommand)
 def try_to_match(self, pattern, patterntype):
     '''
     Tries to parse the specified pattern and to store the groups in
     self.groups. Returns True on success. Returns False, leaving
     self.groups untouched, when parse_pattern reports a HeaderError.
     Any other exception is not swallowed and propagates to the caller.
     '''
     try:
         groups = self.parse_pattern(pattern, patterntype)
     except HeaderError:
         # only a failed match counts as "did not succeed"
         return False
     self.groups = groups
     return True
 def current(self):
     '''
     Gives the text of the entry the cursor is on. That entry is
     asserted to be a metacommand.
     '''
     line = self.model_body[self.cursor]
     assert isinstance(line, linetypes.MetaCommand)
     return line.text
 def parse_pattern(self, pattern, description):
     '''
     Matches the current header line against the specified pattern and
     returns the groups of the match. On success the description is
     appended to self.order and the cursor position is recorded under
     that description in self.result.occurrences. If the line does not
     match, a parse error is raised. See try_to_match for a softer
     wrapper around this method.
     '''
     match = re.search(pattern, self.current())
     if match is None:
         self.parse_error(str.format("couldn't parse {}", description))
     else:
         self.order.append(description)
         # every occurrence is kept, not just the first one
         self.result.occurrences[description].append(self.cursor)
         return match.groups()

Mercurial > ldcheck / file comparison

comparison: header.py

header.py