header.py

changeset 147
bec55b021ae7
parent 101
745f2c3aec0a
--- a/header.py	Thu Aug 26 19:16:25 2021 +0300
+++ b/header.py	Thu Aug 26 19:36:44 2021 +0300
@@ -3,6 +3,10 @@
 import datetime
 
 class Header:
+    '''
+        Result type of header processing, this contains all the header
+        information.
+    '''
     def __init__(self):
         self.description = None
         self.name = None
@@ -17,18 +21,27 @@
         self.keywords = ''
         self.cmdline = None
         self.history = []
-        self.first_occurrence = dict()
+        from collections import defaultdict
+        self.occurrences = defaultdict(list)
     @property
     def valid(self):
         return True
     @property
     def effective_filetype(self):
+        '''
+            What's the effective file type? The "Unofficial_" prefix is
+            left out.
+        '''
         if self.filetype.startswith('Unofficial_'):
             return self.filetype.rsplit('Unofficial_')[1]
         else:
             return self.filetype
     @property
     def effective_category(self):
+        '''
+            Returns the category of the part. Leading punctuation marks
+            are ignored.
+        '''
         if self.category:
             return self.category
         else:
@@ -39,6 +52,11 @@
             return category
 
 class BadHeader:
+    '''
+        If header processing fails this object is returned as the resulting
+        header instead. It contains the details of where the header could not
+        be understood and why.
+    '''
     def __init__(self, index, reason):
         self.index = index
         self.reason = reason
@@ -57,9 +75,14 @@
         and entry.text == "BFC INVERTNEXT"
 
 def is_suitable_header_object(entry):
+    '''
+        Is the given object something that we can consider to be
+        part of the header?
+    '''
     if is_invertnext(entry):
-        # BFC INVERTNEXT is not a header command anymore.
+        # It's BFC INVERTNEXT, which is not a header command.
         return False
+    # Check if it's one of the functional linetypes
     return not any(
         isinstance(entry, linetype)
         for linetype in [
@@ -74,6 +97,9 @@
     )
 
 class HeaderError(Exception):
+    '''
+        An error raised during header parsing
+    '''
     def __init__(self, index, reason):
         self.index, self.reason = index, reason
     def __repr__(self):
@@ -86,6 +112,9 @@
         return reason
 
 class HistoryEntry:
+    '''
+        Represents a single !HISTORY entry
+    '''
     def __init__(self, date, user, text):
         self.date, self.user, self.text = date, user, text
     def __repr__(self):
@@ -111,9 +140,14 @@
         self.skip_to_next()
         result.name = self.parse_pattern(r'^Name: (.+)$', 'name')[0]
         self.skip_to_next()
-        result.author, result.username = self.parse_pattern(r'^Author: (?:([^ \[]*[^\[]+) )?(?:\[([^\]]+)\])?', 'author')
+        # Parse author line
+        result.author, result.username = self.parse_pattern(r'^Author: (?:([^\[]+))?(?:\[([^\]]+)\])?', 'author')
+        if isinstance(result.author, str):
+            # strip surrounding whitespace (the pattern also captures the space before '[')
+            result.author = str.strip(result.author)
         if not result.author and not result.username:
             self.parse_error('author line does not contain a name nor username')
+        # use more patterns to parse the rest of the header
         for header_entry in self.get_more_header_stuff():
             if self.try_to_match(
                 r'^!LDRAW_ORG ' \
@@ -179,24 +213,33 @@
             self.parse_error('LDRAW_ORG line is missing')
         return {
             'header': result,
-            'end-index': self.cursor + 1,
+            'end-index': self.cursor + 1, # record where the header ended
         }
     def parse_error(self, message):
         raise HeaderError(index = self.cursor, reason = message)
     def get_more_header_stuff(self):
+        '''
+            Iterates through the header and yields metacommand entries
+            one after the other.
+        '''
         self.cursor += 1
         new_cursor = self.cursor
         while new_cursor < len(self.model_body):
             entry = self.model_body[new_cursor]
             if not is_suitable_header_object(entry):
+                # looks like the header ended
                 break
             if isinstance(entry, linetypes.MetaCommand):
                 self.cursor = new_cursor
                 yield entry
             new_cursor += 1
     def skip_to_next(self, *, spaces_expected = 0):
+        '''
+            Skip to the next header line.
+        '''
         while True:
             if self.cursor + 1 >= len(self.model_body):
+                # wound up past the end of model
                 self.parse_error('file does not have a proper header')
             self.cursor += 1
             entry = self.model_body[self.cursor]
@@ -205,21 +248,32 @@
             if isinstance(entry, linetypes.MetaCommand):
                 return
     def try_to_match(self, pattern, patterntype):
+        '''
+            Tries to parse the specified pattern and to store the groups in
+            self.groups. Returns whether or not this succeeded.
+        '''
         try:
             self.groups = self.parse_pattern(pattern, patterntype)
             return True
         except:
             return False
     def current(self):
+        '''
+            Returns the text of the header line we're currently processing.
+        '''
         entry = self.model_body[self.cursor]
         assert isinstance(entry, linetypes.MetaCommand)
         return entry.text
     def parse_pattern(self, pattern, description):
+        '''
+            Matches the current header line against the specified pattern and
+            returns its groups. If it does not match, raises HeaderError. See
+            try_to_match for a softer wrapper that does not raise exceptions.
+        '''
         match = re.search(pattern, self.current())
         if match:
             self.order.append(description)
-            if description not in self.result.first_occurrence:
-                self.result.first_occurrence[description] = self.cursor
+            list.append(self.result.occurrences[description], self.cursor)
             return match.groups()
         else:
             self.parse_error(str.format("couldn't parse {}", description))

mercurial