header.py

changeset 97
7b24ff111cb6
parent 84
55d52e25267f
child 101
745f2c3aec0a
equal deleted inserted replaced
96:70f8049623ec 97:7b24ff111cb6
1 import re 1 import re
2 import linetypes 2 import linetypes
3 import datetime 3 import datetime
4 4
5 class Header: 5 class Header:
6 '''
7 Result type of header processing, this contains all the header
8 information.
9 '''
6 def __init__(self): 10 def __init__(self):
7 self.description = None 11 self.description = None
8 self.name = None 12 self.name = None
9 self.author = None 13 self.author = None
10 self.username = None 14 self.username = None
@property
def valid(self):
    '''
    Tells whether this object is a usable header. Always True here.
    '''
    return True
@property
def effective_filetype(self):
    '''
    What's the effective file type? A single leading "Unofficial_"
    prefix is left out; the rest of the file type is returned as is.
    '''
    prefix = 'Unofficial_'
    if self.filetype.startswith(prefix):
        # Slice the prefix off instead of splitting on it: splitting
        # cuts the value again at any later occurrence of the prefix
        # text and keeps only the piece in between.
        return self.filetype[len(prefix):]
    return self.filetype
@property
def effective_category(self):
    '''
    Returns the category of the part. An explicitly set category wins;
    otherwise it is the first space-separated word of the description
    with leading punctuation marks ignored.
    '''
    if self.category:
        return self.category
    # kept as a function-scope import so this block does not depend on
    # the module's import list
    import string
    first_word = self.description.split(' ', 1)[0]
    # lstrip drops every leading character found in the given set
    return first_word.lstrip(string.punctuation)
40 52
41 class BadHeader: 53 class BadHeader:
54 '''
55 If header processing fails this object is returned as the resulting
56 header instead. It contains the details of where the header could not
57 be understood and why.
58 '''
42 def __init__(self, index, reason): 59 def __init__(self, index, reason):
43 self.index = index 60 self.index = index
44 self.reason = reason 61 self.reason = reason
45 def __repr__(self): 62 def __repr__(self):
46 return str.format( 63 return str.format(
def is_invertnext(entry):
    '''
    Is the given object a metacommand whose text is "BFC INVERTNEXT"?
    '''
    if not isinstance(entry, linetypes.MetaCommand):
        return False
    return entry.text == "BFC INVERTNEXT"
58 75
59 def is_suitable_header_object(entry): 76 def is_suitable_header_object(entry):
77 '''
78 Is the given object something that we can consider to be
79 part of the header?
80 '''
60 if is_invertnext(entry): 81 if is_invertnext(entry):
61 # BFC INVERTNEXT is not a header command anymore. 82 # It's BFC INVERTNEXT, that's not a header command.
62 return False 83 return False
84 # Check if it's one of the functional linetypes
63 return not any( 85 return not any(
64 isinstance(entry, linetype) 86 isinstance(entry, linetype)
65 for linetype in [ 87 for linetype in [
66 linetypes.SubfileReference, 88 linetypes.SubfileReference,
67 linetypes.LineSegment, 89 linetypes.LineSegment,
72 linetypes.Error, 94 linetypes.Error,
73 ] 95 ]
74 ) 96 )
75 97
class HeaderError(Exception):
    '''
    An error raised during header parsing. Stores the index and the
    reason it was constructed with.
    '''
    def __init__(self, index, reason):
        self.index, self.reason = index, reason
    def __repr__(self):
        return str.format(
            'HeaderError({index!r}, {reason!r})',
            index = self.index,
            reason = self.reason,
        )
    def __str__(self):
        # The reason is stored on the instance; referring to a bare
        # name here raised NameError whenever the error was printed.
        return self.reason
87 112
88 class HistoryEntry: 113 class HistoryEntry:
114 '''
115 Represents a single !HISTORY entry
116 '''
89 def __init__(self, date, user, text): 117 def __init__(self, date, user, text):
90 self.date, self.user, self.text = date, user, text 118 self.date, self.user, self.text = date, user, text
91 def __repr__(self): 119 def __repr__(self):
92 return str.format( 120 return str.format(
93 'HistoryEntry({date!r}, {user!r}, {text!r})', 121 'HistoryEntry({date!r}, {user!r}, {text!r})',
109 self.skip_to_next() 137 self.skip_to_next()
110 result.description = self.current() 138 result.description = self.current()
111 self.skip_to_next() 139 self.skip_to_next()
112 result.name = self.parse_pattern(r'^Name: (.+)$', 'name')[0] 140 result.name = self.parse_pattern(r'^Name: (.+)$', 'name')[0]
113 self.skip_to_next() 141 self.skip_to_next()
114 result.author, result.username = self.parse_pattern(r'^Author: (?:([^ \[]*[^\[]+) )?(?:\[([^\]]+)\])?', 'author') 142 # Parse author line
143 result.author, result.username = self.parse_pattern(r'^Author: (?:([^\[]+))?(?:\[([^\]]+)\])?', 'author')
144 if isinstance(result.author, str):
145 # strip surrounding whitespace (the pattern can capture a trailing space)
146 result.author = str.strip(result.author)
115 if not result.author and not result.username: 147 if not result.author and not result.username:
116 self.parse_error('author line does not contain a name nor username') 148 self.parse_error('author line does not contain a name nor username')
149 # use more patterns to parse the rest of the header
117 for header_entry in self.get_more_header_stuff(): 150 for header_entry in self.get_more_header_stuff():
118 if self.try_to_match( 151 if self.try_to_match(
119 r'^!LDRAW_ORG ' \ 152 r'^!LDRAW_ORG ' \
120 r'((?:Unofficial_)?(?:' \ 153 r'((?:Unofficial_)?(?:' \
121 r'Part|' \ 154 r'Part|' \
177 break 210 break
178 if not result.filetype: 211 if not result.filetype:
179 self.parse_error('LDRAW_ORG line is missing') 212 self.parse_error('LDRAW_ORG line is missing')
180 return { 213 return {
181 'header': result, 214 'header': result,
182 'end-index': self.cursor + 1, 215 'end-index': self.cursor + 1, # record where the header ended
183 } 216 }
184 def parse_error(self, message): 217 def parse_error(self, message):
185 raise HeaderError(index = self.cursor, reason = message) 218 raise HeaderError(index = self.cursor, reason = message)
def get_more_header_stuff(self):
    '''
    Generator over the rest of the header. Starting from the entry
    after the cursor, yields each metacommand entry in turn and stops
    at the first entry that is not a suitable header object. The
    cursor is moved onto each entry right before it is yielded.
    '''
    self.cursor += 1
    position = self.cursor
    while position < len(self.model_body):
        candidate = self.model_body[position]
        if not is_suitable_header_object(candidate):
            # this entry cannot be part of a header, so stop here
            return
        if isinstance(candidate, linetypes.MetaCommand):
            self.cursor = position
            yield candidate
        position += 1
def skip_to_next(self, *, spaces_expected = 0):
    '''
    Advance the cursor to the next metacommand entry. Reports a parse
    error if the model runs out first, or if an entry that is not a
    suitable header object comes up on the way. The spaces_expected
    argument is not used in this method.
    '''
    found = False
    while not found:
        following = self.cursor + 1
        if following >= len(self.model_body):
            # there is nothing left in the model to step onto
            self.parse_error('file does not have a proper header')
        self.cursor = following
        entry = self.model_body[following]
        if not is_suitable_header_object(entry):
            self.parse_error('header is incomplete')
        found = isinstance(entry, linetypes.MetaCommand)
def try_to_match(self, pattern, patterntype):
    '''
    Tries to parse the specified pattern and to store the groups in
    self.groups. Returns whether or not this succeeded.
    '''
    try:
        self.groups = self.parse_pattern(pattern, patterntype)
    except Exception:
        # Any ordinary failure counts as "no match". KeyboardInterrupt
        # and SystemExit are not Exception subclasses, so they still
        # propagate instead of being silently swallowed.
        return False
    return True
def current(self):
    '''
    Returns the text of the entry under the cursor. That entry is
    asserted to be a metacommand.
    '''
    line = self.model_body[self.cursor]
    assert isinstance(line, linetypes.MetaCommand)
    return line.text
217 def parse_pattern(self, pattern, description): 266 def parse_pattern(self, pattern, description):
267 '''
268 Matches the current header line against the specified pattern.
269 If not, raises an exception. See try_to_match for a softer wrapper
270 that does not raise exceptions.
271 '''
218 match = re.search(pattern, self.current()) 272 match = re.search(pattern, self.current())
219 if match: 273 if match:
220 self.order.append(description) 274 self.order.append(description)
221 if description not in self.result.first_occurrence: 275 if description not in self.result.first_occurrence:
222 self.result.first_occurrence[description] = self.cursor 276 self.result.first_occurrence[description] = self.cursor

mercurial