header.py

changeset 147
bec55b021ae7
parent 101
745f2c3aec0a
equal deleted inserted replaced
146:3555679d276b 147:bec55b021ae7
1 import re 1 import re
2 import linetypes 2 import linetypes
3 import datetime 3 import datetime
4 4
5 class Header: 5 class Header:
6 '''
7 Result type of header processing, this contains all the header
8 information.
9 '''
6 def __init__(self): 10 def __init__(self):
7 self.description = None 11 self.description = None
8 self.name = None 12 self.name = None
9 self.author = None 13 self.author = None
10 self.username = None 14 self.username = None
15 self.bfc = None 19 self.bfc = None
16 self.category = None 20 self.category = None
17 self.keywords = '' 21 self.keywords = ''
18 self.cmdline = None 22 self.cmdline = None
19 self.history = [] 23 self.history = []
20 self.first_occurrence = dict() 24 from collections import defaultdict
25 self.occurrences = defaultdict(list)
21 @property 26 @property
22 def valid(self): 27 def valid(self):
23 return True 28 return True
24 @property 29 @property
25 def effective_filetype(self): 30 def effective_filetype(self):
31 '''
32 What's the effective file type? The "Unofficial_" prefix is
33 left out.
34 '''
26 if self.filetype.startswith('Unofficial_'): 35 if self.filetype.startswith('Unofficial_'):
27 return self.filetype.rsplit('Unofficial_')[1] 36 return self.filetype.rsplit('Unofficial_')[1]
28 else: 37 else:
29 return self.filetype 38 return self.filetype
30 @property 39 @property
31 def effective_category(self): 40 def effective_category(self):
41 '''
42 Returns the category of the part. Leading punctuation marks
43 are ignored.
44 '''
32 if self.category: 45 if self.category:
33 return self.category 46 return self.category
34 else: 47 else:
35 import string 48 import string
36 category = self.description.split(' ', 1)[0] 49 category = self.description.split(' ', 1)[0]
37 while category and category[0] in string.punctuation: 50 while category and category[0] in string.punctuation:
38 category = category[1:] 51 category = category[1:]
39 return category 52 return category
40 53
41 class BadHeader: 54 class BadHeader:
55 '''
56 If header processing fails this object is returned as the resulting
57 header instead. It contains the details of where the header could not
58 be understood and why.
59 '''
42 def __init__(self, index, reason): 60 def __init__(self, index, reason):
43 self.index = index 61 self.index = index
44 self.reason = reason 62 self.reason = reason
45 def __repr__(self): 63 def __repr__(self):
46 return str.format( 64 return str.format(
55 def is_invertnext(entry): 73 def is_invertnext(entry):
56 return isinstance(entry, linetypes.MetaCommand) \ 74 return isinstance(entry, linetypes.MetaCommand) \
57 and entry.text == "BFC INVERTNEXT" 75 and entry.text == "BFC INVERTNEXT"
58 76
59 def is_suitable_header_object(entry): 77 def is_suitable_header_object(entry):
78 '''
79 Is the given object something that we can consider to be
80 part of the header?
81 '''
60 if is_invertnext(entry): 82 if is_invertnext(entry):
61 # BFC INVERTNEXT is not a header command anymore. 83 # It's BFC INVERTNEXT, that's not a header command.
62 return False 84 return False
85 # Check if it's one of the functional linetypes
63 return not any( 86 return not any(
64 isinstance(entry, linetype) 87 isinstance(entry, linetype)
65 for linetype in [ 88 for linetype in [
66 linetypes.SubfileReference, 89 linetypes.SubfileReference,
67 linetypes.LineSegment, 90 linetypes.LineSegment,
72 linetypes.Error, 95 linetypes.Error,
73 ] 96 ]
74 ) 97 )
75 98
76 class HeaderError(Exception): 99 class HeaderError(Exception):
100 '''
101 An error raised during header parsing
102 '''
77 def __init__(self, index, reason): 103 def __init__(self, index, reason):
78 self.index, self.reason = index, reason 104 self.index, self.reason = index, reason
79 def __repr__(self): 105 def __repr__(self):
80 return str.format( 106 return str.format(
81 'HeaderError({index!r}, {reason!r})', 107 'HeaderError({index!r}, {reason!r})',
84 ) 110 )
85 def __str__(self): 111 def __str__(self):
86 return reason 112 return reason
87 113
88 class HistoryEntry: 114 class HistoryEntry:
115 '''
116 Represents a single !HISTORY entry
117 '''
89 def __init__(self, date, user, text): 118 def __init__(self, date, user, text):
90 self.date, self.user, self.text = date, user, text 119 self.date, self.user, self.text = date, user, text
91 def __repr__(self): 120 def __repr__(self):
92 return str.format( 121 return str.format(
93 'HistoryEntry({date!r}, {user!r}, {text!r})', 122 'HistoryEntry({date!r}, {user!r}, {text!r})',
109 self.skip_to_next() 138 self.skip_to_next()
110 result.description = self.current() 139 result.description = self.current()
111 self.skip_to_next() 140 self.skip_to_next()
112 result.name = self.parse_pattern(r'^Name: (.+)$', 'name')[0] 141 result.name = self.parse_pattern(r'^Name: (.+)$', 'name')[0]
113 self.skip_to_next() 142 self.skip_to_next()
114 result.author, result.username = self.parse_pattern(r'^Author: (?:([^ \[]*[^\[]+) )?(?:\[([^\]]+)\])?', 'author') 143 # Parse author line
144 result.author, result.username = self.parse_pattern(r'^Author: (?:([^\[]+))?(?:\[([^\]]+)\])?', 'author')
145 if isinstance(result.author, str):
146 # clean leading spaces
147 result.author = str.strip(result.author)
115 if not result.author and not result.username: 148 if not result.author and not result.username:
116 self.parse_error('author line does not contain a name nor username') 149 self.parse_error('author line does not contain a name nor username')
150 # use more patterns to parse the rest of the header
117 for header_entry in self.get_more_header_stuff(): 151 for header_entry in self.get_more_header_stuff():
118 if self.try_to_match( 152 if self.try_to_match(
119 r'^!LDRAW_ORG ' \ 153 r'^!LDRAW_ORG ' \
120 r'((?:Unofficial_)?(?:' \ 154 r'((?:Unofficial_)?(?:' \
121 r'Part|' \ 155 r'Part|' \
177 break 211 break
178 if not result.filetype: 212 if not result.filetype:
179 self.parse_error('LDRAW_ORG line is missing') 213 self.parse_error('LDRAW_ORG line is missing')
180 return { 214 return {
181 'header': result, 215 'header': result,
182 'end-index': self.cursor + 1, 216 'end-index': self.cursor + 1, # record where the header ended
183 } 217 }
184 def parse_error(self, message): 218 def parse_error(self, message):
185 raise HeaderError(index = self.cursor, reason = message) 219 raise HeaderError(index = self.cursor, reason = message)
186 def get_more_header_stuff(self): 220 def get_more_header_stuff(self):
221 '''
222 Iterates through the header and yields metacommand entries
223 one after the other.
224 '''
187 self.cursor += 1 225 self.cursor += 1
188 new_cursor = self.cursor 226 new_cursor = self.cursor
189 while new_cursor < len(self.model_body): 227 while new_cursor < len(self.model_body):
190 entry = self.model_body[new_cursor] 228 entry = self.model_body[new_cursor]
191 if not is_suitable_header_object(entry): 229 if not is_suitable_header_object(entry):
230 # looks like the header ended
192 break 231 break
193 if isinstance(entry, linetypes.MetaCommand): 232 if isinstance(entry, linetypes.MetaCommand):
194 self.cursor = new_cursor 233 self.cursor = new_cursor
195 yield entry 234 yield entry
196 new_cursor += 1 235 new_cursor += 1
197 def skip_to_next(self, *, spaces_expected = 0): 236 def skip_to_next(self, *, spaces_expected = 0):
237 '''
238 Skip to the next header line.
239 '''
198 while True: 240 while True:
199 if self.cursor + 1 >= len(self.model_body): 241 if self.cursor + 1 >= len(self.model_body):
242 # wound up past the end of model
200 self.parse_error('file does not have a proper header') 243 self.parse_error('file does not have a proper header')
201 self.cursor += 1 244 self.cursor += 1
202 entry = self.model_body[self.cursor] 245 entry = self.model_body[self.cursor]
203 if not is_suitable_header_object(entry): 246 if not is_suitable_header_object(entry):
204 self.parse_error('header is incomplete') 247 self.parse_error('header is incomplete')
205 if isinstance(entry, linetypes.MetaCommand): 248 if isinstance(entry, linetypes.MetaCommand):
206 return 249 return
207 def try_to_match(self, pattern, patterntype): 250 def try_to_match(self, pattern, patterntype):
251 '''
252 Tries to parse the specified pattern and to store the groups in
253 self.groups. Returns whether or not this succeeded.
254 '''
208 try: 255 try:
209 self.groups = self.parse_pattern(pattern, patterntype) 256 self.groups = self.parse_pattern(pattern, patterntype)
210 return True 257 return True
211 except: 258 except:
212 return False 259 return False
213 def current(self): 260 def current(self):
261 '''
262 Returns the text of the header line we're currently processing.
263 '''
214 entry = self.model_body[self.cursor] 264 entry = self.model_body[self.cursor]
215 assert isinstance(entry, linetypes.MetaCommand) 265 assert isinstance(entry, linetypes.MetaCommand)
216 return entry.text 266 return entry.text
217 def parse_pattern(self, pattern, description): 267 def parse_pattern(self, pattern, description):
268 '''
269 Matches the current header line against the specified pattern.
270 If not, raises an exception. See try_to_match for a softer wrapper
271 that does not raise exceptions.
272 '''
218 match = re.search(pattern, self.current()) 273 match = re.search(pattern, self.current())
219 if match: 274 if match:
220 self.order.append(description) 275 self.order.append(description)
221 if description not in self.result.first_occurrence: 276 list.append(self.result.occurrences[description], self.cursor)
222 self.result.first_occurrence[description] = self.cursor
223 return match.groups() 277 return match.groups()
224 else: 278 else:
225 self.parse_error(str.format("couldn't parse {}", description)) 279 self.parse_error(str.format("couldn't parse {}", description))

mercurial