Thu, 27 Aug 2020 00:27:42 +0300
...
47 | 1 | import re |
2 | import linetypes | |
48 | 3 | import datetime |
47 | 4 | |
class Header:
    '''
        Record produced by successful header processing. Every header
        field is exposed as a plain attribute.
    '''
    def __init__(self):
        from collections import defaultdict
        # Single-valued fields start out as None until something is
        # assigned to them.
        self.description = None
        self.name = None
        self.author = None
        self.username = None
        self.filetype = None
        self.qualifiers = None
        self.license = None
        # help and keywords are accumulated as text, so they start empty
        self.help = ''
        self.bfc = None
        self.category = None
        self.keywords = ''
        self.cmdline = None
        self.history = []
        # maps a key to a list; unknown keys start with an empty list
        self.occurrences = defaultdict(list)

    @property
    def valid(self):
        ''' Always True for this class. '''
        return True

    @property
    def effective_filetype(self):
        '''
            The file type with a leading "Unofficial_" prefix removed.
            File types without that prefix are returned as they are.
        '''
        prefix = 'Unofficial_'
        if not self.filetype.startswith(prefix):
            return self.filetype
        return self.filetype.split(prefix)[1]

    @property
    def effective_category(self):
        '''
            The explicitly set category if there is one. Otherwise the
            first word of the description, with any leading punctuation
            characters removed.
        '''
        if self.category:
            return self.category
        import string
        first_word = self.description.split(' ', 1)[0]
        return first_word.lstrip(string.punctuation)
47 | 53 | |
class BadHeader:
    '''
        Stand-in header object for when header processing fails. It
        stores the index at which processing gave up and the reason why.
    '''
    def __init__(self, index, reason):
        self.index, self.reason = index, reason

    def __repr__(self):
        template = 'header.BadHeader(index = {index!r}, reason = {reason!r})'
        return template.format(index = self.index, reason = self.reason)

    @property
    def valid(self):
        ''' Always False for this class. '''
        return False
47 | 72 | |
56
ed6d39c59e56
fixed BFC INVERTNEXT being interpreted as a header command
Teemu Piippo <teemu@hecknology.net>
parents:
54
diff
changeset
|
def is_invertnext(entry):
    '''
        Tells whether the given entry is a metacommand whose text is
        exactly "BFC INVERTNEXT".
    '''
    if not isinstance(entry, linetypes.MetaCommand):
        return False
    return entry.text == "BFC INVERTNEXT"
ed6d39c59e56
fixed BFC INVERTNEXT being interpreted as a header command
Teemu Piippo <teemu@hecknology.net>
parents:
54
diff
changeset
|
76 | |
def is_suitable_header_object(entry):
    '''
        Decides whether the given object may be treated as part of the
        header.
    '''
    # BFC INVERTNEXT is a metacommand but not a header command.
    if is_invertnext(entry):
        return False
    # Entries of any of these line types are never part of the header.
    non_header_types = (
        linetypes.SubfileReference,
        linetypes.LineSegment,
        linetypes.Triangle,
        linetypes.Quadrilateral,
        linetypes.ConditionalLine,
        linetypes.Comment,
        linetypes.Error,
    )
    return not isinstance(entry, non_header_types)
98 | ||
class HeaderError(Exception):
    '''
        An error raised during header parsing.

        index: position at which the problem was found
        reason: human-readable description of the problem
    '''
    def __init__(self, index, reason):
        self.index, self.reason = index, reason
    def __repr__(self):
        return str.format(
            'HeaderError({index!r}, {reason!r})',
            index = self.index,
            reason = self.reason,
        )
    def __str__(self):
        # The reason is stored on the instance; a bare `reason` is not
        # defined in this scope and would raise NameError.
        return str(self.reason)
113 | ||
class HistoryEntry:
    '''
        One !HISTORY entry: its date, the user it is attributed to and
        the text of the entry.
    '''
    def __init__(self, date, user, text):
        self.date = date
        self.user = user
        self.text = text

    def __repr__(self):
        template = 'HistoryEntry({date!r}, {user!r}, {text!r})'
        return template.format(
            date = self.date,
            user = self.user,
            text = self.text,
        )
126 | ||
class HeaderParser:
    '''
        Parses the header out of a model body. The parser walks the body
        with a cursor (an index into the body) and fills in a Header
        object. Problems are reported by raising HeaderError.
    '''
    def __init__(self):
        self.model_body = None
        self.cursor = 0
        self.problems = []
    def parse(self, model_body):
        '''
            Parses the header at the start of model_body, an indexable
            sequence of line objects.

            The first three metacommands must be, in order, the
            description, the "Name:" line and the "Author:" line. Any
            further recognised header metacommands may follow in any
            order. Parsing stops at the first metacommand that matches
            none of the known patterns.

            Returns a dictionary with the keys 'header' (the Header
            object) and 'end-index'. Raises HeaderError if the header
            cannot be understood.
        '''
        result = Header()
        self.result = result
        self.order = []
        # start before the first entry: skip_to_next advances first
        self.cursor = -1
        self.model_body = model_body
        self.skip_to_next()
        result.description = self.current()
        self.skip_to_next()
        result.name = self.parse_pattern(r'^Name: (.+)$', 'name')[0]
        self.skip_to_next()
        # Parse author line
        result.author, result.username = self.parse_pattern(r'^Author: (?:([^\[]+))?(?:\[([^\]]+)\])?', 'author')
        if isinstance(result.author, str):
            # clean leading and trailing spaces
            result.author = str.strip(result.author)
        if not result.author and not result.username:
            self.parse_error('author line does not contain a name nor username')
        # use more patterns to parse the rest of the header
        for header_entry in self.get_more_header_stuff():
            if self.try_to_match(
                r'^!LDRAW_ORG '
                r'((?:Unofficial_)?(?:'
                    r'Part|'
                    r'Subpart|'
                    r'Primitive|'
                    r'8_Primitive|'
                    r'48_Primitive|'
                    r'Shortcut'
                r'))\s?'
                r'(.*)$',
                'part type'):
                result.filetype = self.groups[0]
                result.qualifiers = re.findall(r'(?:Physical_Colour|Alias|ORIGINAL|UPDATE \d\d\d\d-\d\d)', self.groups[1])
            elif self.try_to_match(
                r'^!LICENSE (.+)$',
                'license'):
                result.license = self.groups[0]
            elif self.try_to_match(
                r'BFC (CERTIFY CW|CERTIFY CCW|NOCERTIFY)',
                'bfc'):
                result.bfc = self.groups[0]
            elif self.try_to_match(
                r'!HISTORY (\d{4}-\d{2}-\d{2}) ([\[{][^\]}]+[\]}]) (.+)$',
                'history'):
                try:
                    time_object = datetime.datetime.strptime(
                        self.groups[0],
                        '%Y-%m-%d',
                    )
                except ValueError:
                    # parse_error raises, so time_object is always
                    # bound when execution continues below
                    self.parse_error("invalid ISO date in history")
                result.history.append(HistoryEntry(
                    date = time_object.date(),
                    user = self.groups[1],
                    text = self.groups[2],
                ))
            elif self.try_to_match(
                r'!HELP (.+)',
                'help'):
                # multiple !HELP lines are joined with newlines
                if result.help:
                    result.help += '\n'
                result.help += self.groups[0]
            elif self.try_to_match(
                r'!CATEGORY (.+)',
                'category'):
                result.category = self.groups[0]
            elif self.try_to_match(
                r'!KEYWORDS (.+)',
                'keywords'):
                # multiple !KEYWORDS lines are joined with newlines
                if result.keywords:
                    result.keywords += '\n'
                result.keywords += self.groups[0]
            elif self.try_to_match(
                r'!CMDLINE (.+)',
                'cmdline'):
                result.cmdline = self.groups[0]
            else:
                # unrecognised metacommand: step back so that it is not
                # counted as part of the header
                self.cursor -= 1
                break
        if not result.filetype:
            self.parse_error('LDRAW_ORG line is missing')
        return {
            'header': result,
            'end-index': self.cursor + 1, # record where the header ended
        }
    def parse_error(self, message):
        '''
            Raises HeaderError with the given message at the current
            cursor position. Never returns.
        '''
        raise HeaderError(index = self.cursor, reason = message)
    def get_more_header_stuff(self):
        '''
            Iterates through the header and yields metacommand entries
            one after the other. The cursor is moved onto each entry
            before it is yielded. Iteration ends at the end of the body
            or at the first entry that is not a suitable header object.
        '''
        self.cursor += 1
        new_cursor = self.cursor
        while new_cursor < len(self.model_body):
            entry = self.model_body[new_cursor]
            if not is_suitable_header_object(entry):
                # looks like the header ended
                break
            if isinstance(entry, linetypes.MetaCommand):
                self.cursor = new_cursor
                yield entry
            new_cursor += 1
    def skip_to_next(self, *, spaces_expected = 0):
        '''
            Skip to the next header line, i.e. advance the cursor to the
            next metacommand. Raises HeaderError if the body ends first
            or if an entry that cannot be part of a header is met.
            spaces_expected is accepted but not used.
        '''
        while True:
            if self.cursor + 1 >= len(self.model_body):
                # wound up past the end of model
                self.parse_error('file does not have a proper header')
            self.cursor += 1
            entry = self.model_body[self.cursor]
            if not is_suitable_header_object(entry):
                self.parse_error('header is incomplete')
            if isinstance(entry, linetypes.MetaCommand):
                return
    def try_to_match(self, pattern, patterntype):
        '''
            Tries to parse the specified pattern and to store the groups in
            self.groups. Returns whether or not this succeeded.
        '''
        try:
            self.groups = self.parse_pattern(pattern, patterntype)
        except HeaderError:
            # Only a failed match counts as a soft failure. Any other
            # exception is a genuine error and is allowed to propagate.
            return False
        return True
    def current(self):
        '''
            Returns the text of the header line we're currently processing.
        '''
        entry = self.model_body[self.cursor]
        assert isinstance(entry, linetypes.MetaCommand)
        return entry.text
    def parse_pattern(self, pattern, description):
        '''
            Matches the current header line against the specified pattern
            and returns the matched groups. On success the description is
            appended to self.order and the cursor position is recorded in
            the result's occurrences under that description.
            If the line does not match, raises HeaderError. See
            try_to_match for a softer wrapper that does not raise on a
            failed match.
        '''
        match = re.search(pattern, self.current())
        if match:
            self.order.append(description)
            list.append(self.result.occurrences[description], self.cursor)
            return match.groups()
        else:
            self.parse_error(str.format("couldn't parse {}", description))