Sun, 23 Jun 2019 12:17:40 +0300
added a test for unknown metacommands
| 47 | 1 | import re |
| 2 | import linetypes | |
| 48 | 3 | import datetime |
| 47 | 4 | |
class Header:
    """Plain record of the values read from a model file's header.

    Instances start out empty; HeaderParser.parse() fills the attributes in
    as it recognises the corresponding header lines.
    """

    def __init__(self):
        self.description = self.name = None
        self.author = self.username = None
        self.filetype = self.qualifiers = self.license = None
        # help and keywords are built up by appending one line at a time,
        # so they begin as empty strings rather than None.
        self.help = ''
        self.bfc = self.category = None
        self.keywords = ''
        self.cmdline = None
        # HistoryEntry objects, in the order they were appended.
        self.history = []
        # Maps a header element's description to the cursor position at
        # which that element was first matched.
        self.first_occurrence = {}

    @property
    def valid(self):
        """Always True: this object represents a usable header."""
        return True

    @property
    def effective_filetype(self):
        """The file type with a leading 'Unofficial_' marker removed.

        'Unofficial_Part' gives 'Part'; a type without the marker is
        returned unchanged.
        """
        marker = 'Unofficial_'
        if not self.filetype.startswith(marker):
            return self.filetype
        return self.filetype.split(marker)[1]
| 47 | 30 | |
class BadHeader:
    """Placeholder describing a header that could not be used.

    Stores an index and a reason and always reports ``valid`` as False.
    NOTE(review): index is presumably the position in the model body where
    the problem was found -- confirm against the code that builds these.
    """

    def __init__(self, index, reason):
        self.index, self.reason = index, reason

    def __repr__(self):
        template = 'header.BadHeader(index = {index!r}, reason = {reason!r})'
        return template.format(index = self.index, reason = self.reason)

    @property
    def valid(self):
        """Always False: this object stands for an unusable header."""
        return False
| 47 | 44 | |
|
56
ed6d39c59e56
fixed BFC INVERTNEXT being interpreted as a header command
Teemu Piippo <teemu@hecknology.net>
parents:
54
diff
changeset
|
def is_invertnext(entry):
    """Tell whether entry is a meta-command whose text is exactly 'BFC INVERTNEXT'."""
    if not isinstance(entry, linetypes.MetaCommand):
        return False
    return entry.text == "BFC INVERTNEXT"
|
ed6d39c59e56
fixed BFC INVERTNEXT being interpreted as a header command
Teemu Piippo <teemu@hecknology.net>
parents:
54
diff
changeset
|
48 | |
def is_suitable_header_object(entry):
    """Tell whether entry may appear inside the header section.

    BFC INVERTNEXT meta-commands and every line type listed below are
    rejected; anything else is accepted.
    """
    if is_invertnext(entry):
        # BFC INVERTNEXT is not a header command anymore.
        return False
    non_header_types = (
        linetypes.SubfileReference,
        linetypes.LineSegment,
        linetypes.Triangle,
        linetypes.Quadrilateral,
        linetypes.ConditionalLine,
        linetypes.Comment,
        linetypes.Error,
    )
    return not isinstance(entry, non_header_types)
| 65 | ||
class HeaderError(Exception):
    """Raised when the header of a model cannot be parsed.

    Attributes:
        index: value supplied by the raiser; HeaderParser.parse_error passes
            its current cursor position here.
        reason: human-readable description of the problem.
    """

    def __init__(self, index, reason):
        self.index, self.reason = index, reason

    def __repr__(self):
        return str.format(
            'HeaderError({index!r}, {reason!r})',
            index = self.index,
            reason = self.reason,
        )

    def __str__(self):
        # The reason lives on the instance; a bare `reason` is not defined in
        # this scope and made str(error) raise NameError.
        return str(self.reason)
| 77 | ||
class HistoryEntry:
    """One !HISTORY line of a header: a date, the user tag and the free text."""

    def __init__(self, date, user, text):
        self.date = date
        self.user = user
        self.text = text

    def __repr__(self):
        return 'HistoryEntry({date!r}, {user!r}, {text!r})'.format(
            date = self.date,
            user = self.user,
            text = self.text,
        )
| 87 | ||
class HeaderParser:
    """Extracts a Header from the leading entries of a parsed model body.

    Typical use is ``HeaderParser().parse(model_body)``. The parser tracks its
    position in ``self.cursor`` (an index into ``model_body``) and signals
    every failure by raising HeaderError.
    """

    def __init__(self):
        self.model_body = None
        self.cursor = 0
        self.problems = []
        # parse() rebinds these on every run; defining them here keeps the
        # helper methods from failing with AttributeError before that.
        self.result = None
        self.order = []
        self.groups = ()

    def parse(self, model_body):
        """Parse the header found at the start of model_body.

        The first three meta-commands must be the description, the
        ``Name:`` line and the ``Author:`` line, in that order. The
        meta-commands after them are matched against the known header
        commands until one is not recognised.

        Returns:
            A dict with the keys 'header' (the populated Header) and
            'end-index' (``self.cursor + 1`` once scanning has stopped).

        Raises:
            HeaderError: the mandatory lines are missing or malformed, a
                history date is invalid, or no !LDRAW_ORG line was found.
        """
        result = Header()
        self.result = result
        self.order = []
        self.cursor = -1
        self.model_body = model_body
        self.skip_to_next()
        result.description = self.current()
        self.skip_to_next()
        result.name = self.parse_pattern(r'^Name: (.+)$', 'name')[0]
        self.skip_to_next()
        result.author, result.username = self.parse_pattern(r'^Author: (?:([^ \[]*[^\[]+) )?(?:\[([^\]]+)\])?', 'author')
        if not result.author and not result.username:
            self.parse_error('author line does not contain a name nor username')
        for header_entry in self.get_more_header_stuff():
            if self.try_to_match(
                r'^!LDRAW_ORG '
                r'((?:Unofficial_)?(?:'
                r'Part|'
                r'Subpart|'
                r'Primitive|'
                r'8_Primitive|'
                r'48_Primitive|'
                r'Shortcut'
                r'))\s?'
                r'(.*)$',
                'part type'):
                result.filetype = self.groups[0]
                result.qualifiers = re.findall(r'(?:Physical_Colour|Alias|ORIGINAL|UPDATE \d\d\d\d-\d\d)', self.groups[1])
            elif self.try_to_match(
                r'^!LICENSE (.+)$',
                'license'):
                result.license = self.groups[0]
            elif self.try_to_match(
                r'BFC (CERTIFY CW|CERTIFY CCW|NOCERTIFY)',
                'bfc'):
                result.bfc = self.groups[0]
            elif self.try_to_match(
                r'!HISTORY (\d{4}-\d{2}-\d{2}) ([\[{][^\]}]+[\]}]) (.+)$',
                'history'):
                try:
                    time_object = datetime.datetime.strptime(
                        self.groups[0],
                        '%Y-%m-%d',
                    )
                except ValueError:
                    # parse_error always raises, so time_object is bound
                    # whenever execution continues past this handler.
                    self.parse_error("invalid ISO date in history")
                result.history.append(HistoryEntry(
                    date = time_object.date(),
                    user = self.groups[1],
                    text = self.groups[2],
                ))
            elif self.try_to_match(
                r'!HELP (.+)',
                'help'):
                # Successive !HELP lines are joined with newlines.
                if result.help:
                    result.help += '\n'
                result.help += self.groups[0]
            elif self.try_to_match(
                r'!CATEGORY (.+)',
                'category'):
                result.category = self.groups[0]
            elif self.try_to_match(
                r'!KEYWORDS (.+)',
                'keywords'):
                # Successive !KEYWORDS lines are joined with newlines.
                if result.keywords:
                    result.keywords += '\n'
                result.keywords += self.groups[0]
            elif self.try_to_match(
                r'!CMDLINE (.+)',
                'cmdline'):
                result.cmdline = self.groups[0]
            else:
                # Unrecognised meta-command: step the cursor back so that
                # this entry is not counted as part of the header.
                self.cursor -= 1
                break
        if not result.filetype:
            self.parse_error('LDRAW_ORG line is missing')
        return {
            'header': result,
            'end-index': self.cursor + 1,
        }

    def parse_error(self, message):
        """Raise HeaderError for the current cursor position; never returns."""
        raise HeaderError(index = self.cursor, reason = message)

    def get_more_header_stuff(self):
        """Yield the meta-commands that follow the current cursor position.

        Scanning starts one entry past ``self.cursor`` and ends at the first
        entry rejected by is_suitable_header_object or at the end of the
        body. Accepted entries that are not meta-commands are skipped
        without being yielded. ``self.cursor`` is set to the index of each
        entry just before it is yielded.
        """
        self.cursor += 1
        new_cursor = self.cursor
        while new_cursor < len(self.model_body):
            entry = self.model_body[new_cursor]
            if not is_suitable_header_object(entry):
                break
            if isinstance(entry, linetypes.MetaCommand):
                self.cursor = new_cursor
                yield entry
            new_cursor += 1

    def skip_to_next(self, *, spaces_expected = 0):
        """Advance the cursor to the next meta-command.

        ``spaces_expected`` is accepted but not used by this method.

        Raises:
            HeaderError: the body ends first, or an entry rejected by
                is_suitable_header_object is reached first.
        """
        while True:
            if self.cursor + 1 >= len(self.model_body):
                self.parse_error('file does not have a proper header')
            self.cursor += 1
            entry = self.model_body[self.cursor]
            if not is_suitable_header_object(entry):
                self.parse_error('header is incomplete')
            if isinstance(entry, linetypes.MetaCommand):
                return

    def try_to_match(self, pattern, patterntype):
        """Match pattern against the current entry without raising on a miss.

        On success the captured groups are stored in ``self.groups`` and True
        is returned; when the pattern does not match, False is returned.
        """
        try:
            self.groups = self.parse_pattern(pattern, patterntype)
            return True
        except HeaderError:
            # Only the "couldn't parse" signal from parse_pattern means
            # "no match"; anything else is a real error and must propagate.
            return False

    def current(self):
        """Return the text of the meta-command at the cursor."""
        entry = self.model_body[self.cursor]
        assert isinstance(entry, linetypes.MetaCommand)
        return entry.text

    def parse_pattern(self, pattern, description):
        """Search the current entry's text for pattern and return its groups.

        A successful match appends description to ``self.order`` and, the
        first time that description matches, records the cursor position in
        the header's ``first_occurrence`` mapping.

        Raises:
            HeaderError: the pattern does not match.
        """
        match = re.search(pattern, self.current())
        if match:
            self.order.append(description)
            self.result.first_occurrence.setdefault(description, self.cursor)
            return match.groups()
        else:
            self.parse_error(str.format("couldn't parse {}", description))