No flags found
Use flags to group coverage reports by test type, project and/or folders.
Then set up custom commit statuses and notifications for each flag.
e.g., #unittest #integration
#production #enterprise
#frontend #backend
7de21ac
... +1 ...
f4457b8
Use flags to group coverage reports by test type, project and/or folders.
Then set up custom commit statuses and notifications for each flag.
e.g., #unittest #integration
#production #enterprise
#frontend #backend
1 | + | """Methods for deducing and understanding indents.""" |
|
2 | + | ||
3 | + | import logging |
|
4 | + | ||
5 | + | from sqlfluff.core.parser import RawSegment, BaseSegment |
|
6 | + | ||
7 | + | ||
8 | + | # We're in the utils module, but users will expect reflow |
|
9 | + | # logs to appear in the context of rules. Hence it's a subset |
|
10 | + | # of the rules logger. |
|
11 | + | reflow_logger = logging.getLogger("sqlfluff.rules.reflow") |
|
12 | + | ||
13 | + | ||
def deduce_line_indent(raw_segment: RawSegment, root_segment: BaseSegment) -> str:
    """Given a raw segment, deduce the indent of its line.

    The raw segments of ``root_segment`` are scanned backwards, starting
    at ``raw_segment`` itself, until a newline is reached (or the start
    of the file).

    Args:
        raw_segment: The segment whose line we are interested in. It must
            be present in ``root_segment.raw_segments``.
        root_segment: The segment providing the full list of raw segments.

    Returns:
        The raw string of the whitespace found at the start of the line,
        or an empty string if no such whitespace was found.
    """
    raws = root_segment.raw_segments
    position = raws.index(raw_segment)
    candidate = None
    # Walk from the given segment back towards the start of the file.
    for current in reversed(raws[: position + 1]):
        if current.is_code:
            # Whitespace seen so far sits *after* code, so it cannot
            # be the indent. Forget it.
            candidate = None
            continue
        if current.is_type("whitespace"):
            candidate = current
            continue
        if current.is_type("newline"):
            # Start of the line reached. Whatever we hold is the indent.
            break
    reflow_logger.debug("Deduced indent for %s as %s", raw_segment, candidate)
    return candidate.raw if candidate else ""
1 | + | """Static methods to support ReflowPoint.respace_point().""" |
|
2 | + | ||
3 | + | ||
4 | + | import logging |
|
5 | + | from typing import List, Optional, Tuple, cast, TYPE_CHECKING |
|
6 | + | ||
7 | + | from sqlfluff.core.parser import BaseSegment, RawSegment |
|
8 | + | from sqlfluff.core.parser.segments.raw import WhitespaceSegment |
|
9 | + | from sqlfluff.core.rules.base import LintFix |
|
10 | + | ||
11 | + | ||
12 | + | if TYPE_CHECKING: |
|
13 | + | from sqlfluff.utils.reflow.elements import ReflowBlock |
|
14 | + | ||
15 | + | ||
16 | + | # We're in the utils module, but users will expect reflow |
|
17 | + | # logs to appear in the context of rules. Hence it's a subset |
|
18 | + | # of the rules logger. |
|
19 | + | reflow_logger = logging.getLogger("sqlfluff.rules.reflow") |
|
20 | + | ||
21 | + | ||
def determine_constraints(
    prev_block: Optional["ReflowBlock"],
    next_block: Optional["ReflowBlock"],
    strip_newlines: bool = False,
) -> Tuple[str, str, bool]:
    """Work out the spacing constraints between two neighbouring blocks.

    Args:
        prev_block: The block before the point, if any.
        next_block: The block after the point, if any.
        strip_newlines: The incoming preference on stripping newlines.
            It is passed through unless an "inline" constraint forces
            it to ``True``.

    Returns:
        A tuple of ``(pre_constraint, post_constraint, strip_newlines)``.

    Raises:
        NotImplementedError: If the closest common parent carries a
            spacing config other than "touch" or "inline".
    """
    # Defaults come from each block, falling back to "single" when the
    # block on that side is missing.
    before = prev_block.spacing_after if prev_block else "single"
    after = next_block.spacing_before if next_block else "single"

    # Without a block on both sides there is no common parent to consult.
    if not (prev_block and next_block):
        return before, after, strip_newlines

    shared = prev_block.depth_info.common_with(next_block.depth_info)
    # Only the most immediate common parent is checked for now (for speed).
    # TODO: Review whether just checking the parent is enough.
    # NOTE: a config on a common parent is available from either block,
    # so reading it from prev_block is as good as from next_block.
    closest_parent = shared[-1]
    within = prev_block.stack_spacing_configs.get(closest_parent, None)

    if not within:
        return before, after, strip_newlines

    if within not in ("touch", "inline"):  # pragma: no cover
        idx = prev_block.depth_info.stack_hashes.index(closest_parent)
        raise NotImplementedError(
            f"Unexpected within constraint: {within} for "
            f"{prev_block.depth_info.stack_class_types[idx]}"
        )

    # NOTE: "inline" is a stricter form of "touch".
    # - "inline" suits an object reference, where all the parts must sit
    #   together on one line like `a.b.c`.
    # - "touch" allows that layout _but also_ an optional line break
    #   between elements, much like after an opening bracket: `(a)` or:
    #   ```
    #   (
    #      a
    #   )
    #   ```
    if within == "inline":
        strip_newlines = True

    # Tighten each side to "touch", but never override an existing "any".
    if before != "any":
        before = "touch"
    if after != "any":
        after = "touch"

    return before, after, strip_newlines
|
73 | + | ||
74 | + | ||
def process_spacing(
    segment_buffer: List[RawSegment], strip_newlines: bool = False
) -> Tuple[List[RawSegment], Optional[RawSegment], List[LintFix]]:
    """Scan the existing spacing, prune the obvious excess and report back.

    Args:
        segment_buffer: The segments of the point being processed.
        strip_newlines: If set, newline segments are marked for removal
            and otherwise treated as though they were not there.

    Returns:
        A tuple of:
        - the buffer without the segments marked for removal,
        - the surviving whitespace segment after the last newline (or
          ``None`` if there isn't one),
        - a delete fix for each segment marked for removal.
    """
    to_remove: List[RawSegment] = []
    pending_ws: List[RawSegment] = []

    for seg in segment_buffer:
        # Whitespace is held until we know what follows it.
        if seg.is_type("whitespace"):
            pending_ws.append(seg)
            continue

        # Anything which isn't a line ending is of no interest here.
        if not seg.is_type("newline", "end_of_file"):
            continue

        if strip_newlines and seg.is_type("newline"):
            reflow_logger.debug(" Stripping newline: %s", seg)
            to_remove.append(seg)
            # Carry on as though it wasn't here.
            continue

        # Whitespace directly before a line ending is trailing
        # whitespace, so it goes.
        if pending_ws:
            reflow_logger.debug(" Removing trailing whitespace.")
            to_remove.extend(pending_ws)

        # Either way, held whitespace is now either deleted or
        # irrelevant for later segments.
        pending_ws = []

    if len(pending_ws) >= 2:
        reflow_logger.debug(" Removing adjoining whitespace.")
        # Several whitespaces in a row suggest something between them
        # was removed. Only the first is kept as the indent (or the one
        # considered for constraints); the rest are removed.
        to_remove.extend(pending_ws[1:])

    kept = [s for s in segment_buffer if s not in to_remove]
    # Any other whitespace should have been removed by now.
    survivor = pending_ws[0] if pending_ws else None
    deletions = [LintFix.delete(s) for s in to_remove]
    return kept, survivor, deletions
|
128 | + | ||
129 | + | ||
def _determine_aligned_inline_spacing(
    root_segment: BaseSegment,
    whitespace_seg: RawSegment,
    next_seg: RawSegment,
    segment_type: str,
    align_within: Optional[str],
    align_boundary: Optional[str],
) -> str:
    """Compute the whitespace string needed to satisfy an `align` constraint.

    Args:
        root_segment: The root used to find the path to ``next_seg``.
        whitespace_seg: The existing whitespace before ``next_seg``.
        next_seg: The segment which should be aligned.
        segment_type: The type of segment to align with (the siblings).
        align_within: The type of parent segment to align within.
        align_boundary: A segment type which limits the search for the
            parent and disqualifies siblings nested beneath it.

    Returns:
        A string of spaces. This is a single space if no suitable parent
        is found or if another sibling shares the line with ``next_seg``.
    """
    # Climb from next_seg towards the root. The outermost `align_within`
    # match wins, but the climb stops once a boundary has been examined.
    container = None
    for step in reversed(root_segment.path_to(next_seg)):
        if step.segment.is_type(align_within):
            container = step.segment
        if step.segment.is_type(align_boundary):
            break

    if not container:
        reflow_logger.debug(" No Parent found for alignment case. Treat as single.")
        return " "

    # We've got a parent. Collect the siblings to align against.
    reflow_logger.debug(" Determining alignment within: %s", container)
    peers = []
    for candidate in container.recursive_crawl(segment_type):
        crosses_boundary = any(
            step.segment.is_type(align_boundary)
            for step in container.path_to(candidate)
        )
        if crosses_boundary:
            reflow_logger.debug(
                " Purging a sibling because they're blocked " "by a boundary: %s",
                candidate,
            )
            continue
        peers.append(candidate)

    # If another sibling sits on the same line (at a different position,
    # i.e. it isn't next_seg itself) then alignment isn't applied.
    shares_line = any(
        peer.pos_marker.working_line_no == next_seg.pos_marker.working_line_no
        and peer.pos_marker.working_line_pos != next_seg.pos_marker.working_line_pos
        for peer in peers
    )
    if shares_line:
        reflow_logger.debug(" Found sibling on same line. Treat as single")
        return " "

    # For every sibling, find where the code element before it ends and
    # keep the furthest of those line positions.
    preceding_code = None
    furthest_pos = 0
    for raw in container.raw_segments:
        for peer in peers:
            # NOTE: there must have been some preceding code for this
            # to work, hence the check.
            if (
                raw.pos_marker.working_loc == peer.pos_marker.working_loc
                and preceding_code
            ):
                loc = preceding_code.pos_marker.working_loc_after(preceding_code.raw)
                reflow_logger.debug(
                    " loc for %s: %s from %s",
                    peer,
                    loc,
                    preceding_code,
                )
                furthest_pos = max(furthest_pos, loc[1])
        if raw.is_code:
            preceding_code = raw

    width = 1 + furthest_pos - whitespace_seg.pos_marker.working_line_pos
    desired_space = " " * width
    reflow_logger.debug(
        " desired_space: %r (based on max line pos of %s)",
        desired_space,
        furthest_pos,
    )
    return desired_space
|
210 | + | ||
211 | + | ||
def handle_respace__inline_with_space(
    pre_constraint: str,
    post_constraint: str,
    next_block: Optional["ReflowBlock"],
    root_segment: BaseSegment,
    segment_buffer: List[RawSegment],
    last_whitespace: RawSegment,
) -> Tuple[List[RawSegment], List[LintFix]]:
    """Make sure existing inline whitespace has the right size.

    This is one of the cases handled by .respace_point().

    It assumes:
    - a ReflowPoint with no newlines.
    - a ReflowPoint which has _some_ whitespace.

    NOTE: ``segment_buffer`` is modified in place when the whitespace is
    removed or replaced.

    Returns:
        A tuple of the (possibly updated) segment buffer and any new fixes.

    Raises:
        NotImplementedError: If the combination of constraints is not
            one of the handled cases.
    """
    fixes: List[LintFix] = []
    # Locate the whitespace so that we can pop or swap it later.
    ws_position = segment_buffer.index(last_whitespace)
    constraints = [pre_constraint, post_constraint]

    # "any" on either side: leave things alone.
    # e.g. this could mean there is a comment on one side.
    if "any" in constraints:
        return segment_buffer, fixes

    # "touch" on either side: there should be no whitespace at all,
    # so the existing one is deleted.
    if "touch" in constraints:
        fixes.append(
            LintFix(
                "delete",
                anchor=last_whitespace,
            )
        )
        segment_buffer.pop(ws_position)
        return segment_buffer, fixes

    # What remains is left alignment or plain single spacing.
    if post_constraint.startswith("align") and next_block:
        # Config is colon separated: align:<type>[:<within>[:<boundary>]]
        parts = post_constraint.split(":")
        seg_type = parts[1]
        align_within = parts[2] if len(parts) > 2 else None
        align_boundary = parts[3] if len(parts) > 3 else None
        first_next_seg = next_block.segments[0]
        reflow_logger.debug(
            " Alignment Config: %s, %s, %s, %s",
            seg_type,
            align_within,
            align_boundary,
            first_next_seg.pos_marker.working_line_pos,
        )
        desired_space = _determine_aligned_inline_spacing(
            root_segment,
            last_whitespace,
            first_next_seg,
            seg_type,
            align_within,
            align_boundary,
        )
    elif pre_constraint == post_constraint == "single":
        desired_space = " "
    else:
        raise NotImplementedError(  # pragma: no cover
            f"Unexpected Constraints: {pre_constraint}, {post_constraint}"
        )

    # Only create a fix if the whitespace isn't already as desired.
    if last_whitespace.raw != desired_space:
        replacement = last_whitespace.edit(desired_space)
        fixes.append(
            LintFix(
                "replace",
                anchor=last_whitespace,
                edit=[replacement],
            )
        )
        segment_buffer[ws_position] = replacement

    return segment_buffer, fixes
|
300 | + | ||
301 | + | ||
def handle_respace__inline_without_space(
    pre_constraint: str,
    post_constraint: str,
    prev_block: Optional["ReflowBlock"],
    next_block: Optional["ReflowBlock"],
    segment_buffer: List[RawSegment],
    existing_fixes: List[LintFix],
    anchor_on: str = "before",
) -> Tuple[List[RawSegment], List[LintFix], bool]:
    """Ensure spacing is the right size.

    This forms one of the cases handled by .respace_point().

    This code assumes:
    - a ReflowPoint with no newlines.
    - a ReflowPoint which has _no_ whitespace.

    Given this we apply constraints to either confirm no
    spacing is required or create some of the right size.

    NOTE: When whitespace is inserted, ``segment_buffer`` is appended to
    in place, and if the insertion piggy backs on a pre-existing fix then
    that fix (an element of ``existing_fixes``) has its ``edit`` replaced.

    Args:
        pre_constraint: Spacing constraint from the preceding side.
        post_constraint: Spacing constraint from the following side.
        prev_block: The block before the point, if any.
        next_block: The block after the point, if any.
        segment_buffer: The segments of the point being processed.
        existing_fixes: Fixes already generated. Must contain the fix
            which inserted an adjacent segment if there is one.
        anchor_on: Hint for anchoring a newly created fix. Unless it is
            "after" (or there is no ``prev_block``) the fix is created
            after the last segment of ``prev_block``; otherwise it is
            created before the first segment of ``next_block``.

    Returns:
        A tuple of the segment buffer, the existing fixes plus any new
        ones, and a flag showing whether the buffer was edited.

    Raises:
        ValueError: If an adjacent segment is itself an insertion but the
            fix which inserted it cannot be found in ``existing_fixes``.
        NotImplementedError: If the constraints are not a handled
            combination, or if whitespace is required but there is
            neither a previous nor a next block to anchor it on.
    """
    edited = False
    new_fixes: List[LintFix] = []
    # Do we have either side set to "touch" or "any"
    if {"touch", "any"}.intersection([pre_constraint, post_constraint]):
        # In this instance - no whitespace is correct.
        # Either because there shouldn't be, or because "any"
        # means we shouldn't check.
        pass
    # Handle the default case
    elif pre_constraint == post_constraint == "single":
        # Insert a single whitespace.
        reflow_logger.debug(" Inserting Single Whitespace.")
        # Add it to the buffer first (the easy bit). The hard bit
        # is to then determine how to generate the appropriate LintFix
        # objects.
        segment_buffer.append(WhitespaceSegment())
        edited = True

        # So special handling here. If segments either side
        # already exist then we don't care which we anchor on
        # but if one is already an insertion (as shown by a lack
        # of pos_marker), then we should piggy back on that pre-existing
        # fix.
        existing_fix = None
        insertion = None
        if prev_block and not prev_block.segments[-1].pos_marker:
            existing_fix = "after"
            insertion = prev_block.segments[-1]
        elif next_block and not next_block.segments[0].pos_marker:
            existing_fix = "before"
            insertion = next_block.segments[0]

        if existing_fix:
            reflow_logger.debug(" Detected existing fix %s", existing_fix)
            if not existing_fixes:  # pragma: no cover
                raise ValueError(
                    "Fixes detected, but none passed to .respace(). "
                    "This will cause conflicts."
                )
            # Find the fix
            for fix in existing_fixes:
                # Does it contain the insertion?
                # TODO: This feels ugly - eq for BaseSegment is different
                # to uuid matching for RawSegment. Perhaps this should be
                # more aligned. There might be a better way of doing this.
                if (
                    insertion
                    and fix.edit
                    and insertion.uuid in [elem.uuid for elem in fix.edit]
                ):
                    break
            else:  # pragma: no cover
                reflow_logger.warning("Fixes %s", existing_fixes)
                raise ValueError(f"Couldn't find insertion for {insertion}")
            # Mutate the existing fix
            assert fix
            assert fix.edit  # It's going to be an edit if we've picked it up.
            if existing_fix == "before":
                fix.edit = [cast(BaseSegment, WhitespaceSegment())] + fix.edit
            elif existing_fix == "after":
                fix.edit = fix.edit + [cast(BaseSegment, WhitespaceSegment())]
        else:
            reflow_logger.debug(" Not Detected existing fix. Creating new")
            # Take into account hint on where to anchor if given.
            if prev_block and anchor_on != "after":
                new_fixes.append(
                    LintFix(
                        "create_after",
                        anchor=prev_block.segments[-1],
                        edit=[WhitespaceSegment()],
                    )
                )
            elif next_block:
                new_fixes.append(
                    LintFix(
                        "create_before",
                        anchor=next_block.segments[0],
                        edit=[WhitespaceSegment()],
                    )
                )
            else:  # pragma: no cover
                # With nothing to anchor on, no fix can be produced.
                # Fail loudly rather than report an edit without a fix.
                raise NotImplementedError(
                    "Not set up to handle a missing _after_ and _before_."
                )
    else:  # pragma: no cover
        # TODO: This will get test coverage when configuration routines
        # are in properly.
        raise NotImplementedError(
            f"Unexpected Constraints: {pre_constraint}, {post_constraint}"
        )

    return segment_buffer, existing_fixes + new_fixes, edited
1 | + | """Static methods to support ReflowSequence.rebreak().""" |
|
2 | + | ||
3 | + | import logging |
|
4 | + | from dataclasses import dataclass |
|
5 | + | ||
6 | + | from typing import List, Type, Tuple, cast |
|
7 | + | ||
8 | + | from sqlfluff.core.parser import BaseSegment |
|
9 | + | from sqlfluff.core.rules import LintFix |
|
10 | + | ||
11 | + | from sqlfluff.utils.reflow.elements import ReflowBlock, ReflowPoint, ReflowSequenceType |
|
12 | + | from sqlfluff.utils.reflow.reindent import deduce_line_indent |
|
13 | + | ||
14 | + | ||
15 | + | # We're in the utils module, but users will expect reflow |
|
16 | + | # logs to appear in the context of rules. Hence it's a subset |
|
17 | + | # of the rules logger. |
|
18 | + | reflow_logger = logging.getLogger("sqlfluff.rules.reflow") |
|
19 | + | ||
20 | + | ||
@dataclass(frozen=True)
class _RebreakSpan:
    """A stretch of a sequence which is a candidate for rebreaking.

    Instances are immutable and are built by identify_rebreak_spans().
    """

    # The segment which the line position config applies to.
    target: BaseSegment
    # Index (in the element buffer) of the first element of the span.
    start_idx: int
    # Index (in the element buffer) of the last element of the span.
    end_idx: int
    # The part of the line position config before any ":".
    line_position: str
    # Whether the line position config ends with "strict".
    strict: bool
30 | + | ||
31 | + | ||
@dataclass(frozen=True)
class _RebreakIndices:
    """Point indices on one side of a _RebreakLocation."""

    # Direction of travel used to find the indices: 1 or -1.
    dir: int
    # Index of the element directly adjacent to the start index.
    adj_pt_idx: int
    # Index of the first point (travelling outward) which either contains
    # a newline or is followed by a code element.
    newline_pt_idx: int
    # Index of the point just before the next code element.
    pre_code_pt_idx: int

    @classmethod
    def from_elements(
        cls: Type["_RebreakIndices"],
        elements: ReflowSequenceType,
        start_idx: int,
        dir: int,
    ) -> "_RebreakIndices":
        """Scan outward from ``start_idx`` to find the relevant point indices.

        Args:
            elements: The sequence of elements to scan.
            start_idx: The index to start from (the edge of a span).
            dir: The direction to scan in. Must be 1 or -1.
        """
        assert dir in (1, -1), "Direction must be a unit direction (i.e. 1 or -1)."
        # Where scanning stops depends on the direction.
        limit = 0 if dir == -1 else len(elements)
        # Only points are checked, and points alternate with blocks,
        # hence the double step.
        stride = 2 * dir

        def followed_by_code(point_idx: int) -> bool:
            """Does the element beyond this point contain any code?"""
            return any(seg.is_code for seg in elements[point_idx + dir].segments)

        # The adjacent point is simply one step away.
        adj_point_idx = start_idx + dir

        # Then find the newline point.
        for newline_point_idx in range(adj_point_idx, limit, stride):
            if "newline" in elements[newline_point_idx].class_types:
                break
            if followed_by_code(newline_point_idx):
                break

        # Finally find the point which precedes the next code element.
        for pre_code_point_idx in range(newline_point_idx, limit, stride):
            if followed_by_code(pre_code_point_idx):
                break

        return cls(dir, adj_point_idx, newline_point_idx, pre_code_point_idx)
|
66 | + | ||
67 | + | ||
@dataclass(frozen=True)
class _RebreakLocation:
    """A rebreak span enriched with the point indices on either side."""

    # The segment which the line position config applies to.
    target: BaseSegment
    # Point indices found by scanning backward from the span start.
    prev: _RebreakIndices
    # Point indices found by scanning forward from the span end.
    next: _RebreakIndices
    # The configured line position, copied from the span.
    line_position: str
    # Whether the config is strict, copied from the span.
    strict: bool

    @classmethod
    def from_span(
        cls: Type["_RebreakLocation"], span: _RebreakSpan, elements: ReflowSequenceType
    ) -> "_RebreakLocation":
        """Build a location from a span by scanning outward in both directions."""
        before = _RebreakIndices.from_elements(elements, span.start_idx, -1)
        after = _RebreakIndices.from_elements(elements, span.end_idx, 1)
        return cls(span.target, before, after, span.line_position, span.strict)

    def has_templated_newline(self, elements: ReflowSequenceType) -> bool:
        """Is the nearest newline on either side a templated one?

        If the next newline is a _templated_ one, then in the source
        there will be a tag ({{ tag }}) which acts like _not having a
        newline_.

        Only the _last_ newline of the previous newline point and the
        _first_ newline of the next newline point are considered.
        """
        prev_segments = elements[self.prev.newline_pt_idx].segments
        # Search in reverse to find the last newline of the previous point.
        nearest_before = next(
            (seg for seg in reversed(prev_segments) if seg.is_type("newline")),
            None,
        )
        if nearest_before is not None and not nearest_before.pos_marker.is_literal():
            return True

        next_segments = elements[self.next.newline_pt_idx].segments
        nearest_after = next(
            (seg for seg in next_segments if seg.is_type("newline")),
            None,
        )
        if nearest_after is not None and not nearest_after.pos_marker.is_literal():
            return True

        return False

    def has_inappropriate_newlines(
        self, elements: ReflowSequenceType, strict: bool = False
    ) -> bool:
        """Check the newlines either side against the no-action cases.

        Returns ``True`` if there are newlines on both sides, or if there
        are newlines on neither side and ``strict`` is not set.

        Args:
            elements: The elements of the ReflowSequence this element
                is taken from to allow comparison.
            strict (:obj:`bool`): If set to true, this will not allow
                the case where there aren't newlines on either side.
        """
        # The newline indices are used (rather than the adjacent points)
        # so that we can see past comments.
        newlines_before = elements[self.prev.newline_pt_idx].num_newlines()
        newlines_after = elements[self.next.newline_pt_idx].num_newlines()

        # Newlines on BOTH sides.
        if newlines_before > 0 and newlines_after > 0:
            return True
        # Newlines on neither side, which only counts if not strict.
        if newlines_before + newlines_after == 0 and not strict:
            return True
        return False
|
138 | + | ||
139 | + | ||
def identify_rebreak_spans(
    element_buffer: ReflowSequenceType, root_segment: BaseSegment
) -> List[_RebreakSpan]:
    """Find the parts of a sequence which should be considered for rebreaking.

    A span is a block, or a run of elements from one block to another,
    with explicit config for its line position. That config comes
    either from the block itself or from one of its parent segments.

    Args:
        element_buffer: The sequence of elements to search.
        root_segment: The root used to resolve parent segments.

    Returns:
        The spans found, in the order they were discovered.
    """
    found: List[_RebreakSpan] = []
    # At least two elements are needed on each side of a span, which
    # bounds the indices worth looking at.
    upper = len(element_buffer) - 2

    for idx in range(2, upper):
        block = element_buffer[idx]
        # Only blocks are of interest.
        if not isinstance(block, ReflowBlock):
            continue

        # The easy case: the block has config of its own.
        if block.line_position:
            own_config = block.line_position
            found.append(
                _RebreakSpan(
                    # Blocks should only have one segment so it's easy to pick.
                    block.segments[0],
                    idx,
                    idx,
                    # NOTE: this isn't pretty but until it needs to be more
                    # complex, this works.
                    own_config.split(":")[0],
                    own_config.endswith("strict"),
                )
            )

        # Otherwise look for config on parents which start at this block.
        for key, parent_config in block.line_position_configs.items():
            # Not at the start of that parent? Then move on.
            if block.depth_info.stack_positions[key].idx != 0:
                continue

            # Look forward for the block which ends the parent.
            for end_idx in range(idx, upper):
                candidate = element_buffer[end_idx]
                if not isinstance(candidate, ReflowBlock):
                    continue
                if candidate.depth_info.stack_positions[key].type not in (
                    "end",
                    "solo",
                ):
                    continue

                # Found the end. The target is the matching element of
                # the parent stack of the starting block.
                depth = block.depth_info.stack_hashes.index(key)
                target = root_segment.path_to(block.segments[0])[depth].segment
                found.append(
                    _RebreakSpan(
                        target,
                        idx,
                        end_idx,
                        # NOTE: this isn't pretty but until it needs to be more
                        # complex, this works.
                        parent_config.split(":")[0],
                        parent_config.endswith("strict"),
                    )
                )
                break
            # If the start is found but not the end, that's not a problem,
            # but the span is not added. This avoids rebreaking part of
            # something without the context of the rest of it.

    return found
|
208 | + | ||
209 | + | ||
210 | + | def rebreak_sequence( |
|
211 | + | elements: ReflowSequenceType, root_segment: BaseSegment |
|
212 | + | ) -> Tuple[ReflowSequenceType, List[LintFix]]: |
|
213 | + | """Reflow line breaks within a sequence. |
|
214 | + | ||
215 | + | Initially this only _moves_ existing segments |
|
216 | + | around line breaks (e.g. for operators and commas), |
|
217 | + | but eventually this method should also handle line |
|
218 | + | length considerations too. |
|
219 | + | ||
220 | + | This intentionally does *not* handle indentation, |
|
221 | + | as the existing indents are assumed to be correct. |
|
222 | + | """ |
|
223 | + | fixes: List[LintFix] = [] |
|
224 | + | elem_buff: ReflowSequenceType = elements.copy() |
|
225 | + | ||
226 | + | # Given a sequence we should identify the objects which |
|
227 | + | # make sense to rebreak. That includes any raws with config, |
|
228 | + | # but also and parent segments which have config and we can |
|
229 | + | # find both ends for. Given those spans, we then need to find |
|
230 | + | # the points either side of them and then the blocks either |
|
231 | + | # side to respace them at the same time. |
|
232 | + | ||
233 | + | # 1. First find appropriate spans. |
|
234 | + | spans = identify_rebreak_spans(elem_buff, root_segment) |
|
235 | + | ||
236 | + | # The spans give us the edges of operators, but for line positioning we need |
|
237 | + | # to handle comments differently. There are two other important points: |
|
238 | + | # 1. The next newline outward before code (but passing over comments). |
|
239 | + | # 2. The point before the next _code_ segment (ditto comments). |
|
240 | + | locations = [_RebreakLocation.from_span(span, elem_buff) for span in spans] |
|
241 | + | ||
242 | + | # Handle each span: |
|
243 | + | for loc in locations: |
|
244 | + | ||
245 | + | reflow_logger.debug( |
|
246 | + | "Handing Rebreak Span (%r: %s): %r", |
|
247 | + | loc.line_position, |
|
248 | + | loc.target, |
|
249 | + | "".join( |
|
250 | + | elem.raw |
|
251 | + | for elem in elem_buff[ |
|
252 | + | loc.prev.pre_code_pt_idx - 1 : loc.next.pre_code_pt_idx + 2 |
|
253 | + | ] |
|
254 | + | ), |
|
255 | + | ) |
|
256 | + | ||
257 | + | if loc.has_inappropriate_newlines(elem_buff, strict=loc.strict): |
|
258 | + | continue |
|
259 | + | ||
260 | + | if loc.has_templated_newline(elem_buff): |
|
261 | + | continue |
|
262 | + | ||
263 | + | # Points and blocks either side are just offsets from the indices. |
|
264 | + | prev_point = cast(ReflowPoint, elem_buff[loc.prev.adj_pt_idx]) |
|
265 | + | next_point = cast(ReflowPoint, elem_buff[loc.next.adj_pt_idx]) |
|
266 | + | ||
267 | + | # So we know we have a preference, is it ok? |
|
268 | + | if loc.line_position == "leading": |
|
269 | + | if elem_buff[loc.prev.newline_pt_idx].num_newlines(): |
|
270 | + | # We're good. It's already leading. |
|
271 | + | continue |
|
272 | + | # Is it the simple case with no comments between the |
|
273 | + | # old and new desired locations and only a single following |
|
274 | + | # whitespace? |
|
275 | + | elif ( |
|
276 | + | loc.next.adj_pt_idx == loc.next.pre_code_pt_idx |
|
277 | + | and elem_buff[loc.next.newline_pt_idx].num_newlines() == 1 |
|
278 | + | ): |
|
279 | + | reflow_logger.debug(" Trailing Easy Case") |
|
280 | + | # Simple case. No comments. |
|
281 | + | # Strip newlines from the next point. Apply the indent to |
|
282 | + | # the previous point. |
|
283 | + | fixes, prev_point = prev_point.indent_to( |
|
284 | + | next_point.get_indent() or "", before=loc.target |
|
285 | + | ) |
|
286 | + | fixes, next_point = next_point.respace_point( |
|
287 | + | cast(ReflowBlock, elem_buff[loc.next.adj_pt_idx - 1]), |
|
288 | + | cast(ReflowBlock, elem_buff[loc.next.adj_pt_idx + 1]), |
|
289 | + | root_segment=root_segment, |
|
290 | + | fixes=fixes, |
|
291 | + | strip_newlines=True, |
|
292 | + | ) |
|
293 | + | # Update the points in the buffer |
|
294 | + | elem_buff[loc.prev.adj_pt_idx] = prev_point |
|
295 | + | elem_buff[loc.next.adj_pt_idx] = next_point |
|
296 | + | else: |
|
297 | + | reflow_logger.debug(" Trailing Tricky Case") |
|
298 | + | # Otherwise we've got a tricky scenario where there are comments |
|
299 | + | # to negotiate around. In this case, we _move the target_ |
|
300 | + | # rather than just adjusting the whitespace. |
|
301 | + | ||
302 | + | # Delete the existing position of the target, and |
|
303 | + | # the _preceding_ point. |
|
304 | + | fixes.append(LintFix.delete(loc.target)) |
|
305 | + | for seg in elem_buff[loc.prev.adj_pt_idx].segments: |
|
306 | + | fixes.append(LintFix.delete(seg)) |
|
307 | + | ||
308 | + | # We always reinsert after the first point, but respace |
|
309 | + | # the inserted point to ensure it's the right size given |
|
310 | + | # configs. |
|
311 | + | fixes, new_point = ReflowPoint(()).respace_point( |
|
312 | + | cast(ReflowBlock, elem_buff[loc.next.adj_pt_idx - 1]), |
|
313 | + | cast(ReflowBlock, elem_buff[loc.next.pre_code_pt_idx + 1]), |
|
314 | + | root_segment=root_segment, |
|
315 | + | fixes=fixes, |
|
316 | + | anchor_on="after", |
|
317 | + | ) |
|
318 | + | fixes.append( |
|
319 | + | LintFix.create_after( |
|
320 | + | elem_buff[loc.next.pre_code_pt_idx].segments[-1], |
|
321 | + | [loc.target], |
|
322 | + | ) |
|
323 | + | ) |
|
324 | + | ||
325 | + | elem_buff = ( |
|
326 | + | elem_buff[: loc.prev.adj_pt_idx] |
|
327 | + | + elem_buff[loc.next.adj_pt_idx : loc.next.pre_code_pt_idx + 1] |
|
328 | + | + elem_buff[ |
|
329 | + | loc.prev.adj_pt_idx + 1 : loc.next.adj_pt_idx |
|
330 | + | ] # the target |
|
331 | + | + [new_point] |
|
332 | + | + elem_buff[loc.next.pre_code_pt_idx + 1 :] |
|
333 | + | ) |
|
334 | + | ||
335 | + | elif loc.line_position == "trailing": |
|
336 | + | if elem_buff[loc.next.newline_pt_idx].num_newlines(): |
|
337 | + | # We're good, it's already trailing. |
|
338 | + | continue |
|
339 | + | # Is it the simple case with no comments between the |
|
340 | + | # old and new desired locations and only one previous newline? |
|
341 | + | elif ( |
|
342 | + | loc.prev.adj_pt_idx == loc.prev.pre_code_pt_idx |
|
343 | + | and elem_buff[loc.prev.newline_pt_idx].num_newlines() == 1 |
|
344 | + | ): |
|
345 | + | reflow_logger.debug(" Leading Easy Case") |
|
346 | + | # Simple case. No comments. |
|
347 | + | # Strip newlines from the previous point. Apply the indent |
|
348 | + | # to the next point. |
|
349 | + | fixes, next_point = next_point.indent_to( |
|
350 | + | prev_point.get_indent() or "", after=loc.target |
|
351 | + | ) |
|
352 | + | fixes, prev_point = prev_point.respace_point( |
|
353 | + | cast(ReflowBlock, elem_buff[loc.prev.adj_pt_idx - 1]), |
|
354 | + | cast(ReflowBlock, elem_buff[loc.prev.adj_pt_idx + 1]), |
|
355 | + | root_segment=root_segment, |
|
356 | + | fixes=fixes, |
|
357 | + | strip_newlines=True, |
|
358 | + | ) |
|
359 | + | # Update the points in the buffer |
|
360 | + | elem_buff[loc.prev.adj_pt_idx] = prev_point |
|
361 | + | elem_buff[loc.next.adj_pt_idx] = next_point |
|
362 | + | else: |
|
363 | + | reflow_logger.debug(" Leading Tricky Case") |
|
364 | + | # Otherwise we've got a tricky scenario where there are comments |
|
365 | + | # to negotiate around. In this case, we _move the target_ |
|
366 | + | # rather than just adjusting the whitespace. |
|
367 | + | ||
368 | + | # Delete the existing position of the target, and |
|
369 | + | # the _following_ point. |
|
370 | + | fixes.append(LintFix.delete(loc.target)) |
|
371 | + | for seg in elem_buff[loc.next.adj_pt_idx].segments: |
|
372 | + | fixes.append(LintFix.delete(seg)) |
|
373 | + | ||
374 | + | # We always reinsert before the first point, but respace |
|
375 | + | # the inserted point to ensure it's the right size given |
|
376 | + | # configs. |
|
377 | + | fixes, new_point = ReflowPoint(()).respace_point( |
|
378 | + | cast(ReflowBlock, elem_buff[loc.prev.pre_code_pt_idx - 1]), |
|
379 | + | cast(ReflowBlock, elem_buff[loc.prev.adj_pt_idx + 1]), |
|
380 | + | root_segment=root_segment, |
|
381 | + | fixes=fixes, |
|
382 | + | anchor_on="before", |
|
383 | + | ) |
|
384 | + | fixes.append( |
|
385 | + | LintFix.create_before( |
|
386 | + | elem_buff[loc.prev.pre_code_pt_idx].segments[0], |
|
387 | + | [loc.target], |
|
388 | + | ) |
|
389 | + | ) |
|
390 | + | ||
391 | + | elem_buff = ( |
|
392 | + | elem_buff[: loc.prev.pre_code_pt_idx] |
|
393 | + | + [new_point] |
|
394 | + | + elem_buff[ |
|
395 | + | loc.prev.adj_pt_idx + 1 : loc.next.adj_pt_idx |
|
396 | + | ] # the target |
|
397 | + | + elem_buff[loc.prev.pre_code_pt_idx : loc.prev.adj_pt_idx + 1] |
|
398 | + | + elem_buff[loc.next.adj_pt_idx + 1 :] |
|
399 | + | ) |
|
400 | + | ||
401 | + | elif loc.line_position == "alone": |
|
402 | + | # If we get here we can assume that the element is currently |
|
403 | + | # either leading or trailing and needs to be moved onto its |
|
404 | + | # own line. |
|
405 | + | ||
406 | + | # First handle the following newlines first (easy). |
|
407 | + | if not elem_buff[loc.next.newline_pt_idx].num_newlines(): |
|
408 | + | reflow_logger.debug(" Found missing newline after in alone case") |
|
409 | + | pre_fixes, next_point = next_point.indent_to( |
|
410 | + | deduce_line_indent(loc.target.raw_segments[-1], root_segment), |
|
411 | + | after=loc.target, |
|
412 | + | ) |
|
413 | + | fixes += pre_fixes |
|
414 | + | # Update the point in the buffer |
|
415 | + | elem_buff[loc.next.adj_pt_idx] = next_point |
|
416 | + | ||
417 | + | # Then handle newlines before. (hoisting past comments if needed). |
|
418 | + | if not elem_buff[loc.prev.adj_pt_idx].num_newlines(): |
|
419 | + | reflow_logger.debug(" Found missing newline before in alone case") |
|
420 | + | # NOTE: In the case that there are comments _after_ the |
|
421 | + | # target, they will be moved with it. This might break things |
|
422 | + | # but there isn't an unambiguous way to do this, because we |
|
423 | + | # can't be sure what the comments are referring to. |
|
424 | + | # Given that, we take the simple option. |
|
425 | + | post_fixes, prev_point = prev_point.indent_to( |
|
426 | + | deduce_line_indent(loc.target.raw_segments[0], root_segment), |
|
427 | + | before=loc.target, |
|
428 | + | ) |
|
429 | + | fixes += post_fixes |
|
430 | + | # Update the point in the buffer |
|
431 | + | elem_buff[loc.prev.adj_pt_idx] = prev_point |
|
432 | + | ||
433 | + | else: |
|
434 | + | raise NotImplementedError( # pragma: no cover |
|
435 | + | f"Unexpected line_position config: {loc.line_position}" |
|
436 | + | ) |
|
437 | + | ||
438 | + | return elem_buff, fixes |
1 | 1 | """Dataclasses for reflow work.""" |
|
2 | 2 | ||
3 | - | ||
4 | - | from dataclasses import dataclass |
|
5 | 3 | from itertools import chain |
|
6 | 4 | import logging |
|
7 | - | from typing import Iterator, List, Optional, Sequence, Tuple, cast, Type, Union |
|
5 | + | from typing import Iterator, List, Optional, Sequence, Tuple, cast, Type |
|
8 | 6 | from sqlfluff.core.config import FluffConfig |
|
9 | 7 | ||
10 | 8 | from sqlfluff.core.parser import BaseSegment, RawSegment |
|
11 | 9 | from sqlfluff.core.rules.base import LintFix |
|
12 | 10 | from sqlfluff.utils.reflow.config import ReflowConfig |
|
13 | 11 | from sqlfluff.utils.reflow.depthmap import DepthMap |
|
14 | 12 | ||
15 | - | from sqlfluff.utils.reflow.elements import ReflowBlock, ReflowPoint |
|
13 | + | from sqlfluff.utils.reflow.elements import ReflowBlock, ReflowPoint, ReflowSequenceType |
|
14 | + | from sqlfluff.utils.reflow.rebreak import rebreak_sequence |
|
16 | 15 | ||
17 | 16 | # We're in the utils module, but users will expect reflow |
|
18 | 17 | # logs to appear in the context of rules. Hence it's a subset |
|
19 | 18 | # of the rules logger. |
|
20 | 19 | reflow_logger = logging.getLogger("sqlfluff.rules.reflow") |
|
21 | - | ReflowSequenceType = List[Union[ReflowBlock, ReflowPoint]] |
|
22 | - | ||
23 | - | ||
@dataclass(frozen=True)
class _RebreakSpan:
    """A location within a sequence to consider rebreaking.

    This is an immutable record. It carries no behaviour of its own and
    is expanded into a `_RebreakLocation` before any rebreaking is done.
    """

    # The segment which the line position config applies to.
    target: BaseSegment
    # Index (within the element buffer) of the first element of the span.
    start_idx: int
    # Index (within the element buffer) of the last element of the span.
    # Equal to `start_idx` when the span is a single element.
    end_idx: int
    # The configured line position with any ":"-suffix removed. The
    # rebreak logic handles "leading", "trailing" and "alone".
    line_position: str
    # Whether the line position config was marked as strict.
    strict: bool
|
33 | - | ||
34 | - | ||
@dataclass(frozen=True)
class _RebreakIndices:
    """Indices of points for a _RebreakLocation.

    All three indices are positions within the same element buffer and
    are found by walking outward from one edge of a span in a single
    direction.
    """

    # Direction of travel through the elements: 1 (forward) or -1 (backward).
    dir: int
    # Index of the element immediately next to the span edge.
    adj_pt_idx: int
    # Index of the first point (walking outward) which either contains a
    # newline or sits directly before an element containing code.
    newline_pt_idx: int
    # Index of the first point (walking outward from the newline point)
    # which sits directly before an element containing code.
    pre_code_pt_idx: int

    @classmethod
    def from_elements(
        cls: Type["_RebreakIndices"],
        elements: ReflowSequenceType,
        start_idx: int,
        dir: int,
    ) -> "_RebreakIndices":
        """Iterate through the elements to deduce important point indices.

        Args:
            elements: The element buffer to search. The search steps two
                elements at a time, on the basis that points alternate
                with blocks.
            start_idx: Index of the span edge to walk outward from.
            dir: The direction to walk: 1 for forward, -1 for backward.

        Returns:
            A new `_RebreakIndices` holding the direction and the three
            indices found.

        Raises:
            AssertionError: If `dir` is not 1 or -1.
        """
        assert dir in (1, -1), "Direction must be a unit direction (i.e. 1 or -1)."
        # Limit depends on the direction
        # NOTE: `range` excludes its stop value, so when walking backward
        # the element at index 0 is never examined.
        limit = 0 if dir == -1 else len(elements)
        # The adjacent point is just the next one.
        adj_point_idx = start_idx + dir
        # The newline point is next. We hop in 2s because we're checking
        # only points, which alternate with blocks.
        # NOTE: The loop variable is deliberately used after the loop. If
        # no element matches, it keeps the last index visited. If the range
        # is empty it is never bound, and the next loop raises
        # UnboundLocalError.
        for newline_point_idx in range(adj_point_idx, limit, 2 * dir):
            if "newline" in elements[newline_point_idx].class_types or any(
                seg.is_code for seg in elements[newline_point_idx + dir].segments
            ):
                break
        # Finally we look for the point preceding the next code element.
        # This starts from the newline point, so both indices can be equal.
        for pre_code_point_idx in range(newline_point_idx, limit, 2 * dir):
            if any(seg.is_code for seg in elements[pre_code_point_idx + dir].segments):
                break
        return cls(dir, adj_point_idx, newline_point_idx, pre_code_point_idx)
|
69 | - | ||
70 | - | ||
71 | - | @dataclass(frozen=True) |
|
72 | - | class _RebreakLocation: |
|
73 | - | """A location within a sequence to rebreak, with metadata.""" |
|
74 | - | ||
75 | - | target: BaseSegment |
|
76 | - | prev: _RebreakIndices |
|
77 | - | next: _RebreakIndices |
|
78 | - | line_position: str |
|
79 | - | strict: bool |
|
80 | - | ||
81 | - | @classmethod |
|
82 | - | def from_span( |
|
83 | - | cls: Type["_RebreakLocation"], span: _RebreakSpan, elements: ReflowSequenceType |
|
84 | - | ) -> "_RebreakLocation": |
|
85 | - | """Expand a span to a location.""" |
|
86 | - | return cls( |
|
87 | - | span.target, |
|
88 | - | _RebreakIndices.from_elements(elements, span.start_idx, -1), |
|
89 | - | _RebreakIndices.from_elements(elements, span.end_idx, 1), |
|
90 | - | span.line_position, |
|
91 | - | span.strict, |
|
92 | - | ) |
|
93 | - | ||
94 | - | def has_templated_newline(self, elements: ReflowSequenceType) -> bool: |
|
95 | - | """Is either side a templated newline? |
|
96 | - | ||
97 | - | If either side has a templated newline, then that's ok too. |
|
98 | - | The intent here is that if the next newline is a _templated_ |
|
99 | - | one, then in the source there will be a tag ({{ tag }}), which |
|
100 | - | acts like _not having a newline_. |
|
101 | - | """ |
|
102 | - | # Check the _last_ newline of the previous point. |
|
103 | - | # Slice backward to search in reverse. |
|
104 | - | for seg in elements[self.prev.newline_pt_idx].segments[::-1]: |
|
105 | - | if seg.is_type("newline"): |
|
106 | - | if not seg.pos_marker.is_literal(): |
|
107 | - | return True |
|
108 | - | break |
|
109 | - | # Check the _first_ newline of the next point. |
|
110 | - | for seg in elements[self.next.newline_pt_idx].segments: |
|
111 | - | if seg.is_type("newline"): |
|
112 | - | if not seg.pos_marker.is_literal(): |
|
113 | - | return True |
|
114 | - | break |
|
115 | - | return False |
|
116 | - | ||
117 | - | def has_inappropriate_newlines( |
|
118 | - | self, elements: ReflowSequenceType, strict: bool = False |
|
119 | - | ) -> bool: |
|
120 | - | """Is the span surrounded by one (but not two) line breaks? |
|
121 | - | ||
122 | - | Args: |
|
123 | - | elements: The elements of the ReflowSequence this element |
|
124 | - | is taken from to allow comparison. |
|
125 | - | strict (:obj:`bool`): If set to true, this will not allow |
|
126 | - | the case where there aren't newlines on either side. |
|
127 | - | """ |
|
128 | - | # Here we use the newline index, not |
|
129 | - | # just the adjacent point, so that we can see past comments. |
|
130 | - | n_prev_newlines = elements[self.prev.newline_pt_idx].num_newlines() |
|
131 | - | n_next_newlines = elements[self.next.newline_pt_idx].num_newlines() |
|
132 | - | newlines_on_neither_side = n_prev_newlines + n_next_newlines == 0 |
|
133 | - | newlines_on_both_sides = n_prev_newlines > 0 and n_next_newlines > 0 |
|
134 | - | return ( |
|
135 | - | # If there isn't a newline on either side then carry |
|
136 | - | # on, unless it's strict. |
|
137 | - | (newlines_on_neither_side and not strict) |
|
138 | - | # If there is a newline on BOTH sides. That's ok. |
|
139 | - | or newlines_on_both_sides |
|
140 | - | ) |
|
141 | 20 | ||
142 | 21 | ||
143 | 22 | class ReflowSequence: |
631 | 510 | embodied_fixes=fixes, |
|
632 | 511 | ) |
|
633 | 512 | ||
@staticmethod
def _identify_rebreak_spans(
    element_buffer: ReflowSequenceType, root_segment: BaseSegment
) -> List[_RebreakSpan]:
    """Find the spans within a buffer which have line position config.

    Two kinds of span are collected:

    1. A single block which has its own `line_position` config.
    2. A run of blocks covering a parent segment which has line position
       config, but only when both the first and last block of that
       parent can be found within the searched range.

    Args:
        element_buffer: The elements to search. The first two and last
            two elements are never used as the start of a span.
        root_segment: The segment used to resolve the parent segment
            which becomes the target of a multi-element span.

    Returns:
        The spans found, in the order they were discovered.
    """
    spans: List[_RebreakSpan] = []
    # We'll need at least two elements each side, so constrain
    # our range accordingly.
    for idx in range(2, len(element_buffer) - 2):
        elem = element_buffer[idx]
        # Only evaluate blocks.
        if not isinstance(elem, ReflowBlock):
            continue
        # Does the element itself have config? (The easy case)
        if elem.line_position:
            # Blocks should only have one segment so it's easy to pick it.
            spans.append(
                _RebreakSpan(
                    elem.segments[0],
                    idx,
                    idx,
                    # NOTE: this isn't pretty but until it needs to be more
                    # complex, this works.
                    elem.line_position.split(":")[0],
                    elem.line_position.endswith("strict"),
                )
            )
        # Do any of its parents have config, and are we at the start
        # of them?
        for key, config in elem.line_position_configs.items():
            # If we're not at the start of the segment, then pass.
            if elem.depth_info.stack_positions[key].idx != 0:
                continue
            # Can we find the end?
            for end_idx in range(idx, len(element_buffer) - 2):
                end_elem = element_buffer[end_idx]
                if not isinstance(end_elem, ReflowBlock):
                    continue
                if key not in end_elem.depth_info.stack_positions:
                    # This block has no position recorded for the parent
                    # (presumably we've walked out of it without seeing a
                    # block marked as its end). Treat that the same as not
                    # finding the end, rather than failing on the lookup.
                    break
                if end_elem.depth_info.stack_positions[key].type in ("end", "solo"):
                    # Found the end. Add it to the stack.
                    # We reference the appropriate element from the parent stack.
                    target_depth = elem.depth_info.stack_hashes.index(key)
                    target = root_segment.path_to(elem.segments[0])[
                        target_depth
                    ].segment
                    spans.append(
                        _RebreakSpan(
                            target,
                            idx,
                            end_idx,
                            # NOTE: this isn't pretty but until it needs to be more
                            # complex, this works.
                            config.split(":")[0],
                            config.endswith("strict"),
                        )
                    )
                    break
            # If we find the start, but not the end, it's not a problem, but
            # we won't be rebreaking this span. This is important so that we
            # don't rebreak part of something without the context of what's
            # in the rest of it. We continue without adding it to the buffer.
    return spans
|
696 | - | ||
def _deduce_line_indent(self, raw_segment: RawSegment) -> str:
    """Work out the indent of the line which holds the given raw segment.

    The raw segments of the root are walked backward, starting at the
    given segment and stopping at the first newline. The last whitespace
    seen on that walk is the indent, unless code is seen after it, in
    which case it is discarded.

    Returns:
        The raw string of the indent whitespace, or an empty string if
        no indent was found.
    """
    raws = self.root_segment.raw_segments
    position = raws.index(raw_segment)
    candidate = None
    for cursor in range(position, -1, -1):
        current = raws[cursor]
        if current.is_code:
            # Code sits earlier on the line than any whitespace seen so
            # far, so that whitespace can't be the indent.
            candidate = None
            continue
        if current.is_type("whitespace"):
            candidate = current
            continue
        if current.is_type("newline"):
            # Reached the start of the line.
            break
    reflow_logger.debug("Deduced indent for %s as %s", raw_segment, candidate)
    return candidate.raw if candidate else ""
|
713 | - | ||
714 | 513 | def rebreak(self): |
|
715 | 514 | """Reflow line breaks within a sequence. |
|
716 | 515 |
727 | 526 | "rebreak cannot currently handle pre-existing embodied fixes." |
|
728 | 527 | ) |
|
729 | 528 | ||
730 | - | fixes = [] |
|
731 | - | elem_buff: ReflowSequenceType = self.elements.copy() |
|
732 | - | ||
733 | - | # Given a sequence we should identify the objects which |
|
734 | - | # make sense to rebreak. That includes any raws with config, |
|
735 | - | # but also and parent segments which have config and we can |
|
736 | - | # find both ends for. Given those spans, we then need to find |
|
737 | - | # the points either side of them and then the blocks either |
|
738 | - | # side to respace them at the same time. |
|
739 | - | ||
740 | - | # 1. First find appropriate spans. |
|
741 | - | spans = self._identify_rebreak_spans(self.elements, self.root_segment) |
|
742 | - | ||
743 | - | # The spans give us the edges of operators, but for line positioning we need |
|
744 | - | # to handle comments differently. There are two other important points: |
|
745 | - | # 1. The next newline outward before code (but passing over comments). |
|
746 | - | # 2. The point before the next _code_ segment (ditto comments). |
|
747 | - | locations = [_RebreakLocation.from_span(span, self.elements) for span in spans] |
|
748 | - | ||
749 | - | # Handle each span: |
|
750 | - | for loc in locations: |
|
751 | - | ||
752 | - | reflow_logger.debug( |
|
753 | - | "Handing Rebreak Span (%r: %s): %r", |
|
754 | - | loc.line_position, |
|
755 | - | loc.target, |
|
756 | - | "".join( |
|
757 | - | elem.raw |
|
758 | - | for elem in elem_buff[ |
|
759 | - | loc.prev.pre_code_pt_idx - 1 : loc.next.pre_code_pt_idx + 2 |
|
760 | - | ] |
|
761 | - | ), |
|
762 | - | ) |
|
763 | - | ||
764 | - | if loc.has_inappropriate_newlines(elem_buff, strict=loc.strict): |
|
765 | - | continue |
|
766 | - | ||
767 | - | if loc.has_templated_newline(elem_buff): |
|
768 | - | continue |
|
769 | - | ||
770 | - | # Points and blocks either side are just offsets from the indices. |
|
771 | - | prev_point = elem_buff[loc.prev.adj_pt_idx] |
|
772 | - | next_point = elem_buff[loc.next.adj_pt_idx] |
|
773 | - | ||
774 | - | # So we know we have a preference, is it ok? |
|
775 | - | if loc.line_position == "leading": |
|
776 | - | if elem_buff[loc.prev.newline_pt_idx].num_newlines(): |
|
777 | - | # We're good. It's already leading. |
|
778 | - | continue |
|
779 | - | # Is it the simple case with no comments between the |
|
780 | - | # old and new desired locations and only a single following |
|
781 | - | # whitespace? |
|
782 | - | elif ( |
|
783 | - | loc.next.adj_pt_idx == loc.next.pre_code_pt_idx |
|
784 | - | and elem_buff[loc.next.newline_pt_idx].num_newlines() == 1 |
|
785 | - | ): |
|
786 | - | reflow_logger.debug(" Trailing Easy Case") |
|
787 | - | # Simple case. No comments. |
|
788 | - | # Strip newlines from the next point. Apply the indent to |
|
789 | - | # the previous point. |
|
790 | - | fixes, prev_point = prev_point.indent_to( |
|
791 | - | next_point.get_indent() or "", before=loc.target |
|
792 | - | ) |
|
793 | - | fixes, next_point = next_point.respace_point( |
|
794 | - | elem_buff[loc.next.adj_pt_idx - 1], |
|
795 | - | elem_buff[loc.next.adj_pt_idx + 1], |
|
796 | - | root_segment=self.root_segment, |
|
797 | - | fixes=fixes, |
|
798 | - | strip_newlines=True, |
|
799 | - | ) |
|
800 | - | # Update the points in the buffer |
|
801 | - | elem_buff[loc.prev.adj_pt_idx] = prev_point |
|
802 | - | elem_buff[loc.next.adj_pt_idx] = next_point |
|
803 | - | else: |
|
804 | - | reflow_logger.debug(" Trailing Tricky Case") |
|
805 | - | # Otherwise we've got a tricky scenario where there are comments |
|
806 | - | # to negotiate around. In this case, we _move the target_ |
|
807 | - | # rather than just adjusting the whitespace. |
|
808 | - | ||
809 | - | # Delete the existing position of the target, and |
|
810 | - | # the _preceding_ point. |
|
811 | - | fixes.append(LintFix.delete(loc.target)) |
|
812 | - | for seg in elem_buff[loc.prev.adj_pt_idx].segments: |
|
813 | - | fixes.append(LintFix.delete(seg)) |
|
814 | - | ||
815 | - | # We always reinsert after the first point, but respace |
|
816 | - | # the inserted point to ensure it's the right size given |
|
817 | - | # configs. |
|
818 | - | fixes, new_point = ReflowPoint([]).respace_point( |
|
819 | - | elem_buff[loc.next.adj_pt_idx - 1], |
|
820 | - | elem_buff[loc.next.pre_code_pt_idx + 1], |
|
821 | - | root_segment=self.root_segment, |
|
822 | - | fixes=fixes, |
|
823 | - | anchor_on="after", |
|
824 | - | ) |
|
825 | - | fixes.append( |
|
826 | - | LintFix.create_after( |
|
827 | - | elem_buff[loc.next.pre_code_pt_idx].segments[-1], |
|
828 | - | [loc.target], |
|
829 | - | ) |
|
830 | - | ) |
|
831 | - | ||
832 | - | elem_buff = ( |
|
833 | - | elem_buff[: loc.prev.adj_pt_idx] |
|
834 | - | + elem_buff[loc.next.adj_pt_idx : loc.next.pre_code_pt_idx + 1] |
|
835 | - | + elem_buff[ |
|
836 | - | loc.prev.adj_pt_idx + 1 : loc.next.adj_pt_idx |
|
837 | - | ] # the target |
|
838 | - | + [new_point] |
|
839 | - | + elem_buff[loc.next.pre_code_pt_idx + 1 :] |
|
840 | - | ) |
|
841 | - | ||
842 | - | elif loc.line_position == "trailing": |
|
843 | - | if elem_buff[loc.next.newline_pt_idx].num_newlines(): |
|
844 | - | # We're good, it's already trailing. |
|
845 | - | continue |
|
846 | - | # Is it the simple case with no comments between the |
|
847 | - | # old and new desired locations and only one previous newline? |
|
848 | - | elif ( |
|
849 | - | loc.prev.adj_pt_idx == loc.prev.pre_code_pt_idx |
|
850 | - | and elem_buff[loc.prev.newline_pt_idx].num_newlines() == 1 |
|
851 | - | ): |
|
852 | - | reflow_logger.debug(" Leading Easy Case") |
|
853 | - | # Simple case. No comments. |
|
854 | - | # Strip newlines from the previous point. Apply the indent |
|
855 | - | # to the next point. |
|
856 | - | fixes, next_point = next_point.indent_to( |
|
857 | - | prev_point.get_indent() or "", after=loc.target |
|
858 | - | ) |
|
859 | - | fixes, prev_point = prev_point.respace_point( |
|
860 | - | elem_buff[loc.prev.adj_pt_idx - 1], |
|
861 | - | elem_buff[loc.prev.adj_pt_idx + 1], |
|
862 | - | root_segment=self.root_segment, |
|
863 | - | fixes=fixes, |
|
864 | - | strip_newlines=True, |
|
865 | - | ) |
|
866 | - | # Update the points in the buffer |
|
867 | - | elem_buff[loc.prev.adj_pt_idx] = prev_point |
|
868 | - | elem_buff[loc.next.adj_pt_idx] = next_point |
|
869 | - | else: |
|
870 | - | reflow_logger.debug(" Leading Tricky Case") |
|
871 | - | # Otherwise we've got a tricky scenario where there are comments |
|
872 | - | # to negotiate around. In this case, we _move the target_ |
|
873 | - | # rather than just adjusting the whitespace. |
|
874 | - | ||
875 | - | # Delete the existing position of the target, and |
|
876 | - | # the _following_ point. |
|
877 | - | fixes.append(LintFix.delete(loc.target)) |
|
878 | - | for seg in elem_buff[loc.next.adj_pt_idx].segments: |
|
879 | - | fixes.append(LintFix.delete(seg)) |
|
880 | - | ||
881 | - | # We always reinsert before the first point, but respace |
|
882 | - | # the inserted point to ensure it's the right size given |
|
883 | - | # configs. |
|
884 | - | fixes, new_point = ReflowPoint([]).respace_point( |
|
885 | - | elem_buff[loc.prev.pre_code_pt_idx - 1], |
|
886 | - | elem_buff[loc.prev.adj_pt_idx + 1], |
|
887 | - | root_segment=self.root_segment, |
|
888 | - | fixes=fixes, |
|
889 | - | anchor_on="before", |
|
890 | - | ) |
|
891 | - | fixes.append( |
|
892 | - | LintFix.create_before( |
|
893 | - | elem_buff[loc.prev.pre_code_pt_idx].segments[0], |
|
894 | - | [loc.target], |
|
895 | - | ) |
|
896 | - | ) |
|
897 | - | ||
898 | - | elem_buff = ( |
|
899 | - | elem_buff[: loc.prev.pre_code_pt_idx] |
|
900 | - | + [new_point] |
|
901 | - | + elem_buff[ |
|
902 | - | loc.prev.adj_pt_idx + 1 : loc.next.adj_pt_idx |
|
903 | - | ] # the target |
|
904 | - | + elem_buff[loc.prev.pre_code_pt_idx : loc.prev.adj_pt_idx + 1] |
|
905 | - | + elem_buff[loc.next.adj_pt_idx + 1 :] |
|
906 | - | ) |
|
907 | - | ||
908 | - | elif loc.line_position == "alone": |
|
909 | - | # If we get here we can assume that the element is currently |
|
910 | - | # either leading or trailing and needs to be moved onto its |
|
911 | - | # own line. |
|
912 | - | ||
913 | - | # First handle the following newlines first (easy). |
|
914 | - | if not elem_buff[loc.next.newline_pt_idx].num_newlines(): |
|
915 | - | reflow_logger.debug(" Found missing newline after in alone case") |
|
916 | - | pre_fixes, next_point = next_point.indent_to( |
|
917 | - | self._deduce_line_indent(loc.target.raw_segments[-1]), |
|
918 | - | after=loc.target, |
|
919 | - | ) |
|
920 | - | fixes += pre_fixes |
|
921 | - | # Update the point in the buffer |
|
922 | - | elem_buff[loc.next.adj_pt_idx] = next_point |
|
923 | - | ||
924 | - | # Then handle newlines before. (hoisting past comments if needed). |
|
925 | - | if not elem_buff[loc.prev.adj_pt_idx].num_newlines(): |
|
926 | - | reflow_logger.debug(" Found missing newline before in alone case") |
|
927 | - | # NOTE: In the case that there are comments _after_ the |
|
928 | - | # target, they will be moved with it. This might break things |
|
929 | - | # but there isn't an unambiguous way to do this, because we |
|
930 | - | # can't be sure what the comments are referring to. |
|
931 | - | # Given that, we take the simple option. |
|
932 | - | post_fixes, prev_point = prev_point.indent_to( |
|
933 | - | self._deduce_line_indent(loc.target.raw_segments[0]), |
|
934 | - | before=loc.target, |
|
935 | - | ) |
|
936 | - | fixes += post_fixes |
|
937 | - | # Update the point in the buffer |
|
938 | - | elem_buff[loc.prev.adj_pt_idx] = prev_point |
|
939 | - | ||
940 | - | else: |
|
941 | - | raise NotImplementedError( # pragma: no cover |
|
942 | - | f"Unexpected line_position config: {loc.line_position}" |
|
943 | - | ) |
|
529 | + | # Delegate to the rebreak algorithm |
|
530 | + | elem_buff, fixes = rebreak_sequence(self.elements, self.root_segment) |
|
944 | 531 | ||
945 | 532 | return ReflowSequence( |
|
946 | 533 | elements=elem_buff, |
3 | 3 | from itertools import chain |
|
4 | 4 | import logging |
|
5 | 5 | from dataclasses import dataclass |
|
6 | - | from typing import Dict, List, Optional, Sequence, Set, Tuple, Type, cast |
|
6 | + | from typing import Dict, List, Optional, Sequence, Set, Tuple, Type, Union |
|
7 | 7 | ||
8 | 8 | from sqlfluff.core.parser import BaseSegment, RawSegment |
|
9 | 9 | from sqlfluff.core.parser.segments.raw import NewlineSegment, WhitespaceSegment |
12 | 12 | from sqlfluff.utils.reflow.config import ReflowConfig |
|
13 | 13 | from sqlfluff.utils.reflow.depthmap import DepthInfo |
|
14 | 14 | ||
15 | + | # Respace Algorithms |
|
16 | + | from sqlfluff.utils.reflow.respace import ( |
|
17 | + | determine_constraints, |
|
18 | + | process_spacing, |
|
19 | + | handle_respace__inline_with_space, |
|
20 | + | handle_respace__inline_without_space, |
|
21 | + | ) |
|
22 | + | ||
15 | 23 | # We're in the utils module, but users will expect reflow |
|
16 | 24 | # logs to appear in the context of rules. Hence it's a subset |
|
17 | 25 | # of the rules logger. |
116 | 124 | side. |
|
117 | 125 | """ |
|
118 | 126 | ||
119 | - | @staticmethod |
|
120 | - | def _determine_constraints( |
|
121 | - | prev_block: Optional[ReflowBlock], |
|
122 | - | next_block: Optional[ReflowBlock], |
|
123 | - | strip_newlines: bool = False, |
|
124 | - | ) -> Tuple[str, str, bool]: |
|
125 | - | """Given the surrounding blocks, determine appropriate constraints.""" |
|
126 | - | # Start with the defaults. |
|
127 | - | pre_constraint = prev_block.spacing_after if prev_block else "single" |
|
128 | - | post_constraint = next_block.spacing_before if next_block else "single" |
|
129 | - | ||
130 | - | # Work out the common parent segment and depth |
|
131 | - | if prev_block and next_block: |
|
132 | - | common = prev_block.depth_info.common_with(next_block.depth_info) |
|
133 | - | # Just check the most immediate parent for now for speed. |
|
134 | - | # TODO: Review whether just checking the parent is enough. |
|
135 | - | # NOTE: spacing configs will be available on both sides if they're common |
|
136 | - | # so it doesn't matter whether we get it from prev_block or next_block. |
|
137 | - | within_constraint = prev_block.stack_spacing_configs.get(common[-1], None) |
|
138 | - | if not within_constraint: |
|
139 | - | pass |
|
140 | - | elif within_constraint in ("touch", "inline"): |
|
141 | - | # NOTE: inline is actually a more extreme version of "touch". |
|
142 | - | # Examples: |
|
143 | - | # - "inline" would be used with an object reference, where the |
|
144 | - | # parts have to all be together on one line like `a.b.c`. |
|
145 | - | # - "touch" would allow the above layout, _but also_ allow an |
|
146 | - | # an optional line break between, much like between an opening |
|
147 | - | # bracket and the following element: `(a)` or: |
|
148 | - | # ``` |
|
149 | - | # ( |
|
150 | - | # a |
|
151 | - | # ) |
|
152 | - | # ``` |
|
153 | - | if within_constraint == "inline": |
|
154 | - | # If they are then strip newlines. |
|
155 | - | strip_newlines = True |
|
156 | - | # If segments are expected to be touch within. Then modify |
|
157 | - | # constraints accordingly. |
|
158 | - | # NOTE: We don't override if it's already "any" |
|
159 | - | if pre_constraint != "any": |
|
160 | - | pre_constraint = "touch" |
|
161 | - | if post_constraint != "any": |
|
162 | - | post_constraint = "touch" |
|
163 | - | else: # pragma: no cover |
|
164 | - | idx = prev_block.depth_info.stack_hashes.index(common[-1]) |
|
165 | - | raise NotImplementedError( |
|
166 | - | f"Unexpected within constraint: {within_constraint} for " |
|
167 | - | f"{prev_block.depth_info.stack_class_types[idx]}" |
|
168 | - | ) |
|
169 | - | ||
170 | - | return pre_constraint, post_constraint, strip_newlines |
|
171 | - | ||
172 | - | @staticmethod |
|
173 | - | def _process_spacing( |
|
174 | - | segment_buffer: List[RawSegment], strip_newlines: bool = False |
|
175 | - | ) -> Tuple[List[RawSegment], Optional[RawSegment], List[LintFix]]: |
|
176 | - | """Given the existing spacing, extract information and do basic pruning.""" |
|
177 | - | removal_buffer: List[RawSegment] = [] |
|
178 | - | last_whitespace: List[RawSegment] = [] |
|
179 | - | ||
180 | - | # Loop through the existing segments looking for spacing. |
|
181 | - | for seg in segment_buffer: |
|
182 | - | ||
183 | - | # If it's whitespace, store it. |
|
184 | - | if seg.is_type("whitespace"): |
|
185 | - | last_whitespace.append(seg) |
|
186 | - | ||
187 | - | # If it's a newline, react accordingly. |
|
188 | - | elif seg.is_type("newline", "end_of_file"): |
|
189 | - | ||
190 | - | # Are we stripping newlines? |
|
191 | - | if strip_newlines and seg.is_type("newline"): |
|
192 | - | reflow_logger.debug(" Stripping newline: %s", seg) |
|
193 | - | removal_buffer.append(seg) |
|
194 | - | # Carry on as though it wasn't here. |
|
195 | - | continue |
|
196 | - | ||
197 | - | # Check if we've just passed whitespace. If we have, remove it |
|
198 | - | # as trailing whitespace, both from the buffer and create a fix. |
|
199 | - | if last_whitespace: |
|
200 | - | reflow_logger.debug(" Removing trailing whitespace.") |
|
201 | - | for ws in last_whitespace: |
|
202 | - | removal_buffer.append(ws) |
|
203 | - | ||
204 | - | # Regardless, unset last_whitespace. |
|
205 | - | # We either just deleted it, or it's not relevant for any future |
|
206 | - | # segments. |
|
207 | - | last_whitespace = [] |
|
208 | - | ||
209 | - | if len(last_whitespace) >= 2: |
|
210 | - | reflow_logger.debug(" Removing adjoining whitespace.") |
|
211 | - | # If we find multiple sequential whitespaces, it's the sign |
|
212 | - | # that we've removed something. Only the first one should be |
|
213 | - | # a valid indent (or the one we consider for constraints). |
|
214 | - | # Remove all the following ones. |
|
215 | - | for ws in last_whitespace[1:]: |
|
216 | - | removal_buffer.append(ws) |
|
217 | - | ||
218 | - | # Turn the removal buffer updated segment buffer, last whitespace |
|
219 | - | # and associated fixes. |
|
220 | - | return ( |
|
221 | - | [s for s in segment_buffer if s not in removal_buffer], |
|
222 | - | # We should have removed all other whitespace by now. |
|
223 | - | last_whitespace[0] if last_whitespace else None, |
|
224 | - | [LintFix.delete(s) for s in removal_buffer], |
|
225 | - | ) |
|
226 | - | ||
227 | - | @staticmethod |
|
228 | - | def _determine_aligned_inline_spacing( |
|
229 | - | root_segment: BaseSegment, |
|
230 | - | whitespace_seg: RawSegment, |
|
231 | - | next_seg: RawSegment, |
|
232 | - | segment_type: str, |
|
233 | - | align_within: Optional[str], |
|
234 | - | align_boundary: Optional[str], |
|
235 | - | ) -> str: |
|
236 | - | """Work out spacing for instance of an `align` constraint.""" |
|
237 | - | # Find the level of segment that we're aligning. |
|
238 | - | # NOTE: Reverse slice |
|
239 | - | parent_segment = None |
|
240 | - | for ps in root_segment.path_to(next_seg)[::-1]: |
|
241 | - | if ps.segment.is_type(align_within): |
|
242 | - | parent_segment = ps.segment |
|
243 | - | if ps.segment.is_type(align_boundary): |
|
244 | - | break |
|
245 | - | ||
246 | - | if not parent_segment: |
|
247 | - | reflow_logger.debug( |
|
248 | - | " No Parent found for alignment case. Treat as single." |
|
249 | - | ) |
|
250 | - | return " " |
|
251 | - | ||
252 | - | # We've got a parent. Find some siblings. |
|
253 | - | reflow_logger.debug(" Determining alignment within: %s", parent_segment) |
|
254 | - | siblings = [] |
|
255 | - | for sibling in parent_segment.recursive_crawl(segment_type): |
|
256 | - | # Purge any siblings with a boundary between them |
|
257 | - | if not any( |
|
258 | - | ps.segment.is_type(align_boundary) |
|
259 | - | for ps in parent_segment.path_to(sibling) |
|
260 | - | ): |
|
261 | - | siblings.append(sibling) |
|
262 | - | else: |
|
263 | - | reflow_logger.debug( |
|
264 | - | " Purging a sibling because they're blocked " |
|
265 | - | "by a boundary: %s", |
|
266 | - | sibling, |
|
267 | - | ) |
|
268 | - | ||
269 | - | # Is the current indent the only one on the line? |
|
270 | - | if any( |
|
271 | - | # Same line |
|
272 | - | sibling.pos_marker.working_line_no == next_seg.pos_marker.working_line_no |
|
273 | - | # And not same position (i.e. not self) |
|
274 | - | and sibling.pos_marker.working_line_pos |
|
275 | - | != next_seg.pos_marker.working_line_pos |
|
276 | - | for sibling in siblings |
|
277 | - | ): |
|
278 | - | reflow_logger.debug(" Found sibling on same line. Treat as single") |
|
279 | - | return " " |
|
280 | - | ||
281 | - | # Work out the current spacing before each. |
|
282 | - | last_code = None |
|
283 | - | max_desired_line_pos = 0 |
|
284 | - | for seg in parent_segment.raw_segments: |
|
285 | - | for sibling in siblings: |
|
286 | - | # NOTE: We're asserting that there must have been |
|
287 | - | # a last_code. Otherwise this won't work. |
|
288 | - | if ( |
|
289 | - | seg.pos_marker.working_loc == sibling.pos_marker.working_loc |
|
290 | - | and last_code |
|
291 | - | ): |
|
292 | - | loc = last_code.pos_marker.working_loc_after(last_code.raw) |
|
293 | - | reflow_logger.debug( |
|
294 | - | " loc for %s: %s from %s", |
|
295 | - | sibling, |
|
296 | - | loc, |
|
297 | - | last_code, |
|
298 | - | ) |
|
299 | - | if loc[1] > max_desired_line_pos: |
|
300 | - | max_desired_line_pos = loc[1] |
|
301 | - | if seg.is_code: |
|
302 | - | last_code = seg |
|
303 | - | ||
304 | - | desired_space = " " * ( |
|
305 | - | 1 + max_desired_line_pos - whitespace_seg.pos_marker.working_line_pos |
|
306 | - | ) |
|
307 | - | reflow_logger.debug( |
|
308 | - | " desired_space: %r (based on max line pos of %s)", |
|
309 | - | desired_space, |
|
310 | - | max_desired_line_pos, |
|
311 | - | ) |
|
312 | - | return desired_space |
|
313 | - | ||
314 | - | @classmethod |
|
315 | - | def _handle_respace__inline_with_space( |
|
316 | - | cls, |
|
317 | - | pre_constraint: str, |
|
318 | - | post_constraint: str, |
|
319 | - | next_block: Optional[ReflowBlock], |
|
320 | - | root_segment: BaseSegment, |
|
321 | - | segment_buffer: List[RawSegment], |
|
322 | - | last_whitespace: RawSegment, |
|
323 | - | ) -> Tuple[List[RawSegment], List[LintFix]]: |
|
324 | - | """Check inline spacing is the right size. |
|
325 | - | ||
326 | - | This forms one of the cases handled by .respace_point(). |
|
327 | - | ||
328 | - | This code assumes: |
|
329 | - | - a ReflowPoint with no newlines. |
|
330 | - | - a ReflowPoint which has _some_ whitespace. |
|
331 | - | ||
332 | - | Given this we apply constraints to ensure the whitespace |
|
333 | - | is of an appropriate size. |
|
334 | - | """ |
|
335 | - | new_fixes: List[LintFix] = [] |
|
336 | - | # Get some indices so that we can reference around them |
|
337 | - | ws_idx = segment_buffer.index(last_whitespace) |
|
338 | - | ||
339 | - | # Do we have either side set to "any" |
|
340 | - | if "any" in [pre_constraint, post_constraint]: |
|
341 | - | # In this instance - don't change anything. |
|
342 | - | # e.g. this could mean there is a comment on one side. |
|
343 | - | return segment_buffer, new_fixes |
|
344 | - | ||
345 | - | # Do we have either side set to "touch"? |
|
346 | - | if "touch" in [pre_constraint, post_constraint]: |
|
347 | - | # In this instance - no whitespace is correct, This |
|
348 | - | # means we should delete it. |
|
349 | - | new_fixes.append( |
|
350 | - | LintFix( |
|
351 | - | "delete", |
|
352 | - | anchor=last_whitespace, |
|
353 | - | ) |
|
354 | - | ) |
|
355 | - | segment_buffer.pop(ws_idx) |
|
356 | - | return segment_buffer, new_fixes |
|
357 | - | ||
358 | - | # Handle left alignment & singles |
|
359 | - | if ( |
|
360 | - | post_constraint.startswith("align") and next_block |
|
361 | - | ) or pre_constraint == post_constraint == "single": |
|
362 | - | ||
363 | - | # Determine the desired spacing, either as alignment or as a single. |
|
364 | - | if post_constraint.startswith("align") and next_block: |
|
365 | - | alignment_config = post_constraint.split(":") |
|
366 | - | seg_type = alignment_config[1] |
|
367 | - | align_within = ( |
|
368 | - | alignment_config[2] if len(alignment_config) > 2 else None |
|
369 | - | ) |
|
370 | - | align_boundary = ( |
|
371 | - | alignment_config[3] if len(alignment_config) > 3 else None |
|
372 | - | ) |
|
373 | - | reflow_logger.debug( |
|
374 | - | " Alignment Config: %s, %s, %s, %s", |
|
375 | - | seg_type, |
|
376 | - | align_within, |
|
377 | - | align_boundary, |
|
378 | - | next_block.segments[0].pos_marker.working_line_pos, |
|
379 | - | ) |
|
380 | - | ||
381 | - | desired_space = cls._determine_aligned_inline_spacing( |
|
382 | - | root_segment, |
|
383 | - | last_whitespace, |
|
384 | - | next_block.segments[0], |
|
385 | - | seg_type, |
|
386 | - | align_within, |
|
387 | - | align_boundary, |
|
388 | - | ) |
|
389 | - | else: |
|
390 | - | desired_space = " " |
|
391 | - | ||
392 | - | if last_whitespace.raw != desired_space: |
|
393 | - | new_seg = last_whitespace.edit(desired_space) |
|
394 | - | new_fixes.append( |
|
395 | - | LintFix( |
|
396 | - | "replace", |
|
397 | - | anchor=last_whitespace, |
|
398 | - | edit=[new_seg], |
|
399 | - | ) |
|
400 | - | ) |
|
401 | - | segment_buffer[ws_idx] = new_seg |
|
402 | - | ||
403 | - | return segment_buffer, new_fixes |
|
404 | - | ||
405 | - | raise NotImplementedError( # pragma: no cover |
|
406 | - | f"Unexpected Constraints: {pre_constraint}, {post_constraint}" |
|
407 | - | ) |
|
408 | - | ||
409 | - | @staticmethod |
|
410 | - | def _handle_respace__inline_without_space( |
|
411 | - | pre_constraint: str, |
|
412 | - | post_constraint: str, |
|
413 | - | prev_block: Optional[ReflowBlock], |
|
414 | - | next_block: Optional[ReflowBlock], |
|
415 | - | segment_buffer: List[RawSegment], |
|
416 | - | existing_fixes: List[LintFix], |
|
417 | - | anchor_on: str = "before", |
|
418 | - | ) -> Tuple[List[RawSegment], List[LintFix], bool]: |
|
419 | - | """Ensure spacing is the right size. |
|
420 | - | ||
421 | - | This forms one of the cases handled by .respace_point(). |
|
422 | - | ||
423 | - | This code assumes: |
|
424 | - | - a ReflowPoint with no newlines. |
|
425 | - | - a ReflowPoint which _no_ whitespace. |
|
426 | - | ||
427 | - | Given this we apply constraints to either confirm no |
|
428 | - | spacing is required or create some of the right size. |
|
429 | - | """ |
|
430 | - | edited = False |
|
431 | - | new_fixes: List[LintFix] = [] |
|
432 | - | # Do we have either side set to "touch" or "any" |
|
433 | - | if {"touch", "any"}.intersection([pre_constraint, post_constraint]): |
|
434 | - | # In this instance - no whitespace is correct. |
|
435 | - | # Either because there shouldn't be, or because "any" |
|
436 | - | # means we shouldn't check. |
|
437 | - | pass |
|
438 | - | # Handle the default case |
|
439 | - | elif pre_constraint == post_constraint == "single": |
|
440 | - | # Insert a single whitespace. |
|
441 | - | reflow_logger.debug(" Inserting Single Whitespace.") |
|
442 | - | # Add it to the buffer first (the easy bit). The hard bit |
|
443 | - | # is to then determine how to generate the appropriate LintFix |
|
444 | - | # objects. |
|
445 | - | segment_buffer.append(WhitespaceSegment()) |
|
446 | - | edited = True |
|
447 | - | ||
448 | - | # So special handling here. If segments either side |
|
449 | - | # already exist then we don't care which we anchor on |
|
450 | - | # but if one is already an insertion (as shown by a lack) |
|
451 | - | # of pos_marker, then we should piggy back on that pre-existing |
|
452 | - | # fix. |
|
453 | - | existing_fix = None |
|
454 | - | insertion = None |
|
455 | - | if prev_block and not prev_block.segments[-1].pos_marker: |
|
456 | - | existing_fix = "after" |
|
457 | - | insertion = prev_block.segments[-1] |
|
458 | - | elif next_block and not next_block.segments[0].pos_marker: |
|
459 | - | existing_fix = "before" |
|
460 | - | insertion = next_block.segments[0] |
|
461 | - | ||
462 | - | if existing_fix: |
|
463 | - | reflow_logger.debug(" Detected existing fix %s", existing_fix) |
|
464 | - | if not existing_fixes: # pragma: no cover |
|
465 | - | raise ValueError( |
|
466 | - | "Fixes detected, but none passed to .respace(). " |
|
467 | - | "This will cause conflicts." |
|
468 | - | ) |
|
469 | - | # Find the fix |
|
470 | - | for fix in existing_fixes: |
|
471 | - | # Does it contain the insertion? |
|
472 | - | # TODO: This feels ugly - eq for BaseSegment is different |
|
473 | - | # to uuid matching for RawSegment. Perhaps this should be |
|
474 | - | # more aligned. There might be a better way of doing this. |
|
475 | - | if ( |
|
476 | - | insertion |
|
477 | - | and fix.edit |
|
478 | - | and insertion.uuid in [elem.uuid for elem in fix.edit] |
|
479 | - | ): |
|
480 | - | break |
|
481 | - | else: # pragma: no cover |
|
482 | - | reflow_logger.warning("Fixes %s", existing_fixes) |
|
483 | - | raise ValueError(f"Couldn't find insertion for {insertion}") |
|
484 | - | # Mutate the existing fix |
|
485 | - | assert fix |
|
486 | - | assert fix.edit # It's going to be an edit if we've picked it up. |
|
487 | - | if existing_fix == "before": |
|
488 | - | fix.edit = [cast(BaseSegment, WhitespaceSegment())] + fix.edit |
|
489 | - | elif existing_fix == "after": |
|
490 | - | fix.edit = fix.edit + [cast(BaseSegment, WhitespaceSegment())] |
|
491 | - | else: |
|
492 | - | reflow_logger.debug(" Not Detected existing fix. Creating new") |
|
493 | - | # Take into account hint on where to anchor if given. |
|
494 | - | if prev_block and anchor_on != "after": |
|
495 | - | new_fixes.append( |
|
496 | - | LintFix( |
|
497 | - | "create_after", |
|
498 | - | anchor=prev_block.segments[-1], |
|
499 | - | edit=[WhitespaceSegment()], |
|
500 | - | ) |
|
501 | - | ) |
|
502 | - | elif next_block: |
|
503 | - | new_fixes.append( |
|
504 | - | LintFix( |
|
505 | - | "create_before", |
|
506 | - | anchor=next_block.segments[0], |
|
507 | - | edit=[WhitespaceSegment()], |
|
508 | - | ) |
|
509 | - | ) |
|
510 | - | else: # pragma: no cover |
|
511 | - | NotImplementedError( |
|
512 | - | "Not set up to handle a missing _after_ and _before_." |
|
513 | - | ) |
|
514 | - | else: # pragma: no cover |
|
515 | - | # TODO: This will get test coverage when configuration routines |
|
516 | - | # are in properly. |
|
517 | - | raise NotImplementedError( |
|
518 | - | f"Unexpected Constraints: {pre_constraint}, {post_constraint}" |
|
519 | - | ) |
|
520 | - | ||
521 | - | return segment_buffer, existing_fixes + new_fixes, edited |
|
522 | - | ||
523 | 127 | def _get_indent_segment(self) -> Optional[RawSegment]: |
|
524 | 128 | """Get the current indent segment (if there).""" |
|
525 | 129 | indent = None |
657 | 261 | line breaks. The default operation of `respace` does not enable it |
|
658 | 262 | however it exists as a convenience for rules which wish to use it. |
|
659 | 263 | """ |
|
660 | - | pre_constraint, post_constraint, strip_newlines = self._determine_constraints( |
|
264 | + | pre_constraint, post_constraint, strip_newlines = determine_constraints( |
|
661 | 265 | prev_block, next_block, strip_newlines |
|
662 | 266 | ) |
|
663 | 267 | ||
664 | 268 | reflow_logger.debug("Respacing: %s", self) |
|
665 | 269 | ||
666 | 270 | # The buffer is used to create the new reflow point to return |
|
667 | - | segment_buffer, last_whitespace, new_fixes = self._process_spacing( |
|
271 | + | segment_buffer, last_whitespace, new_fixes = process_spacing( |
|
668 | 272 | list(self.segments), strip_newlines |
|
669 | 273 | ) |
|
670 | 274 |
719 | 323 | # Do we at least have _some_ whitespace? |
|
720 | 324 | if last_whitespace: |
|
721 | 325 | # We do - is it the right size? |
|
722 | - | segment_buffer, delta_fixes = self._handle_respace__inline_with_space( |
|
326 | + | segment_buffer, delta_fixes = handle_respace__inline_with_space( |
|
723 | 327 | pre_constraint, |
|
724 | 328 | post_constraint, |
|
725 | 329 | next_block, |
731 | 335 | else: |
|
732 | 336 | # No. Should we insert some? |
|
733 | 337 | # NOTE: This method operates on the existing fix buffer. |
|
734 | - | ( |
|
735 | - | segment_buffer, |
|
736 | - | fixes, |
|
737 | - | edited, |
|
738 | - | ) = self._handle_respace__inline_without_space( |
|
338 | + | (segment_buffer, fixes, edited,) = handle_respace__inline_without_space( |
|
739 | 339 | pre_constraint, |
|
740 | 340 | post_constraint, |
|
741 | 341 | prev_block, |
752 | 352 | reflow_logger.debug(" New Fixes: %s", new_fixes) |
|
753 | 353 | ||
754 | 354 | return fixes + new_fixes, ReflowPoint(tuple(segment_buffer)) |
|
355 | + | ||
356 | + | ||
357 | + | ReflowSequenceType = List[Union[ReflowBlock, ReflowPoint]] |
Learn more Showing 3 files with coverage changes found.
src/sqlfluff/utils/reflow/reindent.py
src/sqlfluff/utils/reflow/rebreak.py
src/sqlfluff/utils/reflow/respace.py
Files | Coverage |
---|---|
src/sqlfluff | -0.01% 99.99% |
Project Totals (188 files) | 99.99% |
#3919
f4457b8
fd92f60
7de21ac