@@ -103,9 +103,9 @@ public LoCInfo extract(TextualExtractor textualExtractor) {
103
103
try {
104
104
parser = new ParserImpl (new StreamReader (textualExtractor .getSource ()));
105
105
resolver = new Resolver ();
106
-
107
106
int idx = 0 ;
108
- while (!atStreamEnd ()) extractDocument (fileLabel , idx ++);
107
+ while (!atStreamEnd ())
108
+ extractDocument (fileLabel , idx ++, textualExtractor .getSource ().codePoints ().toArray ());
109
109
} catch (MarkedYAMLException e ) {
110
110
int line = e .getProblemMark ().getLine () + 1 ;
111
111
int column = e .getProblemMark ().getColumn () + 1 ;
@@ -136,16 +136,16 @@ private boolean atStreamEnd() {
136
136
}
137
137
138
138
/**
 * Extract a complete YAML document; cf. {@link Composer#composeDocument}.
 *
 * <p>Consumes one parser event, extracts the document's root node via {@code extractNode}, and
 * then consumes one more parser event.
 *
 * @param parent label passed on to {@code extractNode} as the parent of the root node (the
 *     caller in {@code extract} passes the file label)
 * @param idx index passed on to {@code extractNode} (the caller in {@code extract} passes a
 *     counter that starts at 0 and is incremented per document)
 * @param codepoints the source text as an array of code points; forwarded unchanged to
 *     {@code extractNode}
 */
139
- private void extractDocument (Label parent , int idx ) {
139
+ private void extractDocument (Label parent , int idx , int [] codepoints ) {
140
140
// Drop the DOCUMENT-START event; it is consumed here and its value is not used
141
141
parser .getEvent ();
142
- extractNode (parent , idx );
142
+ extractNode (parent , idx , codepoints );
143
143
// Drop the DOCUMENT-END event; it is consumed here and its value is not used
144
144
parser .getEvent ();
145
145
}
146
146
147
147
/** Extract a single YAML node; cf. {@link Composer#composeNode}. */
148
- private void extractNode (Label parent , int idx ) {
148
+ private void extractNode (Label parent , int idx , int [] codepoints ) {
149
149
Label label = trapWriter .freshLabel ();
150
150
NodeKind kind ;
151
151
String tag = "" ;
@@ -169,15 +169,14 @@ private void extractNode(Label parent, int idx) {
169
169
scalar .getImplicit ().canOmitTagInPlainScalar ());
170
170
Character style = scalar .getStyle ();
171
171
int styleCode = style == null ? 0 : (int ) style ;
172
- trapWriter .addTuple (
173
- YAMLTables .YAML_SCALARS , label , styleCode , scalar .getValue ());
172
+ trapWriter .addTuple (YAMLTables .YAML_SCALARS , label , styleCode , scalar .getValue ());
174
173
} else if (start .is (Event .ID .SequenceStart )) {
175
174
kind = NodeKind .SEQUENCE ;
176
175
SequenceStartEvent sequenceStart = (SequenceStartEvent ) start ;
177
176
tag = getTag (sequenceStart .getTag (), NodeId .sequence , null , sequenceStart .getImplicit ());
178
177
179
178
int childIdx = 0 ;
180
- while (!parser .checkEvent (Event .ID .SequenceEnd )) extractNode (label , childIdx ++);
179
+ while (!parser .checkEvent (Event .ID .SequenceEnd )) extractNode (label , childIdx ++, codepoints );
181
180
182
181
end = parser .getEvent ();
183
182
} else if (start .is (Event .ID .MappingStart )) {
@@ -187,8 +186,8 @@ private void extractNode(Label parent, int idx) {
187
186
188
187
int childIdx = 1 ;
189
188
while (!parser .checkEvent (Event .ID .MappingEnd )) {
190
- extractNode (label , childIdx );
191
- extractNode (label , -childIdx );
189
+ extractNode (label , childIdx , codepoints );
190
+ extractNode (label , -childIdx , codepoints );
192
191
++childIdx ;
193
192
}
194
193
@@ -205,7 +204,7 @@ private void extractNode(Label parent, int idx) {
205
204
parent ,
206
205
idx ,
207
206
tag ,
208
- mkToString (start .getStartMark (), end .getEndMark ()));
207
+ mkToString (start .getStartMark (), end .getEndMark (), codepoints ));
209
208
extractLocation (label , start .getStartMark (), end .getEndMark ());
210
209
}
211
210
@@ -216,33 +215,30 @@ private String getTag(String explicitTag, NodeId kind, String value, boolean imp
216
215
return explicitTag ;
217
216
}
218
217
218
+ private static boolean isNewLine (int codePoint ) {
219
+ switch (codePoint ) {
220
+ case '\n' :
221
+ case '\r' :
222
+ case '\u0085' :
223
+ case '\u2028' :
224
+ case '\u2029' :
225
+ return true ;
226
+ default :
227
+ return false ;
228
+ }
229
+ }
230
+
219
231
/**
220
- * SnakeYAML doesn't directly expose the source text of nodes, but we can get a decent
221
- * approximation from the snippet associated with the node's start {@linkplain Mark}.
222
- *
223
- * <p>The snippet of a {@linkplain Mark} is meant to be used for diagnostic messages and consists
224
- * of two lines: the first line contains some context around the source position represented by
225
- * the mark, the second line contains a caret character positioned underneath the source position
226
- * itself.
227
- *
228
- * <p>To approximate the source text, we take the text on the first line and strip off the first
229
- * <i>n</i> characters, where <i>n</i> is the number of spaces preceding the caret character on
230
- * the second line.
231
- *
232
- * <p>This is only an approximation, since the context is limited to relatively short strings that
233
- * never extend across newlines, but it suffices for the purposes of <code>toString</code>.
232
+ * SnakeYAML doesn't directly expose the source text of nodes, but we also take the file contents
233
+ * as an array of Unicode code points. The start and end marks each contain an index into the code
234
+ * point stream (the end is exclusive), so we can reconstruct the snippet. For readability, we
235
+ * stop at the first encountered newline.
234
236
*/
235
- private String mkToString (Mark startMark , Mark endMark ) {
236
- String snippet = startMark .get_snippet (0 , Integer .MAX_VALUE );
237
- int nl = snippet .indexOf ('\n' );
238
- String context = snippet .substring (0 , nl );
239
- String src = context .substring (snippet .substring (nl + 1 ).indexOf ('^' ));
240
- int desiredStringLength = endMark .getColumn () - startMark .getColumn ();
241
- boolean hasAccessToDesiredString = src .length () >= desiredStringLength ;
242
- boolean isSingleLine = endMark .getLine () == startMark .getLine ();
243
- if (isSingleLine && hasAccessToDesiredString )
244
- src = src .substring (0 , desiredStringLength );
245
- return TextualExtractor .sanitiseToString (src );
237
+ private static String mkToString (Mark startMark , Mark endMark , int [] codepoints ) {
238
+ StringBuilder b = new StringBuilder ();
239
+ for (int i = startMark .getIndex (); i < endMark .getIndex () && !isNewLine (codepoints [i ]); i ++)
240
+ b .appendCodePoint (codepoints [i ]);
241
+ return TextualExtractor .sanitiseToString (b .toString ());
246
242
}
247
243
248
244
/** Emit a source ___location for a YAML node. */
0 commit comments