Coverage Report - org.yaml.snakeyaml.scanner.ScannerImpl
 
Classes in this File Line Coverage Branch Coverage Complexity
ScannerImpl
99%
804/809
95%
454/475
5.28
ScannerImpl$Chomping
100%
7/7
100%
8/8
5.28
 
 1  
 /**
 2  
  * Copyright (c) 2008-2011, http://www.snakeyaml.org
 3  
  *
 4  
  * Licensed under the Apache License, Version 2.0 (the "License");
 5  
  * you may not use this file except in compliance with the License.
 6  
  * You may obtain a copy of the License at
 7  
  *
 8  
  *     http://www.apache.org/licenses/LICENSE-2.0
 9  
  *
 10  
  * Unless required by applicable law or agreed to in writing, software
 11  
  * distributed under the License is distributed on an "AS IS" BASIS,
 12  
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13  
  * See the License for the specific language governing permissions and
 14  
  * limitations under the License.
 15  
  */
 16  
 
 17  
 package org.yaml.snakeyaml.scanner;
 18  
 
 19  
 import java.nio.ByteBuffer;
 20  
 import java.nio.charset.CharacterCodingException;
 21  
 import java.util.ArrayList;
 22  
 import java.util.HashMap;
 23  
 import java.util.Iterator;
 24  
 import java.util.LinkedHashMap;
 25  
 import java.util.List;
 26  
 import java.util.Map;
 27  
 import java.util.regex.Pattern;
 28  
 
 29  
 import org.yaml.snakeyaml.error.Mark;
 30  
 import org.yaml.snakeyaml.error.YAMLException;
 31  
 import org.yaml.snakeyaml.reader.StreamReader;
 32  
 import org.yaml.snakeyaml.tokens.AliasToken;
 33  
 import org.yaml.snakeyaml.tokens.AnchorToken;
 34  
 import org.yaml.snakeyaml.tokens.BlockEndToken;
 35  
 import org.yaml.snakeyaml.tokens.BlockEntryToken;
 36  
 import org.yaml.snakeyaml.tokens.BlockMappingStartToken;
 37  
 import org.yaml.snakeyaml.tokens.BlockSequenceStartToken;
 38  
 import org.yaml.snakeyaml.tokens.DirectiveToken;
 39  
 import org.yaml.snakeyaml.tokens.DocumentEndToken;
 40  
 import org.yaml.snakeyaml.tokens.DocumentStartToken;
 41  
 import org.yaml.snakeyaml.tokens.FlowEntryToken;
 42  
 import org.yaml.snakeyaml.tokens.FlowMappingEndToken;
 43  
 import org.yaml.snakeyaml.tokens.FlowMappingStartToken;
 44  
 import org.yaml.snakeyaml.tokens.FlowSequenceEndToken;
 45  
 import org.yaml.snakeyaml.tokens.FlowSequenceStartToken;
 46  
 import org.yaml.snakeyaml.tokens.KeyToken;
 47  
 import org.yaml.snakeyaml.tokens.ScalarToken;
 48  
 import org.yaml.snakeyaml.tokens.StreamEndToken;
 49  
 import org.yaml.snakeyaml.tokens.StreamStartToken;
 50  
 import org.yaml.snakeyaml.tokens.TagToken;
 51  
 import org.yaml.snakeyaml.tokens.TagTuple;
 52  
 import org.yaml.snakeyaml.tokens.Token;
 53  
 import org.yaml.snakeyaml.tokens.ValueToken;
 54  
 import org.yaml.snakeyaml.util.ArrayStack;
 55  
 import org.yaml.snakeyaml.util.UriEncoder;
 56  
 
 57  
 /**
 58  
  * <pre>
 59  
  * Scanner produces tokens of the following types:
 60  
  * STREAM-START
 61  
  * STREAM-END
 62  
  * DIRECTIVE(name, value)
 63  
  * DOCUMENT-START
 64  
  * DOCUMENT-END
 65  
  * BLOCK-SEQUENCE-START
 66  
  * BLOCK-MAPPING-START
 67  
  * BLOCK-END
 68  
  * FLOW-SEQUENCE-START
 69  
  * FLOW-MAPPING-START
 70  
  * FLOW-SEQUENCE-END
 71  
  * FLOW-MAPPING-END
 72  
  * BLOCK-ENTRY
 73  
  * FLOW-ENTRY
 74  
  * KEY
 75  
  * VALUE
 76  
  * ALIAS(value)
 77  
  * ANCHOR(value)
 78  
  * TAG(value)
 79  
  * SCALAR(value, plain, style)
 80  
  * Read comments in the Scanner code for more details.
 81  
  * </pre>
 82  
  */
 83  
 public final class ScannerImpl implements Scanner {
 84  1
     private final static Pattern NOT_HEXA = Pattern.compile("[^0-9A-Fa-f]");
 85  1
     public final static Map<Character, String> ESCAPE_REPLACEMENTS = new HashMap<Character, String>();
 86  1
     public final static Map<Character, Integer> ESCAPE_CODES = new HashMap<Character, Integer>();
 87  
 
 88  
     static {
 89  1
         ESCAPE_REPLACEMENTS.put(new Character('0'), "\0");
 90  1
         ESCAPE_REPLACEMENTS.put(new Character('a'), "\u0007");
 91  1
         ESCAPE_REPLACEMENTS.put(new Character('b'), "\u0008");
 92  1
         ESCAPE_REPLACEMENTS.put(new Character('t'), "\u0009");
 93  1
         ESCAPE_REPLACEMENTS.put(new Character('n'), "\n");
 94  1
         ESCAPE_REPLACEMENTS.put(new Character('v'), "\u000B");
 95  1
         ESCAPE_REPLACEMENTS.put(new Character('f'), "\u000C");
 96  1
         ESCAPE_REPLACEMENTS.put(new Character('r'), "\r");
 97  1
         ESCAPE_REPLACEMENTS.put(new Character('e'), "\u001B");
 98  1
         ESCAPE_REPLACEMENTS.put(new Character(' '), "\u0020");
 99  1
         ESCAPE_REPLACEMENTS.put(new Character('"'), "\"");
 100  1
         ESCAPE_REPLACEMENTS.put(new Character('\\'), "\\");
 101  1
         ESCAPE_REPLACEMENTS.put(new Character('N'), "\u0085");
 102  1
         ESCAPE_REPLACEMENTS.put(new Character('_'), "\u00A0");
 103  1
         ESCAPE_REPLACEMENTS.put(new Character('L'), "\u2028");
 104  1
         ESCAPE_REPLACEMENTS.put(new Character('P'), "\u2029");
 105  
 
 106  1
         ESCAPE_CODES.put(new Character('x'), 2);
 107  1
         ESCAPE_CODES.put(new Character('u'), 4);
 108  1
         ESCAPE_CODES.put(new Character('U'), 8);
 109  1
     }
 110  
     private final StreamReader reader;
 111  
     // Had we reached the end of the stream?
 112  3692
     private boolean done = false;
 113  
 
 114  
     // The number of unclosed '{' and '['. `flow_level == 0` means block
 115  
     // context.
 116  3692
     private int flowLevel = 0;
 117  
 
 118  
     // List of processed tokens that are not yet emitted.
 119  
     private List<Token> tokens;
 120  
 
 121  
     // Number of tokens that were emitted through the `get_token` method.
 122  3692
     private int tokensTaken = 0;
 123  
 
 124  
     // The current indentation level.
 125  3692
     private int indent = -1;
 126  
 
 127  
     // Past indentation levels.
 128  
     private ArrayStack<Integer> indents;
 129  
 
 130  
     // Variables related to simple keys treatment. See PyYAML.
 131  
 
 132  
     /**
 133  
      * <pre>
 134  
      * A simple key is a key that is not denoted by the '?' indicator.
 135  
      * Example of simple keys:
 136  
      *   ---
 137  
      *   block simple key: value
 138  
      *   ? not a simple key:
 139  
      *   : { flow simple key: value }
 140  
      * We emit the KEY token before all keys, so when we find a potential
 141  
      * simple key, we try to locate the corresponding ':' indicator.
 142  
      * Simple keys should be limited to a single line and 1024 characters.
 143  
      * 
 144  
      * Can a simple key start at the current position? A simple key may
 145  
      * start:
 146  
      * - at the beginning of the line, not counting indentation spaces
 147  
      *       (in block context),
 148  
      * - after '{', '[', ',' (in the flow context),
 149  
      * - after '?', ':', '-' (in the block context).
 150  
      * In the block context, this flag also signifies if a block collection
 151  
      * may start at the current position.
 152  
      * </pre>
 153  
      */
 154  3692
     private boolean allowSimpleKey = true;
 155  
 
 156  
     /*
 157  
      * Keep track of possible simple keys. This is a dictionary. The key is
 158  
      * `flow_level`; there can be no more that one possible simple key for each
 159  
      * level. The value is a SimpleKey record: (token_number, required, index,
 160  
      * line, column, mark) A simple key may start with ALIAS, ANCHOR, TAG,
 161  
      * SCALAR(flow), '[', or '{' tokens.
 162  
      */
 163  
     private Map<Integer, SimpleKey> possibleSimpleKeys;
 164  
 
 165  3692
     public ScannerImpl(StreamReader reader) {
 166  3692
         this.reader = reader;
 167  3692
         this.tokens = new ArrayList<Token>(100);
 168  3692
         this.indents = new ArrayStack<Integer>(10);
 169  
         // the order in possibleSimpleKeys is kept for nextPossibleSimpleKey()
 170  3692
         this.possibleSimpleKeys = new LinkedHashMap<Integer, SimpleKey>();
 171  3692
         fetchStreamStart();// Add the STREAM-START token.
 172  3692
     }
 173  
 
 174  
     /**
 175  
      * Check if the next token is one of the given types.
 176  
      */
 177  
     public boolean checkToken(Token.ID... choices) {
 178  3510680
         while (needMoreTokens()) {
 179  649823
             fetchMoreTokens();
 180  
         }
 181  2860855
         if (!this.tokens.isEmpty()) {
 182  2860735
             if (choices.length == 0) {
 183  1962
                 return true;
 184  
             }
 185  
             // since profiler puts this method on top we should not use
 186  
             // 'foreach' here
 187  2858773
             Token.ID first = this.tokens.get(0).getTokenId();
 188  5751008
             for (int i = 0; i < choices.length; i++) {
 189  3739635
                 if (first == choices[i]) {
 190  847400
                     return true;
 191  
                 }
 192  
             }
 193  
         }
 194  2011493
         return false;
 195  
     }
 196  
 
 197  
     /**
 198  
      * Return the next token, but do not delete if from the queue.
 199  
      */
 200  
     public Token peekToken() {
 201  444330
         while (needMoreTokens()) {
 202  1908
             fetchMoreTokens();
 203  
         }
 204  442422
         return this.tokens.get(0);
 205  
     }
 206  
 
 207  
     /**
 208  
      * Return the next token.
 209  
      */
 210  
     public Token getToken() {
 211  840588
         if (!this.tokens.isEmpty()) {
 212  840587
             this.tokensTaken++;
 213  840587
             return this.tokens.remove(0);
 214  
         }
 215  1
         return null;
 216  
     }
 217  
 
 218  
     // Private methods.
 219  
 
 220  
     private boolean needMoreTokens() {
 221  3955010
         if (this.done) {
 222  36237
             return false;
 223  
         }
 224  3918773
         if (this.tokens.isEmpty()) {
 225  390241
             return true;
 226  
         }
 227  
         // The current token may be a potential simple key, so we
 228  
         // need to look further.
 229  3528532
         stalePossibleSimpleKeys();
 230  3528530
         return nextPossibleSimpleKey() == this.tokensTaken;
 231  
     }
 232  
 
 233  
     private void fetchMoreTokens() {
 234  
         // Eat whitespaces and comments until we reach the next token.
 235  651731
         scanToNextToken();
 236  
         // Remove obsolete possible simple keys.
 237  651731
         stalePossibleSimpleKeys();
 238  
         // Compare the current indentation and column. It may add some tokens
 239  
         // and decrease the current indentation level.
 240  651731
         unwindIndent(reader.getColumn());
 241  
         // Peek the next character.
 242  651731
         char ch = reader.peek();
 243  651731
         switch (ch) {
 244  
         case '\0':
 245  
             // Is it the end of stream?
 246  3582
             fetchStreamEnd();
 247  3580
             return;
 248  
         case '%':
 249  
             // Is it a directive?
 250  1810
             if (checkDirective()) {
 251  1810
                 fetchDirective();
 252  1784
                 return;
 253  
             }
 254  
             break;
 255  
         case '-':
 256  
             // Is it the document start?
 257  117036
             if (checkDocumentStart()) {
 258  2336
                 fetchDocumentStart();
 259  2336
                 return;
 260  
                 // Is it the block entry indicator?
 261  114700
             } else if (checkBlockEntry()) {
 262  114646
                 fetchBlockEntry();
 263  114644
                 return;
 264  
             }
 265  
             break;
 266  
         case '.':
 267  
             // Is it the document end?
 268  156
             if (checkDocumentEnd()) {
 269  136
                 fetchDocumentEnd();
 270  136
                 return;
 271  
             }
 272  
             break;
 273  
         // TODO support for BOM within a stream. (not implemented in PyYAML)
 274  
         case '[':
 275  
             // Is it the flow sequence start indicator?
 276  1092
             fetchFlowSequenceStart();
 277  1092
             return;
 278  
         case '{':
 279  
             // Is it the flow mapping start indicator?
 280  2788
             fetchFlowMappingStart();
 281  2788
             return;
 282  
         case ']':
 283  
             // Is it the flow sequence end indicator?
 284  1089
             fetchFlowSequenceEnd();
 285  1087
             return;
 286  
         case '}':
 287  
             // Is it the flow mapping end indicator?
 288  2786
             fetchFlowMappingEnd();
 289  2786
             return;
 290  
         case ',':
 291  
             // Is it the flow entry indicator?
 292  3569
             fetchFlowEntry();
 293  3569
             return;
 294  
             // see block entry indicator above
 295  
         case '?':
 296  
             // Is it the key indicator?
 297  971
             if (checkKey()) {
 298  971
                 fetchKey();
 299  969
                 return;
 300  
             }
 301  
             break;
 302  
         case ':':
 303  
             // Is it the value indicator?
 304  140221
             if (checkValue()) {
 305  139915
                 fetchValue();
 306  139913
                 return;
 307  
             }
 308  
             break;
 309  
         case '*':
 310  
             // Is it an alias?
 311  1425
             fetchAlias();
 312  1425
             return;
 313  
         case '&':
 314  
             // Is it an anchor?
 315  1372
             fetchAnchor();
 316  1368
             return;
 317  
         case '!':
 318  
             // Is it a tag?
 319  12164
             fetchTag();
 320  12149
             return;
 321  
         case '|':
 322  
             // Is it a literal scalar?
 323  634
             if (this.flowLevel == 0) {
 324  634
                 fetchLiteral();
 325  634
                 return;
 326  
             }
 327  
             break;
 328  
         case '>':
 329  
             // Is it a folded scalar?
 330  554
             if (this.flowLevel == 0) {
 331  554
                 fetchFolded();
 332  548
                 return;
 333  
             }
 334  
             break;
 335  
         case '\'':
 336  
             // Is it a single quoted scalar?
 337  203279
             fetchSingle();
 338  203277
             return;
 339  
         case '"':
 340  
             // Is it a double quoted scalar?
 341  6067
             fetchDouble();
 342  6061
             return;
 343  
         }
 344  
         // It must be a plain scalar then.
 345  151516
         if (checkPlain()) {
 346  151513
             fetchPlain();
 347  151511
             return;
 348  
         }
 349  
         // No? It's an error. Let's produce a nice error message.
 350  3
         String chRepresentation = String.valueOf(ch);
 351  3
         for (Character s : ESCAPE_REPLACEMENTS.keySet()) {
 352  45
             String v = ESCAPE_REPLACEMENTS.get(s);
 353  45
             if (v.equals(chRepresentation)) {
 354  1
                 chRepresentation = "\\" + s;// ' ' -> '\t'
 355  1
                 break;
 356  
             }
 357  44
         }
 358  3
         throw new ScannerException("while scanning for the next token", null, "found character "
 359  
                 + ch + "'" + chRepresentation + "' that cannot start any token", reader.getMark());
 360  
     }
 361  
 
 362  
     // Simple keys treatment.
 363  
 
 364  
     /**
 365  
      * Return the number of the nearest possible simple key. Actually we don't
 366  
      * need to loop through the whole dictionary.
 367  
      */
 368  
     private int nextPossibleSimpleKey() {
 369  
         /*
 370  
          * the implementation is not as in PyYAML. Because
 371  
          * this.possibleSimpleKeys is ordered we can simply take the first key
 372  
          */
 373  3528530
         if (!this.possibleSimpleKeys.isEmpty()) {
 374  328867
             return this.possibleSimpleKeys.values().iterator().next().getTokenNumber();
 375  
         }
 376  3199663
         return -1;
 377  
     }
 378  
 
 379  
     /**
 380  
      * <pre>
 381  
      * Remove entries that are no longer possible simple keys. According to
 382  
      * the YAML specification, simple keys
 383  
      * - should be limited to a single line,
 384  
      * - should be no longer than 1024 characters.
 385  
      * Disabling this procedure will allow simple keys of any length and
 386  
      * height (may cause problems if indentation is broken though).
 387  
      * </pre>
 388  
      */
 389  
     private void stalePossibleSimpleKeys() {
 390  
         // use toRemove to avoid java.util.ConcurrentModificationException
 391  4180263
         if (!this.possibleSimpleKeys.isEmpty()) {
 392  591207
             for (Iterator<SimpleKey> iterator = this.possibleSimpleKeys.values().iterator(); iterator
 393  1190612
                     .hasNext();) {
 394  599407
                 SimpleKey key = iterator.next();
 395  599407
                 if ((key.getLine() != reader.getLine())
 396  
                         || (reader.getIndex() - key.getIndex() > 1024)) {
 397  105778
                     if (key.isRequired()) {
 398  2
                         throw new ScannerException("while scanning a simple key", key.getMark(),
 399  
                                 "could not found expected ':'", reader.getMark());
 400  
                     }
 401  105776
                     iterator.remove();
 402  
                 }
 403  599405
             }
 404  
         }
 405  4180261
     }
 406  
 
 407  
     /**
 408  
      * The next token may start a simple key. We check if it's possible and save
 409  
      * its position. This function is called for ALIAS, ANCHOR, TAG,
 410  
      * SCALAR(flow), '[', and '{'.
 411  
      */
 412  
     private void savePossibleSimpleKey() {
 413  
         // The next token may start a simple key. We check if it's possible
 414  
         // and save its position. This function is called for
 415  
         // ALIAS, ANCHOR, TAG, SCALAR(flow), '[', and '{'.
 416  
 
 417  
         // Check if a simple key is required at the current position.
 418  379700
         boolean required = ((this.flowLevel == 0) && (this.indent == this.reader.getColumn()));
 419  
 
 420  379700
         if (allowSimpleKey || !required) {
 421  
             // A simple key is required only if it is the first token in the
 422  
             // current
 423  
             // line. Therefore it is always allowed.
 424  
         } else {
 425  0
             throw new YAMLException(
 426  
                     "A simple key is required only if it is the first token in the current line");
 427  
         }
 428  
 
 429  
         // The next token might be a simple key. Let's save it's number and
 430  
         // position.
 431  379700
         if (this.allowSimpleKey) {
 432  247553
             removePossibleSimpleKey();
 433  247553
             int tokenNumber = this.tokensTaken + this.tokens.size();
 434  247553
             SimpleKey key = new SimpleKey(tokenNumber, required, reader.getIndex(),
 435  
                     reader.getLine(), this.reader.getColumn(), this.reader.getMark());
 436  247553
             this.possibleSimpleKeys.put(this.flowLevel, key);
 437  
         }
 438  379700
     }
 439  
 
 440  
     /**
 441  
      * Remove the saved possible key position at the current flow level.
 442  
      */
 443  
     private void removePossibleSimpleKey() {
 444  380606
         SimpleKey key = possibleSimpleKeys.remove(flowLevel);
 445  380606
         if (key != null && key.isRequired()) {
 446  4
             throw new ScannerException("while scanning a simple key", key.getMark(),
 447  
                     "could not found expected ':'", reader.getMark());
 448  
         }
 449  380602
     }
 450  
 
 451  
     // Indentation functions.
 452  
 
 453  
     /**
 454  
      * <pre>
 455  
      * In flow context, tokens should respect indentation.
 456  
      * Actually the condition should be `self.indent &gt;= column` according to
 457  
      * the spec. But this condition will prohibit intuitively correct
 458  
      * constructions such as
 459  
      * key : {
 460  
      * }
 461  
      * </pre>
 462  
      */
 463  
     private void unwindIndent(int col) {
 464  
         // In the flow context, indentation is ignored. We make the scanner less
 465  
         // restrictive then specification requires.
 466  659595
         if (this.flowLevel != 0) {
 467  28706
             return;
 468  
         }
 469  
 
 470  
         // In block context, we may need to issue the BLOCK-END tokens.
 471  654062
         while (this.indent > col) {
 472  23173
             Mark mark = reader.getMark();
 473  23173
             this.indent = this.indents.pop();
 474  23173
             this.tokens.add(new BlockEndToken(mark, mark));
 475  23173
         }
 476  630889
     }
 477  
 
 478  
     /**
 479  
      * Check if we need to increase indentation.
 480  
      */
 481  
     private boolean addIndent(int column) {
 482  250488
         if (this.indent < column) {
 483  23213
             this.indents.push(this.indent);
 484  23213
             this.indent = column;
 485  23213
             return true;
 486  
         }
 487  227275
         return false;
 488  
     }
 489  
 
 490  
     // Fetchers.
 491  
 
 492  
     /**
 493  
      * We always add STREAM-START as the first token and STREAM-END as the last
 494  
      * token.
 495  
      */
 496  
     private void fetchStreamStart() {
 497  
         // Read the token.
 498  3692
         Mark mark = reader.getMark();
 499  
 
 500  
         // Add STREAM-START.
 501  3692
         Token token = new StreamStartToken(mark, mark);
 502  3692
         this.tokens.add(token);
 503  3692
     }
 504  
 
 505  
     private void fetchStreamEnd() {
 506  
         // Set the current intendation to -1.
 507  3582
         unwindIndent(-1);
 508  
 
 509  
         // Reset simple keys.
 510  3582
         removePossibleSimpleKey();
 511  3580
         this.allowSimpleKey = false;
 512  3580
         this.possibleSimpleKeys.clear();
 513  
 
 514  
         // Read the token.
 515  3580
         Mark mark = reader.getMark();
 516  
 
 517  
         // Add STREAM-END.
 518  3580
         Token token = new StreamEndToken(mark, mark);
 519  3580
         this.tokens.add(token);
 520  
 
 521  
         // The stream is finished.
 522  3580
         this.done = true;
 523  3580
     }
 524  
 
 525  
     private void fetchDirective() {
 526  
         // Set the current intendation to -1.
 527  1810
         unwindIndent(-1);
 528  
 
 529  
         // Reset simple keys.
 530  1810
         removePossibleSimpleKey();
 531  1810
         this.allowSimpleKey = false;
 532  
 
 533  
         // Scan and add DIRECTIVE.
 534  1810
         Token tok = scanDirective();
 535  1784
         this.tokens.add(tok);
 536  1784
     }
 537  
 
 538  
     private void fetchDocumentStart() {
 539  2336
         fetchDocumentIndicator(true);
 540  2336
     }
 541  
 
 542  
     private void fetchDocumentEnd() {
 543  136
         fetchDocumentIndicator(false);
 544  136
     }
 545  
 
 546  
     private void fetchDocumentIndicator(boolean isDocumentStart) {
 547  
         // Set the current intendation to -1.
 548  2472
         unwindIndent(-1);
 549  
 
 550  
         // Reset simple keys. Note that there could not be a block collection
 551  
         // after '---'.
 552  2472
         removePossibleSimpleKey();
 553  2472
         this.allowSimpleKey = false;
 554  
 
 555  
         // Add DOCUMENT-START or DOCUMENT-END.
 556  2472
         Mark startMark = reader.getMark();
 557  2472
         reader.forward(3);
 558  2472
         Mark endMark = reader.getMark();
 559  
         Token token;
 560  2472
         if (isDocumentStart) {
 561  2336
             token = new DocumentStartToken(startMark, endMark);
 562  
         } else {
 563  136
             token = new DocumentEndToken(startMark, endMark);
 564  
         }
 565  2472
         this.tokens.add(token);
 566  2472
     }
 567  
 
 568  
     private void fetchFlowSequenceStart() {
 569  1092
         fetchFlowCollectionStart(false);
 570  1092
     }
 571  
 
 572  
     private void fetchFlowMappingStart() {
 573  2788
         fetchFlowCollectionStart(true);
 574  2788
     }
 575  
 
 576  
     private void fetchFlowCollectionStart(boolean isMappingStart) {
 577  
         // '[' and '{' may start a simple key.
 578  3880
         savePossibleSimpleKey();
 579  
 
 580  
         // Increase the flow level.
 581  3880
         this.flowLevel++;
 582  
 
 583  
         // Simple keys are allowed after '[' and '{'.
 584  3880
         this.allowSimpleKey = true;
 585  
 
 586  
         // Add FLOW-SEQUENCE-START or FLOW-MAPPING-START.
 587  3880
         Mark startMark = reader.getMark();
 588  3880
         reader.forward(1);
 589  3880
         Mark endMark = reader.getMark();
 590  
         Token token;
 591  3880
         if (isMappingStart) {
 592  2788
             token = new FlowMappingStartToken(startMark, endMark);
 593  
         } else {
 594  1092
             token = new FlowSequenceStartToken(startMark, endMark);
 595  
         }
 596  3880
         this.tokens.add(token);
 597  3880
     }
 598  
 
 599  
     private void fetchFlowSequenceEnd() {
 600  1089
         fetchFlowCollectionEnd(false);
 601  1087
     }
 602  
 
 603  
     private void fetchFlowMappingEnd() {
 604  2786
         fetchFlowCollectionEnd(true);
 605  2786
     }
 606  
 
 607  
     private void fetchFlowCollectionEnd(boolean isMappingEnd) {
 608  
         // Reset possible simple key on the current level.
 609  3875
         removePossibleSimpleKey();
 610  
 
 611  
         // Decrease the flow level.
 612  3873
         this.flowLevel--;
 613  
 
 614  
         // No simple keys after ']' or '}'.
 615  3873
         this.allowSimpleKey = false;
 616  
 
 617  
         // Add FLOW-SEQUENCE-END or FLOW-MAPPING-END.
 618  3873
         Mark startMark = reader.getMark();
 619  3873
         reader.forward();
 620  3873
         Mark endMark = reader.getMark();
 621  
         Token token;
 622  3873
         if (isMappingEnd) {
 623  2786
             token = new FlowMappingEndToken(startMark, endMark);
 624  
         } else {
 625  1087
             token = new FlowSequenceEndToken(startMark, endMark);
 626  
         }
 627  3873
         this.tokens.add(token);
 628  3873
     }
 629  
 
 630  
     private void fetchFlowEntry() {
 631  
         // Simple keys are allowed after ','.
 632  3569
         this.allowSimpleKey = true;
 633  
 
 634  
         // Reset possible simple key on the current level.
 635  3569
         removePossibleSimpleKey();
 636  
 
 637  
         // Add FLOW-ENTRY.
 638  3569
         Mark startMark = reader.getMark();
 639  3569
         reader.forward();
 640  3569
         Mark endMark = reader.getMark();
 641  3569
         Token token = new FlowEntryToken(startMark, endMark);
 642  3569
         this.tokens.add(token);
 643  3569
     }
 644  
 
 645  
     private void fetchBlockEntry() {
 646  
         // Block context needs additional checks.
 647  114646
         if (this.flowLevel == 0) {
 648  
             // Are we allowed to start a new entry?
 649  114646
             if (!this.allowSimpleKey) {
 650  2
                 throw new ScannerException(null, null, "sequence entries are not allowed here",
 651  
                         reader.getMark());
 652  
             }
 653  
 
 654  
             // We may need to add BLOCK-SEQUENCE-START.
 655  114644
             if (addIndent(this.reader.getColumn())) {
 656  603
                 Mark mark = reader.getMark();
 657  603
                 this.tokens.add(new BlockSequenceStartToken(mark, mark));
 658  
             }
 659  
         } else {
 660  
             // It's an error for the block entry to occur in the flow
 661  
             // context,but we let the parser detect this.
 662  
         }
 663  
         // Simple keys are allowed after '-'.
 664  114644
         this.allowSimpleKey = true;
 665  
 
 666  
         // Reset possible simple key on the current level.
 667  114644
         removePossibleSimpleKey();
 668  
 
 669  
         // Add BLOCK-ENTRY.
 670  114644
         Mark startMark = reader.getMark();
 671  114644
         reader.forward();
 672  114644
         Mark endMark = reader.getMark();
 673  114644
         Token token = new BlockEntryToken(startMark, endMark);
 674  114644
         this.tokens.add(token);
 675  114644
     }
 676  
 
 677  
     private void fetchKey() {
 678  
         // Block context needs additional checks.
 679  971
         if (this.flowLevel == 0) {
 680  
             // Are we allowed to start a key (not necessary a simple)?
 681  179
             if (!this.allowSimpleKey) {
 682  2
                 throw new ScannerException(null, null, "mapping keys are not allowed here",
 683  
                         reader.getMark());
 684  
             }
 685  
             // We may need to add BLOCK-MAPPING-START.
 686  177
             if (addIndent(this.reader.getColumn())) {
 687  79
                 Mark mark = reader.getMark();
 688  79
                 this.tokens.add(new BlockMappingStartToken(mark, mark));
 689  
             }
 690  
         }
 691  
         // Simple keys are allowed after '?' in the block context.
 692  969
         this.allowSimpleKey = this.flowLevel == 0;
 693  
 
 694  
         // Reset possible simple key on the current level.
 695  969
         removePossibleSimpleKey();
 696  
 
 697  
         // Add KEY.
 698  969
         Mark startMark = reader.getMark();
 699  969
         reader.forward();
 700  969
         Mark endMark = reader.getMark();
 701  969
         Token token = new KeyToken(startMark, endMark);
 702  969
         this.tokens.add(token);
 703  969
     }
 704  
 
 705  
     private void fetchValue() {
 706  
         // Do we determine a simple key?
 707  139915
         SimpleKey key = this.possibleSimpleKeys.remove(this.flowLevel);
 708  139915
         if (key != null) {
 709  
             // Add KEY.
 710  138969
             this.tokens.add(key.getTokenNumber() - this.tokensTaken, new KeyToken(key.getMark(),
 711  
                     key.getMark()));
 712  
 
 713  
             // If this key starts a new block mapping, we need to add
 714  
             // BLOCK-MAPPING-START.
 715  138969
             if (this.flowLevel == 0) {
 716  135507
                 if (addIndent(key.getColumn())) {
 717  22529
                     this.tokens.add(key.getTokenNumber() - this.tokensTaken,
 718  
                             new BlockMappingStartToken(key.getMark(), key.getMark()));
 719  
                 }
 720  
             }
 721  
             // There cannot be two simple keys one after another.
 722  138969
             this.allowSimpleKey = false;
 723  
 
 724  
         } else {// It must be a part of a complex key.
 725  
             // Block context needs additional checks.Do we really need them?
 726  
             // They
 727  
             // will be catched by the parser anyway.)
 728  946
             if (this.flowLevel == 0) {
 729  
 
 730  
                 // We are allowed to start a complex value if and only if we can
 731  
                 // start a simple key.
 732  162
                 if (!this.allowSimpleKey) {
 733  2
                     throw new ScannerException(null, null, "mapping values are not allowed here",
 734  
                             reader.getMark());
 735  
                 }
 736  
             }
 737  
 
 738  
             // If this value starts a new block mapping, we need to add
 739  
             // BLOCK-MAPPING-START. It will be detected as an error later by
 740  
             // the parser.
 741  944
             if (flowLevel == 0) {
 742  160
                 if (addIndent(reader.getColumn())) {
 743  2
                     Mark mark = reader.getMark();
 744  2
                     this.tokens.add(new BlockMappingStartToken(mark, mark));
 745  
                 }
 746  
             }
 747  
 
 748  
             // Simple keys are allowed after ':' in the block context.
 749  944
             allowSimpleKey = (flowLevel == 0);
 750  
 
 751  
             // Reset possible simple key on the current level.
 752  944
             removePossibleSimpleKey();
 753  
         }
 754  
         // Add VALUE.
 755  139913
         Mark startMark = reader.getMark();
 756  139913
         reader.forward();
 757  139913
         Mark endMark = reader.getMark();
 758  139913
         Token token = new ValueToken(startMark, endMark);
 759  139913
         this.tokens.add(token);
 760  139913
     }
 761  
 
 762  
     private void fetchAlias() {
 763  
         // ALIAS could be a simple key.
 764  1425
         savePossibleSimpleKey();
 765  
 
 766  
         // No simple keys after ALIAS.
 767  1425
         this.allowSimpleKey = false;
 768  
 
 769  
         // Scan and add ALIAS.
 770  1425
         Token tok = scanAnchor(false);
 771  1425
         this.tokens.add(tok);
 772  1425
     }
 773  
 
 774  
     private void fetchAnchor() {
 775  
         // ANCHOR could start a simple key.
 776  1372
         savePossibleSimpleKey();
 777  
 
 778  
         // No simple keys after ANCHOR.
 779  1372
         this.allowSimpleKey = false;
 780  
 
 781  
         // Scan and add ANCHOR.
 782  1372
         Token tok = scanAnchor(true);
 783  1368
         this.tokens.add(tok);
 784  1368
     }
 785  
 
 786  
     private void fetchTag() {
 787  
         // TAG could start a simple key.
 788  12164
         savePossibleSimpleKey();
 789  
 
 790  
         // No simple keys after TAG.
 791  12164
         this.allowSimpleKey = false;
 792  
 
 793  
         // Scan and add TAG.
 794  12164
         Token tok = scanTag();
 795  12149
         this.tokens.add(tok);
 796  12149
     }
 797  
 
 798  
     private void fetchLiteral() {
 799  634
         fetchBlockScalar('|');
 800  634
     }
 801  
 
 802  
     private void fetchFolded() {
 803  554
         fetchBlockScalar('>');
 804  548
     }
 805  
 
 806  
     private void fetchBlockScalar(char style) {
 807  
         // A simple key may follow a block scalar.
 808  1188
         this.allowSimpleKey = true;
 809  
 
 810  
         // Reset possible simple key on the current level.
 811  1188
         removePossibleSimpleKey();
 812  
 
 813  
         // Scan and add SCALAR.
 814  1188
         Token tok = scanBlockScalar(style);
 815  1182
         this.tokens.add(tok);
 816  1182
     }
 817  
 
 818  
     private void fetchSingle() {
 819  203279
         fetchFlowScalar('\'');
 820  203277
     }
 821  
 
 822  
     private void fetchDouble() {
 823  6067
         fetchFlowScalar('"');
 824  6061
     }
 825  
 
 826  
     private void fetchFlowScalar(char style) {
 827  
         // A flow scalar could be a simple key.
 828  209346
         savePossibleSimpleKey();
 829  
 
 830  
         // No simple keys after flow scalars.
 831  209346
         this.allowSimpleKey = false;
 832  
 
 833  
         // Scan and add SCALAR.
 834  209346
         Token tok = scanFlowScalar(style);
 835  209338
         this.tokens.add(tok);
 836  209338
     }
 837  
 
 838  
     private void fetchPlain() {
 839  
         // A plain scalar could be a simple key.
 840  151513
         savePossibleSimpleKey();
 841  
 
 842  
         // No simple keys after plain scalars. But note that `scan_plain` will
 843  
         // change this flag if the scan is finished at the beginning of the
 844  
         // line.
 845  151513
         this.allowSimpleKey = false;
 846  
 
 847  
         // Scan and add SCALAR. May change `allow_simple_key`.
 848  151513
         Token tok = scanPlain();
 849  151511
         this.tokens.add(tok);
 850  151511
     }
 851  
 
 852  
     // Checkers.
 853  
 
 854  
     private boolean checkDirective() {
 855  
         // DIRECTIVE: ^ '%' ...
 856  
         // The '%' indicator is already checked.
 857  1810
         return reader.getColumn() == 0;
 858  
     }
 859  
 
 860  
     private boolean checkDocumentStart() {
 861  
         // DOCUMENT-START: ^ '---' (' '|'\n')
 862  117036
         if (reader.getColumn() == 0) {
 863  16278
             if ("---".equals(reader.prefix(3)) && Constant.NULL_BL_T_LINEBR.has(reader.peek(3))) {
 864  2336
                 return true;
 865  
             }
 866  
         }
 867  114700
         return false;
 868  
     }
 869  
 
 870  
     private boolean checkDocumentEnd() {
 871  
         // DOCUMENT-END: ^ '...' (' '|'\n')
 872  156
         if (reader.getColumn() == 0) {
 873  147
             if ("...".equals(reader.prefix(3)) && Constant.NULL_BL_T_LINEBR.has(reader.peek(3))) {
 874  136
                 return true;
 875  
             }
 876  
         }
 877  20
         return false;
 878  
     }
 879  
 
 880  
     private boolean checkBlockEntry() {
 881  
         // BLOCK-ENTRY: '-' (' '|'\n')
 882  114700
         return Constant.NULL_BL_T_LINEBR.has(reader.peek(1));
 883  
     }
 884  
 
 885  
     private boolean checkKey() {
 886  
         // KEY(flow context): '?'
 887  971
         if (this.flowLevel != 0) {
 888  792
             return true;
 889  
         } else {
 890  
             // KEY(block context): '?' (' '|'\n')
 891  179
             return Constant.NULL_BL_T_LINEBR.has(reader.peek(1));
 892  
         }
 893  
     }
 894  
 
 895  
     private boolean checkValue() {
 896  
         // VALUE(flow context): ':'
 897  140221
         if (flowLevel != 0) {
 898  4246
             return true;
 899  
         } else {
 900  
             // VALUE(block context): ':' (' '|'\n')
 901  135975
             return Constant.NULL_BL_T_LINEBR.has(reader.peek(1));
 902  
         }
 903  
     }
 904  
 
 905  
     private boolean checkPlain() {
 906  
         /**
 907  
          * <pre>
 908  
          * A plain scalar may start with any non-space character except:
 909  
          *   '-', '?', ':', ',', '[', ']', '{', '}',
 910  
          *   '#', '&amp;', '*', '!', '|', '&gt;', '\'', '\&quot;',
 911  
          *   '%', '@', '`'.
 912  
          * 
 913  
          * It may also start with
 914  
          *   '-', '?', ':'
 915  
          * if it is followed by a non-space character.
 916  
          * 
 917  
          * Note that we limit the last rule to the block context (except the
 918  
          * '-' character) because we want the flow context to be space
 919  
          * independent.
 920  
          * </pre>
 921  
          */
 922  151516
         char ch = reader.peek();
 923  151516
         return Constant.NULL_BL_T_LINEBR.hasNo(ch, "-?:,[]{}#&*!|>\'\"%@`")
 924  
                 || (Constant.NULL_BL_T_LINEBR.hasNo(reader.peek(1)) && (ch == '-' || (this.flowLevel == 0 && "?:"
 925  
                         .indexOf(ch) != -1)));
 926  
     }
 927  
 
 928  
     // Scanners.
 929  
 
 930  
     /**
 931  
      * <pre>
 932  
      * We ignore spaces, line breaks and comments.
 933  
      * If we find a line break in the block context, we set the flag
 934  
      * `allow_simple_key` on.
 935  
      * The byte order mark is stripped if it's the first character in the
 936  
      * stream. We do not yet support BOM inside the stream as the
 937  
      * specification requires. Any such mark will be considered as a part
 938  
      * of the document.
 939  
      * TODO: We need to make tab handling rules more sane. A good rule is
 940  
      *   Tabs cannot precede tokens
 941  
      *   BLOCK-SEQUENCE-START, BLOCK-MAPPING-START, BLOCK-END,
 942  
      *   KEY(block), VALUE(block), BLOCK-ENTRY
 943  
      * So the checking code is
 944  
      *   if &lt;TAB&gt;:
 945  
      *       self.allow_simple_keys = False
 946  
      * We also need to add the check for `allow_simple_keys == True` to
 947  
      * `unwind_indent` before issuing BLOCK-END.
 948  
      * Scanners for block, flow, and plain scalars need to be modified.
 949  
      * </pre>
 950  
      */
 951  
     private void scanToNextToken() {
 952  651731
         if (reader.getIndex() == 0 && reader.peek() == '\uFEFF') {
 953  0
             reader.forward();
 954  
         }
 955  651731
         boolean found = false;
 956  1446818
         while (!found) {
 957  795087
             int ff = 0;
 958  1335502
             while (reader.peek(ff) == ' ') {
 959  540415
                 ff++;
 960  
             }
 961  795087
             if (ff > 0) {
 962  376974
                 reader.forward(ff);
 963  
             }
 964  
 
 965  795087
             if (reader.peek() == '#') {
 966  557
                 ff = 0;
 967  10345
                 while (Constant.NULL_OR_LINEBR.hasNo(reader.peek(ff))) {
 968  9788
                     ff++;
 969  
                 }
 970  557
                 if (ff > 0) {
 971  557
                     reader.forward(ff);
 972  
                 }
 973  
             }
 974  795087
             if (scanLineBreak().length() != 0) {
 975  143356
                 if (this.flowLevel == 0) {
 976  140509
                     this.allowSimpleKey = true;
 977  
                 }
 978  
             } else {
 979  651731
                 found = true;
 980  
             }
 981  795087
         }
 982  651731
     }
 983  
 
 984  
     @SuppressWarnings({ "unchecked", "rawtypes" })
 985  
     private Token scanDirective() {
 986  
         // See the specification for details.
 987  1810
         Mark startMark = reader.getMark();
 988  
         Mark endMark;
 989  1810
         reader.forward();
 990  1810
         String name = scanDirectiveName(startMark);
 991  1806
         List<?> value = null;
 992  1806
         if ("YAML".equals(name)) {
 993  1640
             value = scanYamlDirectiveValue(startMark);
 994  1628
             endMark = reader.getMark();
 995  166
         } else if ("TAG".equals(name)) {
 996  161
             value = scanTagDirectiveValue(startMark);
 997  153
             endMark = reader.getMark();
 998  
         } else {
 999  5
             endMark = reader.getMark();
 1000  5
             int ff = 0;
 1001  150
             while (Constant.NULL_OR_LINEBR.hasNo(reader.peek(ff))) {
 1002  145
                 ff++;
 1003  
             }
 1004  5
             if (ff > 0) {
 1005  5
                 reader.forward(ff);
 1006  
             }
 1007  
         }
 1008  1786
         scanDirectiveIgnoredLine(startMark);
 1009  1784
         return new DirectiveToken(name, value, startMark, endMark);
 1010  
     }
 1011  
 
 1012  
     private String scanDirectiveName(Mark startMark) {
 1013  
         // See the specification for details.
 1014  1810
         int length = 0;
 1015  1810
         char ch = reader.peek(length);
 1016  8904
         while (Constant.ALPHA.has(ch)) {
 1017  7094
             length++;
 1018  7094
             ch = reader.peek(length);
 1019  
         }
 1020  1810
         if (length == 0) {
 1021  2
             throw new ScannerException("while scanning a directive", startMark,
 1022  
                     "expected alphabetic or numeric character, but found " + ch + "(" + ((int) ch)
 1023  
                             + ")", reader.getMark());
 1024  
         }
 1025  1808
         String value = reader.prefixForward(length);
 1026  1808
         ch = reader.peek();
 1027  1808
         if (Constant.NULL_BL_LINEBR.hasNo(ch)) {
 1028  2
             throw new ScannerException("while scanning a directive", startMark,
 1029  
                     "expected alphabetic or numeric character, but found " + ch + "(" + ((int) ch)
 1030  
                             + ")", reader.getMark());
 1031  
         }
 1032  1806
         return value;
 1033  
     }
 1034  
 
 1035  
     private List<Integer> scanYamlDirectiveValue(Mark startMark) {
 1036  
         // See the specification for details.
 1037  3302
         while (reader.peek() == ' ') {
 1038  1662
             reader.forward();
 1039  
         }
 1040  1640
         Integer major = scanYamlDirectiveNumber(startMark);
 1041  1636
         if (reader.peek() != '.') {
 1042  2
             throw new ScannerException("while scanning a directive", startMark,
 1043  
                     "expected a digit or '.', but found " + reader.peek() + "("
 1044  
                             + ((int) reader.peek()) + ")", reader.getMark());
 1045  
         }
 1046  1634
         reader.forward();
 1047  1634
         Integer minor = scanYamlDirectiveNumber(startMark);
 1048  1630
         if (Constant.NULL_BL_LINEBR.hasNo(reader.peek())) {
 1049  2
             throw new ScannerException("while scanning a directive", startMark,
 1050  
                     "expected a digit or ' ', but found " + reader.peek() + "("
 1051  
                             + ((int) reader.peek()) + ")", reader.getMark());
 1052  
         }
 1053  1628
         List<Integer> result = new ArrayList<Integer>(2);
 1054  1628
         result.add(major);
 1055  1628
         result.add(minor);
 1056  1628
         return result;
 1057  
     }
 1058  
 
 1059  
     private Integer scanYamlDirectiveNumber(Mark startMark) {
 1060  
         // See the specification for details.
 1061  3274
         char ch = reader.peek();
 1062  3274
         if (!Character.isDigit(ch)) {
 1063  8
             throw new ScannerException("while scanning a directive", startMark,
 1064  
                     "expected a digit, but found " + ch + "(" + ((int) ch) + ")", reader.getMark());
 1065  
         }
 1066  3266
         int length = 0;
 1067  6540
         while (Character.isDigit(reader.peek(length))) {
 1068  3274
             length++;
 1069  
         }
 1070  3266
         Integer value = new Integer(reader.prefixForward(length));
 1071  3266
         return value;
 1072  
     }
 1073  
 
 1074  
     private List<String> scanTagDirectiveValue(Mark startMark) {
 1075  
         // See the specification for details.
 1076  352
         while (reader.peek() == ' ') {
 1077  191
             reader.forward();
 1078  
         }
 1079  161
         String handle = scanTagDirectiveHandle(startMark);
 1080  382
         while (reader.peek() == ' ') {
 1081  227
             reader.forward();
 1082  
         }
 1083  155
         String prefix = scanTagDirectivePrefix(startMark);
 1084  153
         List<String> result = new ArrayList<String>(2);
 1085  153
         result.add(handle);
 1086  153
         result.add(prefix);
 1087  153
         return result;
 1088  
     }
 1089  
 
 1090  
     private String scanTagDirectiveHandle(Mark startMark) {
 1091  
         // See the specification for details.
 1092  161
         String value = scanTagHandle("directive", startMark);
 1093  157
         char ch = reader.peek();
 1094  157
         if (ch != ' ') {
 1095  2
             throw new ScannerException("while scanning a directive", startMark,
 1096  
                     "expected ' ', but found " + reader.peek() + "(" + ch + ")", reader.getMark());
 1097  
         }
 1098  155
         return value;
 1099  
     }
 1100  
 
 1101  
     private String scanTagDirectivePrefix(Mark startMark) {
 1102  
         // See the specification for details.
 1103  155
         String value = scanTagUri("directive", startMark);
 1104  155
         if (Constant.NULL_BL_LINEBR.hasNo(reader.peek())) {
 1105  2
             throw new ScannerException("while scanning a directive", startMark,
 1106  
                     "expected ' ', but found " + reader.peek() + "(" + ((int) reader.peek()) + ")",
 1107  
                     reader.getMark());
 1108  
         }
 1109  153
         return value;
 1110  
     }
 1111  
 
 1112  
     private String scanDirectiveIgnoredLine(Mark startMark) {
 1113  
         // See the specification for details.
 1114  1786
         int ff = 0;
 1115  1793
         while (reader.peek(ff) == ' ') {
 1116  7
             ff++;
 1117  
         }
 1118  1786
         if (ff > 0) {
 1119  7
             reader.forward(ff);
 1120  
         }
 1121  1786
         if (reader.peek() == '#') {
 1122  5
             ff = 0;
 1123  90
             while (Constant.NULL_OR_LINEBR.hasNo(reader.peek(ff))) {
 1124  85
                 ff++;
 1125  
             }
 1126  5
             reader.forward(ff);
 1127  
         }
 1128  1786
         char ch = reader.peek();
 1129  1786
         String lineBreak = scanLineBreak();
 1130  1786
         if (lineBreak.length() == 0 && ch != '\0') {
 1131  2
             throw new ScannerException("while scanning a directive", startMark,
 1132  
                     "expected a comment or a line break, but found " + ch + "(" + ((int) ch) + ")",
 1133  
                     reader.getMark());
 1134  
         }
 1135  1784
         return lineBreak;
 1136  
     }
 1137  
 
 1138  
     /**
 1139  
      * <pre>
 1140  
      * The specification does not restrict characters for anchors and
 1141  
      * aliases. This may lead to problems, for instance, the document:
 1142  
      *   [ *alias, value ]
 1143  
      * can be interpreted in two ways, as
 1144  
      *   [ &quot;value&quot; ]
 1145  
      * and
 1146  
      *   [ *alias , &quot;value&quot; ]
 1147  
      * Therefore we restrict aliases to numbers and ASCII letters.
 1148  
      * </pre>
 1149  
      */
 1150  
     private Token scanAnchor(boolean isAnchor) {
 1151  2797
         Mark startMark = reader.getMark();
 1152  2797
         char indicator = reader.peek();
 1153  2797
         String name = indicator == '*' ? "alias" : "anchor";
 1154  2797
         reader.forward();
 1155  2797
         int length = 0;
 1156  2797
         char ch = reader.peek(length);
 1157  16116
         while (Constant.ALPHA.has(ch)) {
 1158  13319
             length++;
 1159  13319
             ch = reader.peek(length);
 1160  
         }
 1161  2797
         if (length == 0) {
 1162  2
             throw new ScannerException("while scanning an " + name, startMark,
 1163  
                     "expected alphabetic or numeric character, but found but found " + ch,
 1164  
                     reader.getMark());
 1165  
         }
 1166  2795
         String value = reader.prefixForward(length);
 1167  2795
         ch = reader.peek();
 1168  2795
         if (Constant.NULL_BL_T_LINEBR.hasNo(ch, "?:,]}%@`")) {
 1169  2
             throw new ScannerException("while scanning an " + name, startMark,
 1170  
                     "expected alphabetic or numeric character, but found " + ch + "("
 1171  
                             + ((int) reader.peek()) + ")", reader.getMark());
 1172  
         }
 1173  2793
         Mark endMark = reader.getMark();
 1174  
         Token tok;
 1175  2793
         if (isAnchor) {
 1176  1368
             tok = new AnchorToken(value, startMark, endMark);
 1177  
         } else {
 1178  1425
             tok = new AliasToken(value, startMark, endMark);
 1179  
         }
 1180  2793
         return tok;
 1181  
     }
 1182  
 
 1183  
     private Token scanTag() {
 1184  
         // See the specification for details.
 1185  12164
         Mark startMark = reader.getMark();
 1186  12164
         char ch = reader.peek(1);
 1187  12164
         String handle = null;
 1188  12164
         String suffix = null;
 1189  12164
         if (ch == '<') {
 1190  137
             reader.forward(2);
 1191  137
             suffix = scanTagUri("tag", startMark);
 1192  131
             if (reader.peek() != '>') {
 1193  2
                 throw new ScannerException("while scanning a tag", startMark,
 1194  
                         "expected '>', but found '" + reader.peek() + "' (" + ((int) reader.peek())
 1195  
                                 + ")", reader.getMark());
 1196  
             }
 1197  129
             reader.forward();
 1198  12027
         } else if (Constant.NULL_BL_T_LINEBR.has(ch)) {
 1199  2053
             suffix = "!";
 1200  2053
             reader.forward();
 1201  
         } else {
 1202  9974
             int length = 1;
 1203  9974
             boolean useHandle = false;
 1204  37167
             while (Constant.NULL_BL_LINEBR.hasNo(ch)) {
 1205  36035
                 if (ch == '!') {
 1206  8842
                     useHandle = true;
 1207  8842
                     break;
 1208  
                 }
 1209  27193
                 length++;
 1210  27193
                 ch = reader.peek(length);
 1211  
             }
 1212  9974
             handle = "!";
 1213  9974
             if (useHandle) {
 1214  8842
                 handle = scanTagHandle("tag", startMark);
 1215  
             } else {
 1216  1132
                 handle = "!";
 1217  1132
                 reader.forward();
 1218  
             }
 1219  9974
             suffix = scanTagUri("tag", startMark);
 1220  
         }
 1221  12151
         ch = reader.peek();
 1222  12151
         if (Constant.NULL_BL_LINEBR.hasNo(ch)) {
 1223  2
             throw new ScannerException("while scanning a tag", startMark,
 1224  
                     "expected ' ', but found '" + ch + "' (" + ((int) ch) + ")", reader.getMark());
 1225  
         }
 1226  12149
         TagTuple value = new TagTuple(handle, suffix);
 1227  12149
         Mark endMark = reader.getMark();
 1228  12149
         return new TagToken(value, startMark, endMark);
 1229  
     }
 1230  
 
 1231  
     private Token scanBlockScalar(char style) {
 1232  
         // See the specification for details.
 1233  
         boolean folded;
 1234  1188
         if (style == '>') {
 1235  554
             folded = true;
 1236  
         } else {
 1237  634
             folded = false;
 1238  
         }
 1239  1188
         StringBuilder chunks = new StringBuilder();
 1240  1188
         Mark startMark = reader.getMark();
 1241  
         // Scan the header.
 1242  1188
         reader.forward();
 1243  1188
         Chomping chompi = scanBlockScalarIndicators(startMark);
 1244  1184
         int increment = chompi.getIncrement();
 1245  1184
         scanBlockScalarIgnoredLine(startMark);
 1246  
 
 1247  
         // Determine the indentation level and go to the first non-empty line.
 1248  1182
         int minIndent = this.indent + 1;
 1249  1182
         if (minIndent < 1) {
 1250  375
             minIndent = 1;
 1251  
         }
 1252  1182
         String breaks = null;
 1253  1182
         int maxIndent = 0;
 1254  1182
         int indent = 0;
 1255  
         Mark endMark;
 1256  1182
         if (increment == -1) {
 1257  1099
             Object[] brme = scanBlockScalarIndentation();
 1258  1099
             breaks = (String) brme[0];
 1259  1099
             maxIndent = ((Integer) brme[1]).intValue();
 1260  1099
             endMark = (Mark) brme[2];
 1261  1099
             indent = Math.max(minIndent, maxIndent);
 1262  1099
         } else {
 1263  83
             indent = minIndent + increment - 1;
 1264  83
             Object[] brme = scanBlockScalarBreaks(indent);
 1265  83
             breaks = (String) brme[0];
 1266  83
             endMark = (Mark) brme[1];
 1267  
         }
 1268  
 
 1269  1182
         String lineBreak = "";
 1270  
 
 1271  
         // Scan the inner part of the block scalar.
 1272  1966
         while (this.reader.getColumn() == indent && reader.peek() != '\0') {
 1273  1946
             chunks.append(breaks);
 1274  1946
             boolean leadingNonSpace = " \t".indexOf(reader.peek()) == -1;
 1275  1946
             int length = 0;
 1276  32069
             while (Constant.NULL_OR_LINEBR.hasNo(reader.peek(length))) {
 1277  30123
                 length++;
 1278  
             }
 1279  1946
             chunks.append(reader.prefixForward(length));
 1280  1946
             lineBreak = scanLineBreak();
 1281  1946
             Object[] brme = scanBlockScalarBreaks(indent);
 1282  1946
             breaks = (String) brme[0];
 1283  1946
             endMark = (Mark) brme[1];
 1284  1946
             if (this.reader.getColumn() == indent && reader.peek() != '\0') {
 1285  
 
 1286  
                 // Unfortunately, folding rules are ambiguous.
 1287  
                 //
 1288  
                 // This is the folding according to the specification:
 1289  784
                 if (folded && "\n".equals(lineBreak) && leadingNonSpace
 1290  
                         && " \t".indexOf(reader.peek()) == -1) {
 1291  184
                     if (breaks.length() == 0) {
 1292  92
                         chunks.append(" ");
 1293  
                     }
 1294  
                 } else {
 1295  600
                     chunks.append(lineBreak);
 1296  
                 }
 1297  
                 // Clark Evans's interpretation (also in the spec examples) not
 1298  
                 // imported from PyYAML
 1299  
             } else {
 1300  
                 break;
 1301  
             }
 1302  784
         }
 1303  
         // Chomp the tail.
 1304  1182
         if (chompi.chompTailIsNotFalse()) {
 1305  413
             chunks.append(lineBreak);
 1306  
         }
 1307  1182
         if (chompi.chompTailIsTrue()) {
 1308  35
             chunks.append(breaks);
 1309  
         }
 1310  
         // We are done.
 1311  1182
         return new ScalarToken(chunks.toString(), false, startMark, endMark, style);
 1312  
     }
 1313  
 
 1314  
     private Chomping scanBlockScalarIndicators(Mark startMark) {
 1315  
         // See the specification for details.
 1316  1188
         Boolean chomping = null;
 1317  1188
         int increment = -1;
 1318  1188
         char ch = reader.peek();
 1319  1188
         if (ch == '-' || ch == '+') {
 1320  780
             if (ch == '+') {
 1321  30
                 chomping = Boolean.TRUE;
 1322  
             } else {
 1323  750
                 chomping = Boolean.FALSE;
 1324  
             }
 1325  780
             reader.forward();
 1326  780
             ch = reader.peek();
 1327  780
             if (Character.isDigit(ch)) {
 1328  7
                 increment = Integer.parseInt(String.valueOf(ch));
 1329  7
                 if (increment == 0) {
 1330  2
                     throw new ScannerException("while scanning a block scalar", startMark,
 1331  
                             "expected indentation indicator in the range 1-9, but found 0",
 1332  
                             reader.getMark());
 1333  
                 }
 1334  5
                 reader.forward();
 1335  
             }
 1336  408
         } else if (Character.isDigit(ch)) {
 1337  80
             increment = Integer.parseInt(String.valueOf(ch));
 1338  80
             if (increment == 0) {
 1339  2
                 throw new ScannerException("while scanning a block scalar", startMark,
 1340  
                         "expected indentation indicator in the range 1-9, but found 0",
 1341  
                         reader.getMark());
 1342  
             }
 1343  78
             reader.forward();
 1344  78
             ch = reader.peek();
 1345  78
             if (ch == '-' || ch == '+') {
 1346  26
                 if (ch == '+') {
 1347  5
                     chomping = Boolean.TRUE;
 1348  
                 } else {
 1349  21
                     chomping = Boolean.FALSE;
 1350  
                 }
 1351  26
                 reader.forward();
 1352  
             }
 1353  
         }
 1354  1184
         ch = reader.peek();
 1355  1184
         if (Constant.NULL_BL_LINEBR.hasNo(ch)) {
 1356  0
             throw new ScannerException("while scanning a block scalar", startMark,
 1357  
                     "expected chomping or indentation indicators, but found " + ch,
 1358  
                     reader.getMark());
 1359  
         }
 1360  1184
         return new Chomping(chomping, increment);
 1361  
     }
 1362  
 
 1363  
     private String scanBlockScalarIgnoredLine(Mark startMark) {
 1364  
         // See the specification for details.
 1365  1184
         int ff = 0;
 1366  1216
         while (reader.peek(ff) == ' ') {
 1367  32
             ff++;
 1368  
         }
 1369  1184
         if (ff > 0) {
 1370  32
             reader.forward(ff);
 1371  
         }
 1372  
 
 1373  1184
         if (reader.peek() == '#') {
 1374  30
             ff = 0;
 1375  625
             while (Constant.NULL_OR_LINEBR.hasNo(reader.peek(ff))) {
 1376  595
                 ff++;
 1377  
             }
 1378  30
             if (ff > 0) {
 1379  30
                 reader.forward(ff);
 1380  
             }
 1381  
         }
 1382  1184
         char ch = reader.peek();
 1383  1184
         String lineBreak = scanLineBreak();
 1384  1184
         if (lineBreak.length() == 0 && ch != '\0') {
 1385  2
             throw new ScannerException("while scanning a block scalar", startMark,
 1386  
                     "expected a comment or a line break, but found " + ch, reader.getMark());
 1387  
         }
 1388  1182
         return lineBreak;
 1389  
     }
 1390  
 
 1391  
     private Object[] scanBlockScalarIndentation() {
 1392  
         // See the specification for details.
 1393  1099
         StringBuilder chunks = new StringBuilder();
 1394  1099
         int maxIndent = 0;
 1395  1099
         Mark endMark = reader.getMark();
 1396  4045
         while (Constant.LINEBR.has(reader.peek(), " \r")) {
 1397  2946
             if (reader.peek() != ' ') {
 1398  55
                 chunks.append(scanLineBreak());
 1399  55
                 endMark = reader.getMark();
 1400  
             } else {
 1401  2891
                 reader.forward();
 1402  2891
                 if (this.reader.getColumn() > maxIndent) {
 1403  2831
                     maxIndent = reader.getColumn();
 1404  
                 }
 1405  
             }
 1406  
         }
 1407  1099
         return new Object[] { chunks.toString(), maxIndent, endMark };
 1408  
     }
 1409  
 
 1410  
     private Object[] scanBlockScalarBreaks(int indent) {
 1411  
         // See the specification for details.
 1412  2029
         StringBuilder chunks = new StringBuilder();
 1413  2029
         Mark endMark = reader.getMark();
 1414  2029
         int ff = 0;
 1415  2029
         int col = this.reader.getColumn();
 1416  4038
         while (col < indent && reader.peek(ff) == ' ') {
 1417  2009
             ff++;
 1418  2009
             col++;
 1419  
         }
 1420  2029
         if (ff > 0) {
 1421  742
             reader.forward(ff);
 1422  
         }
 1423  
 
 1424  2029
         String lineBreak = null;
 1425  2487
         while ((lineBreak = scanLineBreak()).length() != 0) {
 1426  458
             chunks.append(lineBreak);
 1427  458
             endMark = reader.getMark();
 1428  458
             ff = 0;
 1429  458
             col = this.reader.getColumn();
 1430  1046
             while (col < indent && reader.peek(ff) == ' ') {
 1431  588
                 ff++;
 1432  588
                 col++;
 1433  
             }
 1434  458
             if (ff > 0) {
 1435  337
                 reader.forward(ff);
 1436  
             }
 1437  
         }
 1438  2029
         return new Object[] { chunks.toString(), endMark };
 1439  
     }
 1440  
 
 1441  
     /**
 1442  
      * <pre>
 1443  
      * See the specification for details.
 1444  
      * Note that we loosen indentation rules for quoted scalars. Quoted
 1445  
      * scalars don't need to adhere to indentation because &quot; and ' clearly
 1446  
      * mark the beginning and the end of them. Therefore we are less
 1447  
      * restrictive then the specification requires. We only need to check
 1448  
      * that document separators are not included in scalars.
 1449  
      * </pre>
 1450  
      */
 1451  
     private Token scanFlowScalar(char style) {
 1452  
         boolean _double;
 1453  209346
         if (style == '"') {
 1454  6067
             _double = true;
 1455  
         } else {
 1456  203279
             _double = false;
 1457  
         }
 1458  209346
         StringBuilder chunks = new StringBuilder();
 1459  209346
         Mark startMark = reader.getMark();
 1460  209346
         char quote = reader.peek();
 1461  209346
         reader.forward();
 1462  209346
         chunks.append(scanFlowScalarNonSpaces(_double, startMark));
 1463  215684
         while (reader.peek() != quote) {
 1464  6346
             chunks.append(scanFlowScalarSpaces(startMark));
 1465  6342
             chunks.append(scanFlowScalarNonSpaces(_double, startMark));
 1466  
         }
 1467  209338
         reader.forward();
 1468  209338
         Mark endMark = reader.getMark();
 1469  209338
         return new ScalarToken(chunks.toString(), false, startMark, endMark, style);
 1470  
     }
 1471  
 
 1472  
     private String scanFlowScalarNonSpaces(boolean _double, Mark startMark) {
 1473  
         // See the specification for details.
 1474  215688
         StringBuilder chunks = new StringBuilder();
 1475  
         while (true) {
 1476  218753
             int length = 0;
 1477  768436
             while (Constant.NULL_BL_T_LINEBR.hasNo(reader.peek(length), "\'\"\\")) {
 1478  549683
                 length++;
 1479  
             }
 1480  218753
             if (length != 0) {
 1481  214660
                 chunks.append(reader.prefixForward(length));
 1482  
             }
 1483  218753
             char ch = reader.peek();
 1484  218753
             if (!_double && ch == '\'' && reader.peek(1) == '\'') {
 1485  24
                 chunks.append("'");
 1486  24
                 reader.forward(2);
 1487  218729
             } else if ((_double && ch == '\'') || (!_double && "\"\\".indexOf(ch) != -1)) {
 1488  85
                 chunks.append(ch);
 1489  85
                 reader.forward();
 1490  218644
             } else if (_double && ch == '\\') {
 1491  2960
                 reader.forward();
 1492  2960
                 ch = reader.peek();
 1493  2960
                 if (ESCAPE_REPLACEMENTS.containsKey(new Character(ch))) {
 1494  2487
                     chunks.append(ESCAPE_REPLACEMENTS.get(new Character(ch)));
 1495  2487
                     reader.forward();
 1496  473
                 } else if (ESCAPE_CODES.containsKey(new Character(ch))) {
 1497  132
                     length = (ESCAPE_CODES.get(new Character(ch))).intValue();
 1498  132
                     reader.forward();
 1499  132
                     String hex = reader.prefix(length);
 1500  132
                     if (NOT_HEXA.matcher(hex).find()) {
 1501  2
                         throw new ScannerException("while scanning a double-quoted scalar",
 1502  
                                 startMark, "expected escape sequence of " + length
 1503  
                                         + " hexadecimal numbers, but found: " + hex,
 1504  
                                 reader.getMark());
 1505  
                     }
 1506  130
                     char unicode = (char) Integer.parseInt(hex, 16);
 1507  130
                     chunks.append(unicode);
 1508  130
                     reader.forward(length);
 1509  130
                 } else if (scanLineBreak().length() != 0) {
 1510  339
                     chunks.append(scanFlowScalarBreaks(startMark));
 1511  
                 } else {
 1512  2
                     throw new ScannerException("while scanning a double-quoted scalar", startMark,
 1513  
                             "found unknown escape character " + ch + "(" + ((int) ch) + ")",
 1514  
                             reader.getMark());
 1515  
                 }
 1516  
             } else {
 1517  215684
                 return chunks.toString();
 1518  
             }
 1519  3065
         }
 1520  
     }
 1521  
 
 1522  
     private String scanFlowScalarSpaces(Mark startMark) {
 1523  
         // See the specification for details.
 1524  6346
         StringBuilder chunks = new StringBuilder();
 1525  6346
         int length = 0;
 1526  12383
         while (" \t".indexOf(reader.peek(length)) != -1) {
 1527  6037
             length++;
 1528  
         }
 1529  6346
         String whitespaces = reader.prefixForward(length);
 1530  6346
         char ch = reader.peek();
 1531  6346
         if (ch == '\0') {
 1532  2
             throw new ScannerException("while scanning a quoted scalar", startMark,
 1533  
                     "found unexpected end of stream", reader.getMark());
 1534  
         }
 1535  6344
         String lineBreak = scanLineBreak();
 1536  6344
         if (lineBreak.length() != 0) {
 1537  637
             String breaks = scanFlowScalarBreaks(startMark);
 1538  635
             if (!"\n".equals(lineBreak)) {
 1539  67
                 chunks.append(lineBreak);
 1540  568
             } else if (breaks.length() == 0) {
 1541  203
                 chunks.append(" ");
 1542  
             }
 1543  635
             chunks.append(breaks);
 1544  635
         } else {
 1545  5707
             chunks.append(whitespaces);
 1546  
         }
 1547  6342
         return chunks.toString();
 1548  
     }
 1549  
 
 1550  
     private String scanFlowScalarBreaks(Mark startMark) {
 1551  
         // See the specification for details.
 1552  976
         StringBuilder chunks = new StringBuilder();
 1553  
         while (true) {
 1554  
             // Instead of checking indentation, we check for document
 1555  
             // separators.
 1556  1469
             String prefix = reader.prefix(3);
 1557  1469
             if (("---".equals(prefix) || "...".equals(prefix))
 1558  
                     && Constant.NULL_BL_T_LINEBR.has(reader.peek(3))) {
 1559  2
                 throw new ScannerException("while scanning a quoted scalar", startMark,
 1560  
                         "found unexpected document separator", reader.getMark());
 1561  
             }
 1562  4413
             while (" \t".indexOf(reader.peek()) != -1) {
 1563  2946
                 reader.forward();
 1564  
             }
 1565  1467
             String lineBreak = scanLineBreak();
 1566  1467
             if (lineBreak.length() != 0) {
 1567  493
                 chunks.append(lineBreak);
 1568  
             } else {
 1569  974
                 return chunks.toString();
 1570  
             }
 1571  493
         }
 1572  
     }
 1573  
 
 1574  
     /**
 1575  
      * <pre>
 1576  
      * See the specification for details.
 1577  
      * We add an additional restriction for the flow context:
 1578  
      *   plain scalars in the flow context cannot contain ',', ':' and '?'.
 1579  
      * We also keep track of the `allow_simple_key` flag here.
 1580  
      * Indentation rules are loosed for the flow context.
 1581  
      * </pre>
 1582  
      */
 1583  
     private Token scanPlain() {
 1584  151513
         StringBuilder chunks = new StringBuilder();
 1585  151513
         Mark startMark = reader.getMark();
 1586  151513
         Mark endMark = startMark;
 1587  151513
         int indent = this.indent + 1;
 1588  151513
         String spaces = "";
 1589  
         while (true) {
 1590  
             char ch;
 1591  153245
             int length = 0;
 1592  153245
             if (reader.peek() == '#') {
 1593  0
                 break;
 1594  
             }
 1595  
             while (true) {
 1596  749004
                 ch = reader.peek(length);
 1597  749004
                 if (Constant.NULL_BL_T_LINEBR.has(ch)
 1598  
                         || (this.flowLevel == 0 && ch == ':' && Constant.NULL_BL_T_LINEBR
 1599  
                                 .has(reader.peek(length + 1)))
 1600  
                         || (this.flowLevel != 0 && ",:?[]{}".indexOf(ch) != -1)) {
 1601  4347
                     break;
 1602  
                 }
 1603  595759
                 length++;
 1604  
             }
 1605  
             // It's not clear what we should do with ':' in the flow context.
 1606  153245
             if (this.flowLevel != 0 && ch == ':'
 1607  
                     && Constant.NULL_BL_T_LINEBR.hasNo(reader.peek(length + 1), ",[]{}")) {
 1608  2
                 reader.forward(length);
 1609  2
                 throw new ScannerException("while scanning a plain scalar", startMark,
 1610  
                         "found unexpected ':'", reader.getMark(),
 1611  
                         "Please check http://pyyaml.org/wiki/YAMLColonInFlowContext for details.");
 1612  
             }
 1613  153243
             if (length == 0) {
 1614  380
                 break;
 1615  
             }
 1616  152863
             this.allowSimpleKey = false;
 1617  152863
             chunks.append(spaces);
 1618  152863
             chunks.append(reader.prefixForward(length));
 1619  152863
             endMark = reader.getMark();
 1620  152863
             spaces = scanPlainSpaces();
 1621  
             // System.out.printf("spaces[%s]\n", spaces);
 1622  152863
             if (spaces.length() == 0 || reader.peek() == '#'
 1623  
                     || (this.flowLevel == 0 && this.reader.getColumn() < indent)) {
 1624  112545
                 break;
 1625  
             }
 1626  1732
         }
 1627  151511
         return new ScalarToken(chunks.toString(), startMark, endMark, true);
 1628  
     }
 1629  
 
 1630  
     /**
 1631  
      * <pre>
 1632  
      * See the specification for details.
 1633  
      * The specification is really confusing about tabs in plain scalars.
 1634  
      * We just forbid them completely. Do not use tabs in YAML!
 1635  
      * </pre>
 1636  
      */
 1637  
     private String scanPlainSpaces() {
 1638  152863
         int length = 0;
 1639  154758
         while (reader.peek(length) == ' ') {
 1640  1895
             length++;
 1641  
         }
 1642  152863
         String whitespaces = reader.prefixForward(length);
 1643  152863
         String lineBreak = scanLineBreak();
 1644  152863
         if (lineBreak.length() != 0) {
 1645  112794
             this.allowSimpleKey = true;
 1646  112794
             String prefix = reader.prefix(3);
 1647  112794
             if ("---".equals(prefix) || "...".equals(prefix)
 1648  
                     && Constant.NULL_BL_T_LINEBR.has(reader.peek(3))) {
 1649  85
                 return "";
 1650  
             }
 1651  112709
             StringBuilder breaks = new StringBuilder();
 1652  
             while (true) {
 1653  500951
                 if (reader.peek() == ' ') {
 1654  388190
                     reader.forward();
 1655  
                 } else {
 1656  112761
                     String lb = scanLineBreak();
 1657  112761
                     if (lb.length() != 0) {
 1658  52
                         breaks.append(lb);
 1659  52
                         prefix = reader.prefix(3);
 1660  52
                         if ("---".equals(prefix) || "...".equals(prefix)
 1661  
                                 && Constant.NULL_BL_T_LINEBR.has(reader.peek(3))) {
 1662  0
                             return "";
 1663  
                         }
 1664  
                     } else {
 1665  
                         break;
 1666  
                     }
 1667  52
                 }
 1668  
             }
 1669  112709
             if (!"\n".equals(lineBreak)) {
 1670  5
                 return lineBreak + breaks;
 1671  112704
             } else if (breaks.length() == 0) {
 1672  112657
                 return " ";
 1673  
             }
 1674  47
             return breaks.toString();
 1675  
         }
 1676  40069
         return whitespaces;
 1677  
     }
 1678  
 
 1679  
     /**
 1680  
      * <pre>
 1681  
      * See the specification for details.
 1682  
      * For some strange reasons, the specification does not allow '_' in
 1683  
      * tag handles. I have allowed it anyway.
 1684  
      * </pre>
 1685  
      */
 1686  
     private String scanTagHandle(String name, Mark startMark) {
 1687  9003
         char ch = reader.peek();
 1688  9003
         if (ch != '!') {
 1689  2
             throw new ScannerException("while scanning a " + name, startMark,
 1690  
                     "expected '!', but found " + ch + "(" + ((int) ch) + ")", reader.getMark());
 1691  
         }
 1692  9001
         int length = 1;
 1693  9001
         ch = reader.peek(length);
 1694  9001
         if (ch != ' ') {
 1695  9302
             while (Constant.ALPHA.has(ch)) {
 1696  370
                 length++;
 1697  370
                 ch = reader.peek(length);
 1698  
             }
 1699  8932
             if (ch != '!') {
 1700  2
                 reader.forward(length);
 1701  2
                 throw new ScannerException("while scanning a " + name, startMark,
 1702  
                         "expected '!', but found " + ch + "(" + ((int) ch) + ")", reader.getMark());
 1703  
             }
 1704  8930
             length++;
 1705  
         }
 1706  8999
         String value = reader.prefixForward(length);
 1707  8999
         return value;
 1708  
     }
 1709  
 
 1710  
     private String scanTagUri(String name, Mark startMark) {
 1711  
         // See the specification for details.
 1712  
         // Note: we do not check if URI is well-formed.
 1713  10266
         StringBuilder chunks = new StringBuilder();
 1714  10266
         int length = 0;
 1715  10266
         char ch = reader.peek(length);
 1716  111701
         while (Constant.URI_CHARS.has(ch)) {
 1717  101444
             if (ch == '%') {
 1718  11
                 chunks.append(reader.prefixForward(length));
 1719  11
                 length = 0;
 1720  11
                 chunks.append(scanUriEscapes(name, startMark));
 1721  
             } else {
 1722  101433
                 length++;
 1723  
             }
 1724  101435
             ch = reader.peek(length);
 1725  
         }
 1726  10257
         if (length != 0) {
 1727  10254
             chunks.append(reader.prefixForward(length));
 1728  10254
             length = 0;
 1729  
         }
 1730  10257
         if (chunks.length() == 0) {
 1731  2
             throw new ScannerException("while scanning a " + name, startMark,
 1732  
                     "expected URI, but found " + ch + "(" + ((int) ch) + ")", reader.getMark());
 1733  
         }
 1734  10255
         return chunks.toString();
 1735  
     }
 1736  
 
 1737  
     private String scanUriEscapes(String name, Mark startMark) {
 1738  
         // First, look ahead to see how many URI-escaped characters we should
 1739  
         // expect, so we can use the correct buffer size.
 1740  11
         int length = 1;
 1741  1044
         while (reader.peek(length * 3) == '%') {
 1742  1033
             length++;
 1743  
         }
 1744  
         // See the specification for details.
 1745  
         // URIs containing 16 and 32 bit Unicode characters are
 1746  
         // encoded in UTF-8, and then each octet is written as a
 1747  
         // separate character.
 1748  11
         Mark beginningMark = reader.getMark();
 1749  11
         ByteBuffer buff = ByteBuffer.allocate(length);
 1750  1050
         while (reader.peek() == '%') {
 1751  1043
             reader.forward();
 1752  
             try {
 1753  1043
                 byte code = (byte) Integer.parseInt(reader.prefix(2), 16);
 1754  1039
                 buff.put(code);
 1755  4
             } catch (NumberFormatException nfe) {
 1756  4
                 throw new ScannerException("while scanning a " + name, startMark,
 1757  
                         "expected URI escape sequence of 2 hexadecimal numbers, but found "
 1758  
                                 + reader.peek() + "(" + ((int) reader.peek()) + ") and "
 1759  
                                 + reader.peek(1) + "(" + ((int) reader.peek(1)) + ")",
 1760  
                         reader.getMark());
 1761  1039
             }
 1762  1039
             reader.forward(2);
 1763  
         }
 1764  7
         buff.flip();
 1765  
         try {
 1766  7
             return UriEncoder.decode(buff);
 1767  5
         } catch (CharacterCodingException e) {
 1768  5
             throw new ScannerException("while scanning a " + name, startMark,
 1769  
                     "expected URI in UTF-8: " + e.getMessage(), beginningMark);
 1770  
         }
 1771  
     }
 1772  
 
 1773  
     private String scanLineBreak() {
 1774  
         // Transforms:
 1775  
         // '\r\n' : '\n'
 1776  
         // '\r' : '\n'
 1777  
         // '\n' : '\n'
 1778  
         // '\x85' : '\n'
 1779  
         // default : ''
 1780  1076321
         char ch = reader.peek();
 1781  1076321
         if (ch == '\r' || ch == '\n' || ch == '\u0085') {
 1782  262910
             if (ch == '\r' && '\n' == reader.peek(1)) {
 1783  540
                 reader.forward(2);
 1784  
             } else {
 1785  262370
                 reader.forward();
 1786  
             }
 1787  262910
             return "\n";
 1788  813411
         } else if (ch == '\u2028' || ch == '\u2029') {
 1789  185
             reader.forward();
 1790  185
             return String.valueOf(ch);
 1791  
         }
 1792  813226
         return "";
 1793  
     }
 1794  
 
 1795  
     /**
 1796  
      * Chomping the tail may have 3 values - yes, no, not defined.
 1797  
      */
 1798  
     private class Chomping {
 1799  
         private final Boolean value;
 1800  
         private final int increment;
 1801  
 
 1802  1184
         public Chomping(Boolean value, int increment) {
 1803  1184
             this.value = value;
 1804  1184
             this.increment = increment;
 1805  1184
         }
 1806  
 
 1807  
         public boolean chompTailIsNotFalse() {
 1808  1182
             return value == null || value;
 1809  
         }
 1810  
 
 1811  
         public boolean chompTailIsTrue() {
 1812  1182
             return value != null && value;
 1813  
         }
 1814  
 
 1815  
         public int getIncrement() {
 1816  1184
             return increment;
 1817  
         }
 1818  
     }
 1819  
 }