1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65 package org.jaxen.saxpath.base;
66
67 class XPathLexer
68 {
69 private String xpath;
70 private int currentPosition;
71 private int endPosition;
72
73 private Token previousToken;
74
75 public XPathLexer(String xpath)
76 {
77 setXPath( xpath );
78 }
79
80 XPathLexer()
81 {
82 }
83
84 void setXPath(String xpath)
85 {
86 this.xpath = xpath;
87 this.currentPosition = 0;
88 this.endPosition = xpath.length();
89 }
90
91 public String getXPath()
92 {
93 return this.xpath;
94 }
95
96 public Token nextToken()
97 {
98 Token token = null;
99
100 do
101 {
102 token = null;
103
104 switch ( LA(1) )
105 {
106 case '$':
107 {
108 token = dollar();
109 break;
110 }
111
112 case '"':
113 case '\'':
114 {
115 token = literal();
116 break;
117 }
118
119 case '/':
120 {
121 token = slashes();
122 break;
123 }
124
125 case ',':
126 {
127 token = comma();
128 break;
129 }
130
131 case '(':
132 {
133 token = leftParen();
134 break;
135 }
136
137 case ')':
138 {
139 token = rightParen();
140 break;
141 }
142
143 case '[':
144 {
145 token = leftBracket();
146 break;
147 }
148
149 case ']':
150 {
151 token = rightBracket();
152 break;
153 }
154
155 case '+':
156 {
157 token = plus();
158 break;
159 }
160
161 case '-':
162 {
163 token = minus();
164 break;
165 }
166
167 case '<':
168 case '>':
169 {
170 token = relationalOperator();
171 break;
172 }
173
174 case '=':
175 {
176 token = equals();
177 break;
178 }
179
180 case '!':
181 {
182 if ( LA(2) == '=' )
183 {
184 token = notEquals();
185 }
186 else
187 {
188 token = not();
189 }
190 break;
191 }
192
193 case '|':
194 {
195 token = pipe();
196 break;
197 }
198
199 case '@':
200 {
201 token = at();
202 break;
203 }
204
205 case ':':
206 {
207 if ( LA(2) == ':' )
208 {
209 token = doubleColon();
210 }
211 else
212 {
213 token = colon();
214 }
215 break;
216 }
217
218 case '*':
219 {
220 token = star();
221 break;
222 }
223
224 case '.':
225 {
226 switch ( LA(2) )
227 {
228 case '0':
229 case '1':
230 case '2':
231 case '3':
232 case '4':
233 case '5':
234 case '6':
235 case '7':
236 case '8':
237 case '9':
238 {
239 token = number();
240 break;
241 }
242 default:
243 {
244 token = dots();
245 break;
246 }
247 }
248 break;
249 }
250
251 case '0':
252 case '1':
253 case '2':
254 case '3':
255 case '4':
256 case '5':
257 case '6':
258 case '7':
259 case '8':
260 case '9':
261 {
262 token = number();
263 break;
264 }
265
266 case ' ':
267 case '\t':
268 case '\n':
269 case '\r':
270 {
271 token = whitespace();
272 break;
273 }
274
275 default:
276 {
277 if ( isIdentifierStartChar( LA(1) ) )
278 {
279 token = identifierOrOperatorName();
280 }
281 }
282 }
283
284 if ( token == null )
285 {
286 if (!hasMoreChars())
287 {
288 token = new Token( TokenTypes.EOF,
289 getXPath(),
290 currentPosition(),
291 endPosition() );
292 }
293 else
294 {
295 token = new Token( TokenTypes.ERROR,
296 getXPath(),
297 currentPosition(),
298 endPosition() );
299 }
300 }
301
302 }
303 while ( token.getTokenType() == TokenTypes.SKIP );
304
305 setPreviousToken( token );
306
307 return token;
308 }
309
310 Token identifierOrOperatorName()
311 {
312 Token token = null;
313
314 Token previousToken = getPreviousToken();
315
316 if ( previousToken != null )
317 {
318
319
320
321
322
323
324
325
326
327
328
329 switch ( previousToken.getTokenType() )
330 {
331 case TokenTypes.AT:
332 case TokenTypes.DOUBLE_COLON:
333 case TokenTypes.LEFT_PAREN:
334 case TokenTypes.LEFT_BRACKET:
335 case TokenTypes.AND:
336 case TokenTypes.OR:
337 case TokenTypes.MOD:
338 case TokenTypes.DIV:
339 case TokenTypes.COLON:
340 case TokenTypes.SLASH:
341 case TokenTypes.DOUBLE_SLASH:
342 case TokenTypes.PIPE:
343 case TokenTypes.DOLLAR:
344 case TokenTypes.PLUS:
345 case TokenTypes.MINUS:
346 case TokenTypes.STAR:
347 case TokenTypes.COMMA:
348 case TokenTypes.LESS_THAN:
349 case TokenTypes.GREATER_THAN:
350 case TokenTypes.LESS_THAN_EQUALS:
351 case TokenTypes.GREATER_THAN_EQUALS:
352 case TokenTypes.EQUALS:
353 case TokenTypes.NOT_EQUALS:
354 {
355 token = identifier();
356 break;
357 }
358 default:
359 {
360 token = operatorName();
361 break;
362 }
363 }
364 }
365 else
366 {
367 token = identifier();
368 }
369
370 return token;
371 }
372
373 Token identifier()
374 {
375 Token token = null;
376
377 int start = currentPosition();
378
379 while ( hasMoreChars() )
380 {
381 if ( isIdentifierChar( LA(1) ) )
382 {
383 consume();
384 }
385 else
386 {
387 break;
388 }
389 }
390
391 token = new Token( TokenTypes.IDENTIFIER,
392 getXPath(),
393 start,
394 currentPosition() );
395
396 return token;
397 }
398
399 Token operatorName()
400 {
401 Token token = null;
402
403 switch ( LA(1) )
404 {
405 case 'a':
406 {
407 token = and();
408 break;
409 }
410
411 case 'o':
412 {
413 token = or();
414 break;
415 }
416
417 case 'm':
418 {
419 token = mod();
420 break;
421 }
422
423 case 'd':
424 {
425 token = div();
426 break;
427 }
428 }
429
430 return token;
431 }
432
433 Token mod()
434 {
435 Token token = null;
436
437 if ( ( LA(1) == 'm' )
438 &&
439 ( LA(2) == 'o' )
440 &&
441 ( LA(3) == 'd' )
442 &&
443 ( ! isIdentifierChar( LA(4) ) ) )
444 {
445 token = new Token( TokenTypes.MOD,
446 getXPath(),
447 currentPosition(),
448 currentPosition()+3 );
449
450 consume();
451 consume();
452 consume();
453 }
454
455 return token;
456 }
457
458 Token div()
459 {
460 Token token = null;
461
462 if ( ( LA(1) == 'd' )
463 &&
464 ( LA(2) == 'i' )
465 &&
466 ( LA(3) == 'v' )
467 &&
468 ( ! isIdentifierChar( LA(4) ) ) )
469 {
470 token = new Token( TokenTypes.DIV,
471 getXPath(),
472 currentPosition(),
473 currentPosition()+3 );
474
475 consume();
476 consume();
477 consume();
478 }
479
480 return token;
481 }
482
483 Token and()
484 {
485 Token token = null;
486
487 if ( ( LA(1) == 'a' )
488 &&
489 ( LA(2) == 'n' )
490 &&
491 ( LA(3) == 'd' )
492 &&
493 ( ! isIdentifierChar( LA(4) ) ) )
494 {
495 token = new Token( TokenTypes.AND,
496 getXPath(),
497 currentPosition(),
498 currentPosition()+3 );
499
500 consume();
501 consume();
502 consume();
503 }
504
505 return token;
506 }
507
508 Token or()
509 {
510 Token token = null;
511
512 if ( ( LA(1) == 'o' )
513 &&
514 ( LA(2) == 'r' )
515 &&
516 ( ! isIdentifierChar( LA(3) ) ) )
517 {
518 token = new Token( TokenTypes.OR,
519 getXPath(),
520 currentPosition(),
521 currentPosition()+2 );
522
523 consume();
524 consume();
525 }
526
527 return token;
528 }
529
530 Token number()
531 {
532 int start = currentPosition();
533 boolean periodAllowed = true;
534
535 loop:
536 while( true )
537 {
538 switch ( LA(1) )
539 {
540 case '.':
541 {
542 if ( periodAllowed )
543 {
544 periodAllowed = false;
545 consume();
546 }
547 else
548 {
549 break loop;
550 }
551 break;
552 }
553
554 case '0':
555 case '1':
556 case '2':
557 case '3':
558 case '4':
559 case '5':
560 case '6':
561 case '7':
562 case '8':
563 case '9':
564 {
565 consume();
566 break;
567 }
568 default:
569 {
570 break loop;
571 }
572 }
573 }
574
575 Token token = null;
576
577 if ( periodAllowed )
578 {
579 token = new Token( TokenTypes.INTEGER,
580 getXPath(),
581 start,
582 currentPosition() );
583 }
584 else
585 {
586 token = new Token( TokenTypes.DOUBLE,
587 getXPath(),
588 start,
589 currentPosition() );
590 }
591
592 return token;
593 }
594
595 Token whitespace()
596 {
597 consume();
598
599 loop:
600 while( hasMoreChars() )
601 {
602 switch ( LA(1) )
603 {
604 case ' ':
605 case '\t':
606 case '\n':
607 case '\r':
608 {
609 consume();
610 break;
611 }
612
613 default:
614 {
615 break loop;
616 }
617 }
618 }
619
620 return new Token( TokenTypes.SKIP,
621 getXPath(),
622 0,
623 0 );
624 }
625
626 Token comma()
627 {
628 Token token = new Token( TokenTypes.COMMA,
629 getXPath(),
630 currentPosition(),
631 currentPosition()+1 );
632
633 consume();
634
635 return token;
636 }
637
638 Token equals()
639 {
640 Token token = new Token( TokenTypes.EQUALS,
641 getXPath(),
642 currentPosition(),
643 currentPosition()+1 );
644
645 consume();
646
647 return token;
648 }
649
650 Token minus()
651 {
652 Token token = new Token( TokenTypes.MINUS,
653 getXPath(),
654 currentPosition(),
655 currentPosition()+1 );
656 consume();
657
658 return token;
659 }
660
661 Token plus()
662 {
663 Token token = new Token( TokenTypes.PLUS,
664 getXPath(),
665 currentPosition(),
666 currentPosition()+1 );
667 consume();
668
669 return token;
670 }
671
672 Token dollar()
673 {
674 Token token = new Token( TokenTypes.DOLLAR,
675 getXPath(),
676 currentPosition(),
677 currentPosition()+1 );
678 consume();
679
680 return token;
681 }
682
683 Token pipe()
684 {
685 Token token = new Token( TokenTypes.PIPE,
686 getXPath(),
687 currentPosition(),
688 currentPosition()+1 );
689
690 consume();
691
692 return token;
693 }
694
695 Token at()
696 {
697 Token token = new Token( TokenTypes.AT,
698 getXPath(),
699 currentPosition(),
700 currentPosition()+1 );
701
702 consume();
703
704 return token;
705 }
706
707 Token colon()
708 {
709 Token token = new Token( TokenTypes.COLON,
710 getXPath(),
711 currentPosition(),
712 currentPosition()+1 );
713 consume();
714
715 return token;
716 }
717
718 Token doubleColon()
719 {
720 Token token = new Token( TokenTypes.DOUBLE_COLON,
721 getXPath(),
722 currentPosition(),
723 currentPosition()+2 );
724
725 consume();
726 consume();
727
728 return token;
729 }
730
731 Token not()
732 {
733 Token token = new Token( TokenTypes.NOT,
734 getXPath(),
735 currentPosition(),
736 currentPosition() + 1 );
737
738 consume();
739
740 return token;
741 }
742
743 Token notEquals()
744 {
745 Token token = new Token( TokenTypes.NOT_EQUALS,
746 getXPath(),
747 currentPosition(),
748 currentPosition() + 2 );
749
750 consume();
751 consume();
752
753 return token;
754 }
755
756 Token relationalOperator()
757 {
758 Token token = null;
759
760 switch ( LA(1) )
761 {
762 case '<':
763 {
764 if ( LA(2) == '=' )
765 {
766 token = new Token( TokenTypes.LESS_THAN_EQUALS,
767 getXPath(),
768 currentPosition(),
769 currentPosition() + 2 );
770 consume();
771 }
772 else
773 {
774 token = new Token( TokenTypes.LESS_THAN,
775 getXPath(),
776 currentPosition(),
777 currentPosition() + 1);
778 }
779
780 consume();
781 break;
782 }
783 case '>':
784 {
785 if ( LA(2) == '=' )
786 {
787 token = new Token( TokenTypes.GREATER_THAN_EQUALS,
788 getXPath(),
789 currentPosition(),
790 currentPosition() + 2 );
791 consume();
792 }
793 else
794 {
795 token = new Token( TokenTypes.GREATER_THAN,
796 getXPath(),
797 currentPosition(),
798 currentPosition() + 1 );
799 }
800
801 consume();
802 break;
803 }
804 }
805
806 return token;
807
808 }
809
810 Token star()
811 {
812 Token token = new Token( TokenTypes.STAR,
813 getXPath(),
814 currentPosition(),
815 currentPosition()+1 );
816
817 consume();
818
819 return token;
820 }
821
822 Token literal()
823 {
824 Token token = null;
825
826 char match = LA(1);
827
828 consume();
829
830 int start = currentPosition();
831
832 while ( ( token == null )
833 &&
834 hasMoreChars() )
835 {
836 if ( LA(1) == match )
837 {
838 token = new Token( TokenTypes.LITERAL,
839 getXPath(),
840 start,
841 currentPosition() );
842 }
843 consume();
844 }
845
846 return token;
847 }
848
849 Token dots()
850 {
851 Token token = null;
852
853 switch ( LA(2) )
854 {
855 case '.':
856 {
857 token = new Token( TokenTypes.DOT_DOT,
858 getXPath(),
859 currentPosition(),
860 currentPosition()+2 ) ;
861 consume();
862 consume();
863 break;
864 }
865 default:
866 {
867 token = new Token( TokenTypes.DOT,
868 getXPath(),
869 currentPosition(),
870 currentPosition()+1 );
871 consume();
872 break;
873 }
874 }
875
876 return token;
877 }
878
879 Token leftBracket()
880 {
881 Token token = new Token( TokenTypes.LEFT_BRACKET,
882 getXPath(),
883 currentPosition(),
884 currentPosition()+1 );
885
886 consume();
887
888 return token;
889 }
890
891 Token rightBracket()
892 {
893 Token token = new Token( TokenTypes.RIGHT_BRACKET,
894 getXPath(),
895 currentPosition(),
896 currentPosition()+1 );
897
898 consume();
899
900 return token;
901 }
902
903 Token leftParen()
904 {
905 Token token = new Token( TokenTypes.LEFT_PAREN,
906 getXPath(),
907 currentPosition(),
908 currentPosition()+1 );
909
910 consume();
911
912 return token;
913 }
914
915 Token rightParen()
916 {
917 Token token = new Token( TokenTypes.RIGHT_PAREN,
918 getXPath(),
919 currentPosition(),
920 currentPosition()+1 );
921
922 consume();
923
924 return token;
925 }
926
927 Token slashes()
928 {
929 Token token = null;
930
931 switch ( LA(2) )
932 {
933 case '/':
934 {
935 token = new Token( TokenTypes.DOUBLE_SLASH,
936 getXPath(),
937 currentPosition(),
938 currentPosition()+2 );
939 consume();
940 consume();
941 break;
942 }
943 default:
944 {
945 token = new Token( TokenTypes.SLASH,
946 getXPath(),
947 currentPosition(),
948 currentPosition()+1 );
949 consume();
950 }
951 }
952
953 return token;
954 }
955
956 char LA(int i)
957 {
958 if ( currentPosition + ( i - 1 ) >= endPosition() )
959 {
960 return (char) -1;
961 }
962
963 return getXPath().charAt( currentPosition() + (i - 1) );
964 }
965
966 void consume()
967 {
968 ++this.currentPosition;
969 }
970
971 void consume(int i)
972 {
973 this.currentPosition += i;
974 }
975
976 int currentPosition()
977 {
978 return this.currentPosition;
979 }
980
981 int endPosition()
982 {
983 return this.endPosition;
984 }
985
986 Token getPreviousToken()
987 {
988 return this.previousToken;
989 }
990
991 void setPreviousToken(Token previousToken)
992 {
993 this.previousToken = previousToken;
994 }
995
996 boolean hasMoreChars()
997 {
998 return currentPosition() < endPosition();
999 }
1000
1001 boolean isIdentifierChar(char c)
1002 {
1003 return Verifier.isXMLNCNameCharacter( c );
1004 }
1005
1006 boolean isIdentifierStartChar(char c)
1007 {
1008 return Verifier.isXMLNCNameStartCharacter( c );
1009 }
1010
1011 }