@@ -654,7 +654,8 @@ impl<'a> StringReader<'a> {
654
654
// Note: r as in r" or r#" is part of a raw string literal,
655
655
// b as in b' is part of a byte literal.
656
656
// They are not identifiers, and are handled further down.
657
- ( 'r' , Some ( '"' ) ) | ( 'r' , Some ( '#' ) ) | ( 'b' , Some ( '\'' ) ) => false ,
657
+ ( 'r' , Some ( '"' ) ) | ( 'r' , Some ( '#' ) ) |
658
+ ( 'b' , Some ( '"' ) ) | ( 'b' , Some ( '\'' ) ) => false ,
658
659
_ => true
659
660
} {
660
661
let start = self . last_pos ;
@@ -859,62 +860,124 @@ impl<'a> StringReader<'a> {
859
860
}
860
861
'b' => {
861
862
self . bump ( ) ;
862
- assert ! ( self . curr_is( '\'' ) , "Should have been a token::IDENT" ) ;
863
- self . bump ( ) ;
864
- let start = self . last_pos ;
865
-
866
- // the eof will be picked up by the final `'` check below
867
- let mut c2 = self . curr . unwrap_or ( '\x00' ) ;
868
- self . bump ( ) ;
863
+ return match self . curr {
864
+ Some ( '\'' ) => parse_byte ( self ) ,
865
+ Some ( '"' ) => parse_byte_string ( self ) ,
866
+ _ => unreachable ! ( ) // Should have been a token::IDENT above.
867
+ } ;
869
868
870
- match c2 {
871
- '\\' => {
872
- // '\X' for some X must be a character constant:
873
- let escaped = self . curr ;
874
- let escaped_pos = self . last_pos ;
875
- self . bump ( ) ;
876
- match escaped {
877
- None => { }
878
- Some ( e) => {
879
- c2 = match e {
880
- 'n' => '\n' ,
881
- 'r' => '\r' ,
882
- 't' => '\t' ,
883
- '\\' => '\\' ,
884
- '\'' => '\'' ,
885
- '"' => '"' ,
886
- '0' => '\x00' ,
887
- 'x' => self . scan_numeric_escape ( 2 u, '\'' ) ,
888
- c2 => {
889
- self . err_span_char ( escaped_pos, self . last_pos ,
890
- "unknown byte escape" , c2) ;
891
- c2
869
+ fn parse_byte ( self_ : & mut StringReader ) -> token:: Token { // lexes a byte literal (b followed by a single-quoted character); entered with curr at the opening quote
870
+ self_. bump ( ) ; // step over the opening quote
871
+ let start = self_. last_pos ; // start position used for the error spans below
872
+
873
+ // the eof will be picked up by the final `'` check below
874
+ let mut c2 = self_. curr . unwrap_or ( '\x00' ) ;
875
+ self_. bump ( ) ; // consume the character that was just read into c2
876
+
877
+ match c2 {
878
+ '\\' => {
879
+ // '\X' for some X must be a character constant:
880
+ let escaped = self_. curr ;
881
+ let escaped_pos = self_. last_pos ;
882
+ self_. bump ( ) ;
883
+ match escaped {
884
+ None => { } // EOF right after the backslash: c2 is left as the backslash
885
+ Some ( e) => {
886
+ c2 = match e {
887
+ 'n' => '\n' ,
888
+ 'r' => '\r' ,
889
+ 't' => '\t' ,
890
+ '\\' => '\\' ,
891
+ '\'' => '\'' ,
892
+ '"' => '"' ,
893
+ '0' => '\x00' ,
894
+ 'x' => self_. scan_numeric_escape ( 2 u, '\'' ) , // presumably reads two hex digits with the quote as delimiter; confirm in scan_numeric_escape
895
+ c2 => {
896
+ self_. err_span_char (
897
+ escaped_pos, self_. last_pos ,
898
+ "unknown byte escape" , c2) ;
899
+ c2 // error already reported; the unrecognised character is kept as the value
900
+ }
892
901
}
893
902
}
894
903
}
895
904
}
905
+ '\t' | '\n' | '\r' | '\'' => { // a raw tab, newline, carriage return or quote is reported as an error
906
+ self_. err_span_char ( start, self_. last_pos ,
907
+ "byte constant must be escaped" , c2) ;
908
+ }
909
+ _ => if c2 > '\x7F' { // anything above 0x7F is outside ASCII and is reported
910
+ self_. err_span_char ( start, self_. last_pos ,
911
+ "byte constant must be ASCII. \
912
+ Use a \\ xHH escape for a non-ASCII byte", c2) ;
913
+ }
896
914
}
897
- '\t' | '\n' | '\r' | '\'' => {
898
- self . err_span_char ( start, self . last_pos ,
899
- "byte constant must be escaped" , c2) ;
900
- }
901
- _ if c2 > '\x7F' => {
902
- self . err_span_char ( start, self . last_pos ,
903
- "byte constant must be ASCII. \
904
- Use a \\ xHH escape for a non-ASCII byte", c2) ;
915
+ if !self_. curr_is ( '\'' ) { // the literal must be closed by a quote at this point
916
+ // Byte offsetting here is okay because the
917
+ // characters before position `start` are an
918
+ // ascii single quote and ascii 'b'.
919
+ self_. fatal_span_verbose (
920
+ start - BytePos ( 2 ) , self_. last_pos ,
921
+ "unterminated byte constant" . to_string ( ) ) ;
905
922
}
906
- _ => { }
923
+ self_. bump ( ) ; // advance curr past token
924
+ return token:: LIT_BYTE ( c2 as u8 ) ; // the char value is narrowed to u8 with an as cast
907
925
}
908
- if !self . curr_is ( '\'' ) {
909
- self . fatal_span_verbose (
910
- // Byte offsetting here is okay because the
911
- // character before position `start` are an
912
- // ascii single quote and ascii 'b'.
913
- start - BytePos ( 2 ) , self . last_pos ,
914
- "unterminated byte constant" . to_string ( ) ) ;
926
+
927
+ fn parse_byte_string ( self_ : & mut StringReader ) -> token:: Token { // lexes a byte string literal (b followed by a double-quoted body); entered with curr at the opening quote
928
+ self_. bump ( ) ; // step over the opening quote
929
+ let start = self_. last_pos ; // start of the span used by the unterminated-literal errors
930
+ let mut value = Vec :: new ( ) ; // bytes of the literal collected so far
931
+ while !self_. curr_is ( '"' ) { // loop until the closing quote is the current character
932
+ if self_. is_eof ( ) {
933
+ self_. fatal_span ( start, self_. last_pos ,
934
+ "unterminated double quote byte string" ) ;
935
+ }
936
+
937
+ let ch = self_. curr . unwrap ( ) ; // NOTE(review): relies on fatal_span above not returning at EOF; confirm
938
+ self_. bump ( ) ;
939
+ match ch {
940
+ '\\' => {
941
+ if self_. is_eof ( ) {
942
+ self_. fatal_span ( start, self_. last_pos ,
943
+ "unterminated double quote byte string" ) ;
944
+ }
945
+
946
+ let escaped = self_. curr . unwrap ( ) ; // NOTE(review): same reliance on fatal_span not returning; confirm
947
+ let escaped_pos = self_. last_pos ;
948
+ self_. bump ( ) ;
949
+ match escaped {
950
+ 'n' => value. push ( '\n' as u8 ) ,
951
+ 'r' => value. push ( '\r' as u8 ) ,
952
+ 't' => value. push ( '\t' as u8 ) ,
953
+ '\\' => value. push ( '\\' as u8 ) ,
954
+ '\'' => value. push ( '\'' as u8 ) ,
955
+ '"' => value. push ( '"' as u8 ) ,
956
+ '\n' => self_. consume_whitespace ( ) , // backslash-newline pushes no byte; presumably skips the following whitespace, confirm in consume_whitespace
957
+ '0' => value. push ( 0 ) ,
958
+ 'x' => {
959
+ value. push ( self_. scan_numeric_escape ( 2 u, '"' ) as u8 ) ; // numeric escape result is narrowed to u8 with an as cast
960
+ }
961
+ c2 => {
962
+ self_. err_span_char ( escaped_pos, self_. last_pos ,
963
+ "unknown byte string escape" , c2) ; // error is reported and no byte is pushed for this escape
964
+ }
965
+ }
966
+ }
967
+ _ => { // unescaped character; no must-be-escaped check is applied here, only the ASCII check
968
+ if ch <= '\x7F' {
969
+ value. push ( ch as u8 )
970
+ } else { // non-ASCII character: reported below and not pushed
971
+ self_. err_span_char ( self_. last_pos , self_. last_pos , // NOTE(review): both span ends are last_pos, read after the bump; confirm this is the intended span
972
+ "byte string must be ASCII. \
973
+ Use a \\ xHH escape for a non-ASCII byte", ch) ;
974
+ }
975
+ }
976
+ }
977
+ }
978
+ self_. bump ( ) ; // step over the closing quote
979
+ return token:: LIT_BINARY ( Rc :: new ( value) ) ; // collected bytes are wrapped in an Rc for the token
915
980
}
916
- self . bump ( ) ; // advance curr past token
917
- return token:: LIT_BYTE ( c2 as u8 ) ;
918
981
}
919
982
'"' => {
920
983
let mut accum_str = String :: new ( ) ;
0 commit comments