diff options
| -rw-r--r-- | Cargo.lock | 86 | ||||
| -rw-r--r-- | Cargo.toml | 1 | ||||
| -rw-r--r-- | src/tokenizer.rs | 170 | ||||
| -rw-r--r-- | tests/simple.tdy | 8 |
4 files changed, 187 insertions, 78 deletions
@@ -1,5 +1,91 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. [[package]] +name = "beef" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "474a626a67200bd107d44179bb3d4fc61891172d11696609264589be6a0e6a43" + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "logos" +version = "0.11.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b91c49573597a5d6c094f9031617bb1fed15c0db68c81e6546d313414ce107e4" +dependencies = [ + "logos-derive", +] + +[[package]] +name = "logos-derive" +version = "0.11.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "797b1f8a0571b331c1b47e7db245af3dc634838da7a92b3bef4e30376ae1c347" +dependencies = [ + "beef", + "fnv", + "proc-macro2", + "quote", + "regex-syntax", + "syn", + "utf8-ranges", +] + +[[package]] +name = "proc-macro2" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e0704ee1a7e00d7bb417d0770ea303c1bccbabf0ef1667dae92b5967f5f8a71" +dependencies = [ + "unicode-xid", +] + +[[package]] +name = "quote" +version = "1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "991431c3519a3f36861882da93630ce66b52918dcf1b8e2fd66b397fc96f28df" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "regex-syntax" +version = "0.6.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5eb417147ba9860a96cfe72a0b93bf88fee1744b5636ec99ab20c1aa9376581" + +[[package]] +name = "syn" +version = "1.0.58" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc60a3d73ea6594cd712d830cc1f0390fd71542d8c8cd24e70cc54cdfd5e05d5" +dependencies = [ + "proc-macro2", + "quote", + "unicode-xid", +] + +[[package]] name = "tihdy" version = "0.1.0" +dependencies = [ + "logos", +] + +[[package]] +name = "unicode-xid" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7fe0bb3479651439c9112f72b6c505038574c9fbb575ed1bf3b797fa39dd564" + +[[package]] +name = "utf8-ranges" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4ae116fef2b7fea257ed6440d3cfcff7f190865f170cdad00bb6465bf18ecba" @@ -7,3 +7,4 @@ edition = "2018" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +logos = "0.11.4" diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 371ae94..f1f0658 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -1,87 +1,101 @@ -use std::{env, fs}; - -#[derive(Debug)] -pub enum TokenKind { - Identifier(String), String(String), Float(f64), Int(i64), Bool(bool), - - If, For, In, Loop, - - Plus, Minus, Star, Slash, - PlusPlus, MinusMinus, - PlusEqual, MinusEqual, StarEqual, SlashEqual, - - Colon, ColonColon, - Equal, EqualEqual, - - LeftParen, RightParen, - - LeftBracket, RightBracket, - - LeftBrace, RightBrace, - - Greater, Less, - GreaterEqual, LessEqual, - +use std::fs; +use logos::Logos; + +#[derive(Logos, Debug, PartialEq)] +pub enum Token { + #[regex(r"[[:alpha:]][[:alnum:]]*", |lex| lex.slice().to_string())] + Identifier(String), + + #[regex(r#""[^"]*""#, |lex| lex.slice().to_string())] + String(String), + + #[regex(r"[\d]+\.[\d]*|[\d]*\.[\d]+", |lex| lex.slice().parse(), priority=2)] + Float(f64), + #[regex(r"[\d]+", |lex| lex.slice().parse())] + Int(i64), + + #[regex(r"true|false", |lex| lex.slice().parse(), priority=2)] + Bool(bool), + + #[token("if")] + If, + #[token("for")] + For, + #[token("in")] + In, + #[token("loop")] + Loop, + + #[token("+")] + Plus, + #[token("++")] + PlusPlus, + #[token("-")] + Minus, + #[token("--")] + MinusMinus, + #[token("*")] + Star, + #[token("/")] + Slash, + #[token("+=")] + PlusEqual, + #[token("-=")] + MinusEqual, + #[token("*=")] + StarEqual, + #[token("/=")] + SlashEqual, + + #[token(":")] + Colon, + #[token("::")] + ColonColon, + #[token("=")] + Equal, + #[token("==")] + EqualEqual, + + #[token("(")] + LeftParen, + #[token(")")] + RightParen, + + #[token("[")] + LeftBracket, + #[token("]")] + RightBracket, + + #[token("{")] + LeftBrace, + #[token("}")] + RightBrace, + + #[token(">")] + Greater, + #[token(">=")] + GreaterEqual, + #[token("<")] + Less, + #[token("<=")] + LessEqual, + + #[token(".")] + Dot, + #[token("->")] Arrow, + #[token("\n")] Newline, - Error, - EOF, -} - -#[derive(Debug)] -pub struct Token <'a> { - kind: TokenKind, - - row: i32, - col: i32, - filename: &'a str, -} - -use std::iter::Peekable; -use std::str::Chars; + #[regex(r"[ \t\r]", logos::skip)] + Whitespace, -fn parse_number(c: char, chars: &mut Peekable<Chars>) -> TokenKind { - let mut number = String::from(c); - loop { - if let Some(c) = chars.peek() { - match *c { - '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8'| '9' | '.' => {} - _ => { break; } - } - } - number.push(chars.next().unwrap()); - } - if number.contains(".") { - return TokenKind::Float(number.parse::<f64>().unwrap()); - } else { - return TokenKind::Int(number.parse::<i64>().unwrap()); - } + #[error] + Error, } pub fn file_to_tokens(filename: &str) -> Vec<Token> { let content = fs::read_to_string(filename).unwrap(); - - let mut tokens = Vec::new(); - - let mut row = 1; - let mut col = 0; - - let mut chars = content.chars().peekable(); - while let Some(c) = chars.next() { - let mut kind = match c { - '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8'| '9' | '.' => { - parse_number(c, &mut chars) - } - _ => { - TokenKind::Error - } - }; - - tokens.push(Token{kind, row, col, filename}); - } - - tokens.push(Token{kind: TokenKind::EOF, row, col, filename}); - - return tokens; + let lexer = Token::lexer(&content); + lexer.collect() } diff --git a/tests/simple.tdy b/tests/simple.tdy index c0fdb77..d962330 100644 --- a/tests/simple.tdy +++ b/tests/simple.tdy @@ -1,2 +1,10 @@ 1234 1234.123 + +a.b + +if abcde { + a = true + c++ +} +b = false |
