From eddfeb7bfc5f0a5507076ebc1d17abe03045c682 Mon Sep 17 00:00:00 2001 From: Hans Date: Thu, 11 Nov 2021 03:02:32 +0100 Subject: [PATCH] Cleanup, restructuring --- CMakeLists.txt | 1 + src/common.hpp | 2 +- src/exceptions.hpp | 45 ++++++++++++++++++++++++++++ src/keywords.hpp | 4 +-- src/main.cpp | 74 ++++++++++++++++++++++++++++------------------ src/parse.cpp | 73 +++++++++++++++++++++++++++++++++++++-------- src/parse.hpp | 50 ++++--------------------------- 7 files changed, 160 insertions(+), 89 deletions(-) create mode 100644 src/exceptions.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 03df932..e8b3e43 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,6 +2,7 @@ cmake_minimum_required(VERSION 3.10) project(biscuit_interpreter) +set(CMAKE_CXX_STANDARD 20) set(CMAKE_EXPORT_COMPILE_COMMANDS ON) add_executable(biscuit_interpreter src/common.hpp) diff --git a/src/common.hpp b/src/common.hpp index b64eada..41ff8c9 100644 --- a/src/common.hpp +++ b/src/common.hpp @@ -12,7 +12,7 @@ enum TokenType enum ValueType { STRING, - DECIMAL, + NUMBER, ANY }; diff --git a/src/exceptions.hpp b/src/exceptions.hpp new file mode 100644 index 0000000..286b782 --- /dev/null +++ b/src/exceptions.hpp @@ -0,0 +1,45 @@ +#ifndef BISCUIT_INTERPRETER_EXCEPTIONS_HPP +#define BISCUIT_INTERPRETER_EXCEPTIONS_HPP + +#include +#include "common.hpp" + +struct MalformedTokenExcept : public std::exception /* thrown from Token's constructor; malformed_str keeps the word that was being parsed */ +{ + std::string malformed_str; + inline MalformedTokenExcept(const std::string& arg_str) + : malformed_str(arg_str) {} +}; + +struct UnknownKeywordExcept : public std::exception /* keeps the string it was constructed with in unknown_keyword */ +{ + std::string unknown_keyword; + inline UnknownKeywordExcept(const std::string& arg_str) + : unknown_keyword(arg_str) {} +}; + +struct WrongArgumentCountExcept : public std::exception /* keeps a keyword name together with the expected and the received argument count */ +{ + int expected, got; + std::string keyword_name; + inline WrongArgumentCountExcept(const std::string& _name, int _expected, int _got) + : expected(_expected), got(_got), keyword_name(_name) {} +}; + +struct WrongTokenExcept 
: public std::exception +{ + enum TokenType expected, got; + std::string keyword_name, token_str; + inline WrongTokenExcept(const std::string& _keyword_name, const std::string& _token_str, const enum TokenType& _expected, const enum TokenType& _got) + : expected(_expected), got(_got), keyword_name(_keyword_name), token_str(_token_str) {} +}; + +struct TypeErrorExcept : public std::exception +{ + enum ValueType expected, got; + std::string keyword_name, token_str; + inline TypeErrorExcept(const std::string& _keyword_name, const std::string& _token_str, const enum ValueType& _expected, const enum ValueType& _got) + : expected(_expected), got(_got), keyword_name(_keyword_name), token_str(_token_str) {} +}; + +#endif //BISCUIT_INTERPRETER_EXCEPTIONS_HPP diff --git a/src/keywords.hpp b/src/keywords.hpp index 3230335..9f3ee83 100644 --- a/src/keywords.hpp +++ b/src/keywords.hpp @@ -12,8 +12,8 @@ struct Keyword const std::string name; const int expected_num_args; // -1 means: 1 to infinity, in which case all args are of same type - const std::vector expected_token_types; - const std::vector expected_value_types; + const std::vector expected_token_types; + const std::vector expected_value_types; }; extern const std::vector keywords; diff --git a/src/main.cpp b/src/main.cpp index 76757a1..d70630f 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -1,10 +1,32 @@ +#if defined(WIN32) || defined(_WIN32) || defined(__WIN32) /* _WIN32 is the macro compilers predefine for Windows targets; WIN32 and __WIN32 are kept for toolchains that only set those */ +#include +#endif + #include #include #include #include +#include +#include "common.hpp" +#include "exceptions.hpp" #include "parse.hpp" +bool request_console(); /* returns the result of AllocConsole() in Windows builds and true in every other build */ + +// Must request console on Windows +#if defined(WIN32) || defined(_WIN32) || defined(__WIN32) /* must stay in sync with the include guard at the top of this file, otherwise AllocConsole is undeclared */ +bool request_console() +{ + return AllocConsole(); +} +#else +bool request_console() +{ + return true; +} +#endif + std::ifstream get_infile(int argc, char ** argv) { if(argc != 2) @@ -24,40 +46,34 @@ std::ifstream get_infile(int argc, char ** argv) return infile; } +std::string code = \ +R"del( +PRINT 10.0 
|hello_world +)del"; + int main(int argc, char * argv[]) { + /* + // Get the entire source code from the file std::ifstream infile = get_infile(argc, argv); - std::string word; - std::vector tokens; + std::stringstream buffer; + buffer << infile.rdbuf(); + */ - while(infile >> word) - { - try - { - Token token(word); - tokens.push_back(token); - } - catch(MalformedIdentifierExcept& exc) - { - std::cout << "Malformed identifier: '" << exc.malformed_str << "'" - << "\nAborting..." << std::endl; - exit(EXIT_FAILURE); - } - catch(UnknownKeywordExcept& exc) - { - std::cout << "Unknown Keyword: '" << exc.unknown_keyword << "'" - << "\nAborting..." << std::endl; - exit(EXIT_FAILURE); - } - catch(WrongTokenExcept& exc) - { - std::cout << "Wrong symbol type\n Aborting..." << std::endl; - exit(EXIT_FAILURE); - } - } + // On Windows, we have to explicitly request a console + /*if(!request_console()) + { + std::cerr << "Failed to get console!" << std::endl; + exit(EXIT_FAILURE); + }*/ - Instruction instr(tokens); - instr.print(); + std::cerr << "POOP" << std::endl; /* NOTE(review): looks like leftover debug output - confirm before merging */ + + std::vector instructions = parse_instructions(code); /* parses the hard-coded program in the global 'code'; the file-reading path above is commented out. NOTE(review): the try/catch blocks removed by this patch are not replaced, so an exception raised while tokenizing is not caught in main - confirm this is intended */ + + std::cerr << "Number of instructions parsed: " << instructions.size() << std::endl; + for(auto& instr : instructions) + instr.print(); return 0; } diff --git a/src/parse.cpp b/src/parse.cpp index 631ec86..1a0de21 100644 --- a/src/parse.cpp +++ b/src/parse.cpp @@ -1,21 +1,72 @@ #include #include #include +#include +#include "exceptions.hpp" #include "keywords.hpp" #include "parse.hpp" #include "common.hpp" +static const std::string blank_space = "\t "; /* characters that separate tokens inside one line */ +static const std::string line_end = "\n"; /* characters that separate instructions */ + +// turn a line (one instruction) into tokens +std::vector tokenize_line(const std::string& line) +{ + std::vector tokens; + + // find all individual tokens + std::size_t end, begin = line.find_first_not_of(blank_space, 0); /* begin is npos for an empty or all-blank line, so the loop below is skipped */ + while(begin != std::string::npos) + { + end = line.find_first_of(blank_space, begin); /* npos when the word runs to the end of the line; substr then takes the remainder */ + std::string word = line.substr(begin, end - begin); + 
tokens.emplace_back(word); + + begin = line.find_first_not_of(blank_space, end); + } + + return tokens; /* plain return of the local: wrapping it in std::move would block copy elision */ +} + +std::vector parse_instructions(const std::string& code_arg) +{ + // Make a copy of the code and append a line_end symbol to make sure the + // parser doesn't break on EOF + std::string code = code_arg; + code.append(line_end); + + std::vector instructions; + + std::size_t end, begin = code.find_first_not_of(line_end, 0); /* skips leading line_end characters; runs of them never produce an entry */ + while(begin != std::string::npos) + { + end = code.find_first_of(line_end, begin); + std::string line = code.substr(begin, end - begin); + std::vector tokens = tokenize_line(line); + instructions.emplace_back(tokens); + + begin = code.find_first_not_of(line_end, end); + } + + return instructions; /* plain return of the local: wrapping it in std::move would block copy elision */ +} bool is_keyword(std::string str) { for(char &c : str) - c = std::tolower(c); + c = (char) std::tolower((unsigned char) c); /* std::tolower requires a value representable as unsigned char; a negative plain char is undefined behaviour */ - for(auto &keyword : keywords) + return std::any_of(keywords.begin(), keywords.end(),[&str](auto& keyword){ + return keyword.name == str; + }); + + /* for(auto &keyword : keywords) if(str == keyword.name) return true; return 0; + */ } bool is_string_literal(const std::string& str) @@ -70,7 +121,7 @@ bool Token::parse_as_literal(const std::string &str) else if(is_decimal_literal(str)) { type = TokenType::LITERAL; - literal_type = ValueType::DECIMAL; + literal_type = ValueType::NUMBER; val_float = std::stof(str); val_string = str; return true; @@ -86,7 +137,9 @@ bool Token::parse_as_identifier(const std::string &str) if(is_identifier(str)) { type = TokenType::IDENTIFIER; + literal_type = ValueType::ANY; val_string = str; + val_float = 0.0f; return true; } return false; @@ -94,13 +147,9 @@ bool Token::parse_as_identifier(const std::string &str) Token::Token(const std::string &str) { - if (parse_as_keyword(str)); - else if(parse_as_literal(str)); - else if(parse_as_identifier(str)); - else - { - throw MalformedIdentifierExcept(str); - } + // First try to parse as keyword, then as literal, then as identifier + 
if(!(parse_as_keyword(str) || parse_as_literal(str) || parse_as_identifier(str))) /* || short-circuits, so parsing stops at the first form that accepts str; this matches the if / else-if chain being removed */ + throw MalformedTokenExcept(str); // if all fails -> malformed token } void Token::print() const @@ -116,7 +165,7 @@ void Token::print() const case TokenType::LITERAL: std::cerr << "Literal of type "; - if(literal_type == ValueType::DECIMAL) + if(literal_type == ValueType::NUMBER) std::cerr << "decimal: " << val_float; else if(literal_type == ValueType::STRING) std::cerr << "string: \"" << val_string << "\""; @@ -128,8 +177,6 @@ void Token::print() const std::cerr << std::endl; } - - Instruction::Instruction(std::vector& _token_list) { // move over the tokens diff --git a/src/parse.hpp b/src/parse.hpp index 9bce45e..82613d4 100644 --- a/src/parse.hpp +++ b/src/parse.hpp @@ -1,61 +1,21 @@ #ifndef BISCUIT_PARSE_HPP_INCLUDED #define BISCUIT_PARSE_HPP_INCLUDED -#include #include #include #include "keywords.hpp" #include "common.hpp" -struct MalformedIdentifierExcept : public std::exception -{ - std::string malformed_str; - MalformedIdentifierExcept(const std::string& arg_str) - : malformed_str(arg_str) {} -}; - -struct UnknownKeywordExcept : public std::exception -{ - std::string unknown_keyword; - UnknownKeywordExcept(const std::string& arg_str) - : unknown_keyword(arg_str) {} -}; - -struct WrongArgumentCountExcept : public std::exception -{ - int expected, got; - std::string keyword_name; - WrongArgumentCountExcept(std::string _name, int _expected, int _got) - : expected(_expected), got(_got), keyword_name(_name) {} -}; - -struct WrongTokenExcept : public std::exception -{ - TokenType expected, got; - std::string keyword_name, token_str; - WrongTokenExcept(std::string _keyword_name, std::string _token_str, const TokenType& _expected, const TokenType& _got) - : expected(_expected), got(_got), keyword_name(_keyword_name), token_str(_token_str) {} -}; - -struct TypeErrorExcept : public std::exception -{ - ValueType expected, got; - std::string keyword_name, token_str; - 
TypeErrorExcept(std::string _keyword_name, std::string _token_str, const ValueType& _expected, const ValueType& _got) - : expected(_expected), got(_got), keyword_name(_keyword_name), token_str(_token_str) {} -}; - - struct Token { private: - bool parse_as_keyword(const std::string &str); + bool parse_as_keyword(const std::string& str); /* called first by the constructor */ - bool parse_as_literal(const std::string &str); + bool parse_as_literal(const std::string& str); /* on success sets type to LITERAL, fills literal_type and the value fields, and returns true */ // if a word is not a keyword or literal, it must be identifier - bool parse_as_identifier(const std::string &str); + bool parse_as_identifier(const std::string& str); /* on success sets type to IDENTIFIER, stores str in val_string and returns true; returns false otherwise */ public: enum TokenType type; @@ -63,7 +23,7 @@ public: float val_float; std::string val_string; - Token(const std::string &str); + Token(const std::string& str); /* builds a token from one word; throws MalformedTokenExcept (see parse.cpp) */ void print() const; }; @@ -79,4 +39,6 @@ public: void print(); }; +std::vector parse_instructions(const std::string& code_arg); /* defined in parse.cpp: splits code_arg on '\n', tokenizes every non-empty line and returns one element per such line */ + #endif