684 lines
24 KiB
C++
684 lines
24 KiB
C++
|
|
#ifndef FRAMELIB_EXPRPARSER_H
|
|
#define FRAMELIB_EXPRPARSER_H
|
|
|
|
#include "FrameLib_Types.h"
|
|
|
|
#include <algorithm>
|
|
#include <cmath>
|
|
#include <functional>
|
|
#include <limits>
|
|
#include <vector>
|
|
|
|
#include <string.h>
|
|
|
|
namespace FrameLib_ExprParser
|
|
{
|
|
// Parsing Error Types
|
|
|
|
enum ExprParseError
|
|
{
|
|
kNoError = 0,
|
|
|
|
kLexError_UnknownChar,
|
|
kLexError_ZeroLengthToken,
|
|
|
|
kParseError_EmptySubExpr,
|
|
kParseError_StrayParenthesis,
|
|
kParseError_NotFunction,
|
|
kParseError_UnknownToken,
|
|
kParseError_TooFewCommas,
|
|
kParseError_TooManyCommas,
|
|
kParseError_OpPosition,
|
|
kParseError_StrayItem
|
|
};
|
|
|
|
// A base class for functions and operators
|
|
|
|
template <class T>
|
|
struct OpBase
|
|
{
|
|
OpBase(const char* name, unsigned int precedence) : mName(name), mPrecedence(precedence) {}
|
|
virtual ~OpBase() {}
|
|
|
|
virtual size_t numItems() const = 0;
|
|
virtual T call(T a, T b, T c) const = 0;
|
|
virtual FrameLib_DSP *create(FrameLib_Context context) const = 0;
|
|
|
|
const char* mName;
|
|
unsigned int mPrecedence;
|
|
};
|
|
|
|
// Graph Structure for Output
|
|
|
|
enum InputType { kInputUnused, kInputConstant, kInputVariable, kInputResult };
|
|
enum IndexType { kIsInput, kIsOutput };
|
|
|
|
template <class T>
|
|
struct Graph
|
|
{
|
|
Graph() : mNumInputs(1), mConstant(0.0) {}
|
|
|
|
struct Input
|
|
{
|
|
Input() : mType(kInputUnused), mIndex(-1), mValue(0.0) {}
|
|
Input(T value) : mType(kInputConstant), mIndex(-1), mValue(value) {}
|
|
Input(IndexType type, long index) : mType(type == kIsInput ? kInputVariable : kInputResult), mIndex(index), mValue(0.0) {}
|
|
|
|
InputType mType;
|
|
long mIndex;
|
|
T mValue;
|
|
};
|
|
|
|
struct Operation
|
|
{
|
|
Operation(const OpBase<T> *op) : mOp(op) {}
|
|
|
|
const OpBase<T> *mOp;
|
|
Input mIns[3];
|
|
};
|
|
|
|
std::vector<Operation> mOperations;
|
|
long mNumInputs;
|
|
T mConstant;
|
|
};
|
|
|
|
// Parser Class
|
|
|
|
template <class T>
|
|
class Parser
|
|
{
|
|
enum TokenType { kParenthesisLHS, kParenthesisRHS, kComma, kOperator, kNumber, kSymbol, kUnknown };
|
|
enum NodeType { kToken, kConstant, kVariable, kResult };
|
|
|
|
// A token with a type and string
|
|
|
|
struct Token
|
|
{
|
|
Token() : mType(kUnknown), mText ("") {}
|
|
Token(TokenType type, const char *string, size_t length) : mType(type), mText(string, length) {}
|
|
|
|
TokenType mType;
|
|
std::string mText;
|
|
};
|
|
|
|
// A node that might be a token (yet to be converted), a number, a result or a variable
|
|
|
|
class Node
|
|
{
|
|
|
|
public:
|
|
|
|
Node() : mType(kToken), mValue(0.0), mIndex(-1) {}
|
|
Node(Token token) : mType(kToken), mToken(token), mValue(0.0), mIndex(-1) {}
|
|
Node(T constant) : mType(kConstant), mValue(constant), mIndex(-1) {}
|
|
Node(IndexType type, long index) : mType(type == kIsInput ? kVariable : kResult), mValue(0.0), mIndex(index) {}
|
|
|
|
// Checks for types of node / tokens
|
|
|
|
inline bool isLHSParenthesis() const { return mToken.mType == kParenthesisLHS; }
|
|
inline bool isRHSParenthesis() const { return mToken.mType == kParenthesisRHS; }
|
|
inline bool isComma() const { return mToken.mType == kComma; }
|
|
inline bool isOperator() const { return mToken.mType == kOperator; }
|
|
inline bool isSymbol() const { return mToken.mType == kSymbol; }
|
|
inline bool isNumber() const { return mToken.mType == kNumber; }
|
|
inline bool isVariable() const { return mType == kVariable; }
|
|
inline bool isConstant() const { return mType == kConstant; }
|
|
inline bool isToken() const { return mType == kToken; }
|
|
|
|
// Getters
|
|
|
|
inline long getIndex() const { return mIndex; }
|
|
inline T getValue() const { return mValue; }
|
|
const char *getTokenString() const { return mToken.mText.c_str(); }
|
|
|
|
// Input Token Conversion
|
|
|
|
long convertInputTokens()
|
|
{
|
|
if (isSymbol() && !mToken.mText.compare(0, 2, "in") && mToken.mText.length() > 2)
|
|
{
|
|
for (size_t i = 2; i < mToken.mText.length(); i++)
|
|
if (!isDigit(mToken.mText[i]))
|
|
return -1;
|
|
|
|
*this = Node(kIsInput, std::stol(mToken.mText.substr(2)) - 1);
|
|
}
|
|
|
|
return mIndex + 1;
|
|
}
|
|
|
|
private:
|
|
|
|
NodeType mType;
|
|
Token mToken;
|
|
T mValue;
|
|
long mIndex;
|
|
};
|
|
|
|
// Typedefs for brevity
|
|
|
|
typedef std::vector<Node> NodeList;
|
|
typedef typename std::vector<Node>::iterator NodeListIterator;
|
|
|
|
// Constant Struct
|
|
|
|
struct Constant
|
|
{
|
|
Constant(const char* name, T value) : mName(name), mValue(value) {}
|
|
|
|
const char *mName;
|
|
T mValue;
|
|
};
|
|
|
|
// Operator List Class
|
|
|
|
class OperatorList
|
|
{
|
|
|
|
public:
|
|
|
|
OperatorList() {}
|
|
OperatorList(OperatorList&&) = default;
|
|
OperatorList& operator=(OperatorList&&) = default;
|
|
|
|
void addItem(OpBase<T> *item)
|
|
{
|
|
mItems.add(item);
|
|
}
|
|
|
|
const OpBase<T> *getItem(const char *name) const
|
|
{
|
|
for (auto it = mItems.begin(); it != mItems.end(); it++)
|
|
if (!strcmp(name, (*it)->mName))
|
|
return it->get();
|
|
|
|
return nullptr;
|
|
}
|
|
|
|
private:
|
|
|
|
// Deleted
|
|
|
|
OperatorList(const OperatorList&) = delete;
|
|
OperatorList& operator=(const OperatorList&) = delete;
|
|
|
|
FrameLib_OwnedList<OpBase<T>> mItems;
|
|
};
|
|
|
|
public:
|
|
|
|
Parser(unsigned int precedenceLevels) : mMaxLengthOp(0)
|
|
{
|
|
mOperators.resize(precedenceLevels);
|
|
}
|
|
|
|
void addOperator(OpBase<T>* op)
|
|
{
|
|
unsigned int precedence = op->mPrecedence;
|
|
|
|
assert((precedence < mOperators.size() && "precedence value too large"));
|
|
|
|
// Add the operator at the right precedence level and keep a list of unique characters
|
|
|
|
mOperators[precedence].addItem(op);
|
|
for (const char *name = op->mName; *name; name++)
|
|
if (!(isLetter(*name) || isDigit(*name) || isReservedChar(*name)))
|
|
mOperatorCharList.push_back(*name);
|
|
|
|
mMaxLengthOp = std::max(mMaxLengthOp, strlen(op->mName));
|
|
|
|
// Sort so that we can keep the unique characters by using std:unique and std::erase
|
|
|
|
std::sort(mOperatorCharList.begin(), mOperatorCharList.end());
|
|
auto it = std::unique(mOperatorCharList.begin(), mOperatorCharList.end());
|
|
mOperatorCharList.erase(it, mOperatorCharList.end());
|
|
}
|
|
|
|
void addFunction(OpBase<T> *item)
|
|
{
|
|
mFunctions.addItem(item);
|
|
}
|
|
|
|
void addConstant(const char *name, T value)
|
|
{
|
|
mConstants.push_back(Constant(name, value));
|
|
}
|
|
|
|
void setDefaultConstant(T constant)
|
|
{
|
|
mDefaultConstant = constant;
|
|
}
|
|
|
|
ExprParseError parse(Graph<T>& graph, const char *expr)
|
|
{
|
|
NodeList nodes;
|
|
ExprParseError error;
|
|
T value = 0.0;
|
|
|
|
graph = Graph<T>();
|
|
|
|
// Run lexer
|
|
|
|
if ((error = lex(nodes, expr)))
|
|
return error;
|
|
|
|
// Replace number tokens and symbols representing constants/variables with relevant node types
|
|
|
|
for (auto it = nodes.begin(); it != nodes.end(); it++)
|
|
{
|
|
// Convert number tokens to constants
|
|
|
|
if (it->isNumber())
|
|
*it = Node(convertTextToNumber(it->getTokenString()));
|
|
|
|
// Convert constant symbols to constants
|
|
|
|
if (it->isSymbol() && getConstant(it->getTokenString(), value))
|
|
*it = Node(value);
|
|
|
|
// Convert variable tokens to variables
|
|
|
|
graph.mNumInputs = std::max(it->convertInputTokens(), graph.mNumInputs);
|
|
}
|
|
|
|
// Parse
|
|
|
|
if ((error = recursiveParse(graph, nodes, 1)))
|
|
return error;
|
|
|
|
if (nodes.size() && nodes[0].isConstant())
|
|
graph.mConstant = nodes[0].getValue();
|
|
|
|
return kNoError;
|
|
}
|
|
|
|
private:
|
|
|
|
// Detect Character Classes
|
|
|
|
static inline bool isWhitespace(char c)
|
|
{
|
|
return (c == ' ') ||
|
|
(c == '\n') ||
|
|
(c == '\r') ||
|
|
(c == '\t') ||
|
|
(c == '\b') ||
|
|
(c == '\v') ||
|
|
(c == '\f');
|
|
}
|
|
|
|
static inline bool isLetter(char c)
|
|
{
|
|
return ((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'));
|
|
}
|
|
|
|
static inline bool isDigit(char c) { return (c >= '0') && (c <= '9'); }
|
|
|
|
inline bool isOperatorChar(const char c)
|
|
{
|
|
return std::find(mOperatorCharList.begin(), mOperatorCharList.end(), c) != mOperatorCharList.end();
|
|
}
|
|
|
|
// Detect Single Characters
|
|
|
|
static inline bool isUnderscore(char c) { return (c == '_'); }
|
|
static inline bool isPeriod(char c) { return (c == '.'); }
|
|
static inline bool isComma(char c) { return (c == ','); }
|
|
static inline bool isLHSParenthesis(char c) { return (c == '('); }
|
|
static inline bool isRHSParenthesis(char c) { return (c == ')'); }
|
|
|
|
// Detect Sets of Single Characters
|
|
|
|
static inline bool isParenthesis(char c) { return isLHSParenthesis(c) || isRHSParenthesis(c); }
|
|
static inline bool isReservedChar(char c) { return isUnderscore(c) || isPeriod(c) || isComma(c) || isParenthesis(c); }
|
|
|
|
// Lexing functionality
|
|
|
|
// Retrieve the type of token from its first character
|
|
|
|
TokenType getTokenType(const char c)
|
|
{
|
|
if (isLHSParenthesis(c)) return kParenthesisLHS;
|
|
if (isRHSParenthesis(c)) return kParenthesisRHS;
|
|
if (isComma(c)) return kComma;
|
|
if (isDigit(c)) return kNumber;
|
|
if (isLetter(c)) return kSymbol;
|
|
if (isOperatorChar(c)) return kOperator;
|
|
|
|
return kUnknown;
|
|
}
|
|
|
|
T convertTextToNumber(const char*text) { return std::stod(text); }
|
|
|
|
// Find the extent of an operator token
|
|
|
|
const char *extentOperator(const char *expr)
|
|
{
|
|
for (size_t i = std::min(strlen(expr), mMaxLengthOp); i > 0; i--)
|
|
{
|
|
std::string op(expr, i);
|
|
|
|
if (getOperator(op.c_str()))
|
|
return expr + i;
|
|
}
|
|
|
|
return expr;
|
|
}
|
|
|
|
// Find the extent of a number token
|
|
|
|
const char *extentNumber(const char *expr)
|
|
{
|
|
if (isDigit(*expr))
|
|
{
|
|
while (isDigit(*expr)) expr++;
|
|
if (isPeriod(*expr)) for (expr++; isDigit(*expr); expr++);
|
|
}
|
|
|
|
return expr;
|
|
}
|
|
|
|
// Find the extent of a symbol token
|
|
|
|
const char *extentSymbol(const char *expr)
|
|
{
|
|
if (isLetter(*expr))
|
|
while (isLetter(*expr) || isUnderscore(*expr) || isDigit(*expr)) expr++;
|
|
return expr;
|
|
}
|
|
|
|
// Lex the input into tokens or nodes
|
|
|
|
ExprParseError lex(std::vector<Node>& tokens, const char *expr)
|
|
{
|
|
while (*expr)
|
|
{
|
|
TokenType type;
|
|
const char *end = expr;
|
|
|
|
// Ignore whitespace
|
|
|
|
while (isWhitespace(*expr)) expr++;
|
|
|
|
if (!(*expr))
|
|
break;
|
|
|
|
// Lex the tokens
|
|
|
|
switch((type = getTokenType(*expr)))
|
|
{
|
|
case kParenthesisLHS: end = expr + 1; break;
|
|
case kParenthesisRHS: end = expr + 1; break;
|
|
case kComma: end = expr + 1; break;
|
|
case kOperator: end = extentOperator(expr); break;
|
|
case kNumber: end = extentNumber(expr); break;
|
|
case kSymbol: end = extentSymbol(expr); break;
|
|
case kUnknown: return kLexError_UnknownChar;
|
|
}
|
|
|
|
if (end <= expr)
|
|
return kLexError_ZeroLengthToken;
|
|
|
|
tokens.push_back(Token(type, expr, end - expr));
|
|
expr = end;
|
|
}
|
|
|
|
return kNoError;
|
|
}
|
|
|
|
// Parsing functionality
|
|
|
|
// Constants
|
|
|
|
bool getConstant(const char *name, T& value) const
|
|
{
|
|
for (auto it = mConstants.begin(); it != mConstants.end(); it++)
|
|
{
|
|
if (!strcmp(name, it->mName))
|
|
{
|
|
value = it->mValue;
|
|
return true;
|
|
}
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
// Operators / Functions
|
|
|
|
const OpBase<T> *getOperator(const char *name, unsigned int precedence) const
|
|
{
|
|
return mOperators[precedence].getItem(name);
|
|
}
|
|
|
|
const OpBase<T> *getOperator(const char *name) const
|
|
{
|
|
const OpBase<T> *op = nullptr;
|
|
|
|
for (unsigned int i = 0; i < static_cast<unsigned int>(mOperators.size()); i++)
|
|
if ((op = getOperator(name, i)))
|
|
return op;
|
|
|
|
return op;
|
|
}
|
|
|
|
// Parsing Functions
|
|
|
|
typename Graph<T>::Input parseInput(const Node& arg)
|
|
{
|
|
if (arg.isConstant())
|
|
return typename Graph<T>::Input(arg.getValue());
|
|
if (arg.isVariable())
|
|
return typename Graph<T>::Input(kIsInput, arg.getIndex());
|
|
|
|
return typename Graph<T>::Input(kIsOutput, arg.getIndex());
|
|
}
|
|
|
|
Node parseOperation(Graph<T>& graph, const OpBase<T> *op, const Node& arg1, const Node& arg2, const Node& arg3)
|
|
{
|
|
// Fold constants
|
|
|
|
if (arg1.isConstant() && arg2.isConstant() && arg3.isConstant())
|
|
return Node(op->call(arg1.getValue(), arg2.getValue(), arg3.getValue()));
|
|
|
|
// Add to the graph
|
|
|
|
graph.mOperations.push_back(op);
|
|
|
|
size_t numItems = op->numItems();
|
|
if (numItems) graph.mOperations.back().mIns[0] = parseInput(arg1);
|
|
if (numItems > 1) graph.mOperations.back().mIns[1] = parseInput(arg2);
|
|
if (numItems > 2) graph.mOperations.back().mIns[2] = parseInput(arg3);
|
|
|
|
return Node(kIsOutput, static_cast<long>(graph.mOperations.size() - 1));
|
|
}
|
|
|
|
ExprParseError parseUnaryOperator(Graph<T>& graph, const OpBase<T> *op, NodeList& nodes, const NodeListIterator& it)
|
|
{
|
|
// Check the operator isn't last, that its either first, or precededed by an operator and not followed by one
|
|
// N.B. return no error in case this is also an operator later, otherwise it'll be picked up as a stray item
|
|
|
|
if ((it == nodes.end() - 1) || (it != nodes.begin() && !(it - 1)->isOperator()) || (it + 1)->isOperator())
|
|
return kNoError;
|
|
|
|
// Parse operation amd erase consumed node (the iterator is still valid so don't alter)
|
|
|
|
*it = parseOperation(graph, op, *(it + 1), *(it + 1), *(it + 1));
|
|
nodes.erase(it + 1);
|
|
|
|
return kNoError;
|
|
}
|
|
|
|
ExprParseError parseBinaryOperator(Graph<T>& graph, const OpBase<T> *op, NodeList& nodes, NodeListIterator& it)
|
|
{
|
|
// Check the operator is not at the beginning or end and the nodes either side are not operators
|
|
|
|
if (it == nodes.begin() || it == nodes.end() - 1 || (it - 1)->isOperator() || (it + 1)->isOperator())
|
|
return kParseError_OpPosition;
|
|
|
|
// Parse operation and erase consumed nodes (set the iterator back one, as it will be incremented)
|
|
|
|
*(it - 1) = parseOperation(graph, op, *(it - 1), *(it + 1), *(it + 1));
|
|
it = nodes.erase(it, it + 2) - 1;
|
|
|
|
return kNoError;
|
|
}
|
|
|
|
void parseFunction(Graph<T>& graph, const OpBase<T> *function, NodeList& nodes)
|
|
{
|
|
size_t numItems = function->numItems();
|
|
|
|
if (!numItems) nodes.push_back(Node(0.0));
|
|
|
|
nodes[0] = parseOperation(graph, function, nodes[0], nodes[numItems > 1 ? 1 : 0], nodes[numItems > 2 ? 2 : 0]);
|
|
}
|
|
|
|
ExprParseError parseOperators(Graph<T>& graph, Node &result, NodeList& nodes)
|
|
{
|
|
const OpBase<T> *op = nullptr;
|
|
ExprParseError error = kNoError;
|
|
|
|
// Check for any remaining tokens that are not operators
|
|
|
|
for (auto it = nodes.begin(); it != nodes.end(); it++)
|
|
if (it->isToken() && !it->isOperator())
|
|
return kParseError_UnknownToken;
|
|
|
|
// Now resolve unary operators by searching right-to-left
|
|
|
|
for (auto it = nodes.end(); it != nodes.begin(); it--)
|
|
if ((op = getOperator((it - 1)->getTokenString(), 0)))
|
|
if ((error = parseUnaryOperator(graph, op, nodes, it - 1)))
|
|
return error;
|
|
|
|
// Now resolve binary operators in order of precedence
|
|
|
|
for (unsigned int i = 1; i < static_cast<unsigned int>(mOperators.size()); i++)
|
|
{
|
|
for (auto it = nodes.begin(); it < nodes.end(); it++)
|
|
if ((op = getOperator(it->getTokenString(), i)))
|
|
if ((error = parseBinaryOperator(graph, op, nodes, it)))
|
|
return error;
|
|
}
|
|
|
|
// Store result and return, checking for stray items
|
|
|
|
result = nodes[0];
|
|
|
|
return (nodes.size() != 1 || nodes[0].isToken()) ? kParseError_StrayItem : kNoError;;
|
|
}
|
|
|
|
ExprParseError recursiveParse(Graph<T>& graph, NodeList& nodes, size_t resultItems)
|
|
{
|
|
NodeListIterator n1, n2;
|
|
ExprParseError error = kNoError;
|
|
|
|
// Check for empty node list
|
|
|
|
if (nodes.empty())
|
|
return kParseError_EmptySubExpr;
|
|
|
|
// Parentheses
|
|
|
|
for (n2 = nodes.begin(); ; )
|
|
{
|
|
const OpBase<T> *function = nullptr;
|
|
|
|
// Find first RHS parenthesis and matching LHS parenthesis
|
|
|
|
for ( ; n2 != nodes.end(); n2++)
|
|
if (n2->isRHSParenthesis())
|
|
break;
|
|
|
|
// N.B. n1 is offset to stay in range
|
|
|
|
for (n1 = n2; n1 != nodes.begin(); n1--)
|
|
if ((n1 - 1)->isLHSParenthesis())
|
|
break;
|
|
|
|
// Check that there are parentheses found (if not continue) and for stray parentheses
|
|
|
|
if ((n1 == nodes.begin()) && (n2 == nodes.end()))
|
|
break;
|
|
|
|
if ((n1 == nodes.begin()) != (n2 == nodes.end()))
|
|
return kParseError_StrayParenthesis;
|
|
|
|
// Copy child nodes, determine if this is a function call and how many items it requires
|
|
// N.B. - here n1 is decremented to indicate the actual LHS parenthesis
|
|
|
|
NodeList childNodes(n1--, n2);
|
|
size_t numItems = 1;
|
|
|
|
if ((n1 != nodes.begin()) && (n1 - 1)->isSymbol())
|
|
{
|
|
// Check that a function is found
|
|
|
|
if (!(function = mFunctions.getItem((n1 - 1)->getTokenString())))
|
|
return kParseError_NotFunction;
|
|
|
|
numItems = function->numItems();
|
|
n1--;
|
|
}
|
|
|
|
// Parse the child nodes and then the function (if it is a function)
|
|
|
|
if ((error = recursiveParse(graph, childNodes, numItems)))
|
|
return error;
|
|
|
|
if (function)
|
|
parseFunction(graph, function, childNodes);
|
|
|
|
// Copy the result and then erase the unneeded nodes from this level
|
|
|
|
*n1 = childNodes[0];
|
|
n2 = nodes.erase(n1 + 1, n2 + 1);
|
|
}
|
|
|
|
// Commas
|
|
|
|
for (n1 = nodes.begin(); n1 != nodes.end(); n1++)
|
|
{
|
|
// Find the next comma
|
|
|
|
for (n2 = n1; n2 != nodes.end(); n2++)
|
|
if (n2->isComma())
|
|
break;
|
|
|
|
// Check that there are some items to parse
|
|
|
|
if (n1 == n2)
|
|
return kParseError_EmptySubExpr;
|
|
|
|
// Copy the child nodes and erase from this level, then parse the remaining operators
|
|
|
|
NodeList childNodes(n1, n2);
|
|
nodes.erase(n1 + 1, (n2 == nodes.end()) ? n2 : n2 + 1);
|
|
if ((error = parseOperators(graph, *n1, childNodes)))
|
|
return error;
|
|
}
|
|
|
|
// Check that we have the required number of nodes
|
|
|
|
if (nodes.size() == resultItems)
|
|
return kNoError;
|
|
|
|
return (nodes.size() < resultItems) ? kParseError_TooFewCommas : kParseError_TooManyCommas;
|
|
}
|
|
|
|
private:
|
|
|
|
std::vector<OperatorList> mOperators;
|
|
OperatorList mFunctions;
|
|
std::vector<Constant> mConstants;
|
|
|
|
std::vector<char> mOperatorCharList;
|
|
size_t mMaxLengthOp;
|
|
|
|
T mDefaultConstant;
|
|
};
|
|
}
|
|
|
|
#endif
|