mathomatic/parse.c

933 lines
21 KiB
C

/*
* Expression parsing routines for Mathomatic.
*
* Copyright (C) 1987-2012 George Gesslein II.
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
The chief copyright holder can be contacted at gesslein@mathomatic.org, or
George Gesslein II, P.O. Box 224, Lansing, NY 14882-0224 USA.
*/
#include "includes.h"
/*
* Convert all alphabetic characters in a string to lower case.
*/
void
str_tolower(cp)
char *cp;
{
if (cp) {
for (; *cp; cp++) {
if (isascii(*cp) && isupper(*cp))
*cp = tolower(*cp);
}
}
}
/*
* Display an up arrow pointing to the error, if appropriate.
* The up arrow is positioned under the input string,
* followed by the error message, which must be a constant string.
*/
void
put_up_arrow(cnt, cp)
int cnt; /* position of error, relative to "input_column" */
char *cp; /* error message (constant ASCII string) */
{
#if !SILENT && !LIBRARY
int i;
cnt += input_column;
if (!quiet_mode && point_flag && (screen_columns == 0 || cnt < screen_columns)) {
for (i = 0; i < cnt; i++) {
printf(" ");
}
printf("^ ");
}
#endif
error(cp);
}
/*
* Return true if character is a valid starting variable character.
*/
int
isvarchar(ch)
int ch;
{
if (isdigit(ch)) { /* variable names can never start with a digit */
return false;
}
return(ch == '_' || (ch && strchr(special_variable_characters, ch)) || isalpha(ch));
}
/*
* Return +1 for an opening parenthesis, -1 for a closing parenthesis.
* Otherwise, return 0.
*/
int
paren_increment(ch)
int ch;
{
switch (ch) {
case '(':
return 1;
case ')':
return -1;
}
return 0;
}
/*
* Return true if character (ch) is the beginning of a Mathomatic operator.
*/
int
is_mathomatic_operator(ch)
int ch;
{
switch (ch) {
case '!':
case '*':
case '^':
case '/':
case '%':
case '+':
case '-':
case '=':
case '|':
return true;
}
return false;
}
/*
* Parenthesize an operator.
*/
void
binary_parenthesize(p1, n, i)
token_type *p1; /* pointer to expression */
int n; /* length of expression */
int i; /* location of operator to parenthesize in expression */
{
int j;
int level;
int skip_negate;
#if DEBUG
if (i >= (n - 1) || (n & 1) != 1 || (i & 1) != 1 || p1[i].kind != OPERATOR) {
error_bug("Internal error in arguments to binary_parenthesize().");
}
#endif
skip_negate = (p1[i].token.operatr != NEGATE);
level = p1[i].level++;
if (p1[i-1].level++ > level) {
for (j = i - 2; j >= 0; j--) {
if (p1[j].level <= level)
break;
p1[j].level++;
}
}
finish:
if (p1[i+1].level++ > level) {
for (j = i + 2; j < n; j++) {
if (p1[j].level <= level)
break;
p1[j].level++;
}
} else if (skip_negate && p1[i+1].level > level && i + 3 < n && p1[i+2].level == level
&& p1[i+2].token.operatr == NEGATE) {
p1[i+2].level++;
i += 2;
goto finish;
}
}
/*
* Handle and remove the special NEGATE operator.
*/
void
handle_negate(equation, np)
token_type *equation;
int *np;
{
int i;
for (i = 1; i < *np; i += 2) {
if (equation[i].token.operatr == NEGATE) {
/* parenthesize it first: */
binary_parenthesize(equation, *np, i);
/* finish changing negate operator to -1.0 times the operand: */
equation[i].token.operatr = TIMES;
}
}
}
/*
* Parenthesize most operators so that expression evaluation is in the correct order.
* Gives different operators on the same level in an expression the correct priority.
* Similar operators on the same level are always evaluated or grouped left to right,
* except for the power operator.
*
* organize() should be called after this to remove unneeded parentheses.
*/
void
give_priority(equation, np)
token_type *equation; /* pointer to expression */
int *np; /* pointer to expression length */
{
int i;
/* Higher priority (precedence) operators need to be parenthesized first: */
for (i = 1; i < *np; i += 2) {
if (equation[i].token.operatr >= FACTORIAL) {
binary_parenthesize(equation, *np, i);
}
}
if (right_associative_power) {
for (i = *np - 2; i >= 1; i -= 2) { /* count down (for right to left evaluation) */
if (equation[i].token.operatr == POWER) {
binary_parenthesize(equation, *np, i);
}
}
} else {
for (i = 1; i < *np; i += 2) { /* count up (for left to right evaluation) */
if (equation[i].token.operatr == POWER) {
binary_parenthesize(equation, *np, i);
}
}
}
for (i = 1; i < *np; i += 2) {
switch (equation[i].token.operatr) {
case TIMES:
case DIVIDE:
case MODULUS:
case IDIVIDE:
binary_parenthesize(equation, *np, i);
break;
}
}
handle_negate(equation, np); /* Make this the first called function here to make negate highest priority. */
}
/*
* This is a simple, non-recursive mathematical expression parser.
* To parse, the character string is sequentially read and stored
* in the internal storage format.
* The maximum length that can be parsed is the size of an equation side (n_tokens).
* Any syntax and other errors are carefully reported and give a NULL return.
*
* Returns the new string position, or NULL if error.
*/
char *
parse_section(equation, np, cp, allow_space)
token_type *equation; /* where the parsed expression is stored (equation side) */
int *np; /* pointer to the returned parsed expression length */
char *cp; /* string to parse */
int allow_space; /* if false, any space characters terminate parsing */
{
int i;
int n = 0, old_n; /* position in equation[] */
int cur_level = 1; /* current level of parentheses */
int operand = false; /* flip-flop between operand and operator */
char *cp_start, *cp1;
double d;
int abs_count = 0;
int abs_array[10];
if (cp == NULL)
return(NULL);
cp_start = cp;
for (;; cp++) {
if (n > (n_tokens - 10)) {
error_huge();
}
switch (*cp) {
case '(':
case '{':
if (operand) {
#if 1
/* Allow things like (x)(y) and x{y} by defaulting to the times operator. The result is x*y. */
operand = false;
equation[n].level = cur_level;
equation[n].kind = OPERATOR;
equation[n].token.operatr = TIMES;
n++;
#else
goto syntax_error;
#endif
}
cur_level++;
continue;
case ')':
case '}':
cur_level--;
if (cur_level <= 0 || (abs_count > 0 && cur_level < abs_array[abs_count-1])) {
put_up_arrow(cp - cp_start, _("Unmatched parenthesis: too many )"));
return(NULL);
}
if (!operand) {
goto syntax_error;
}
continue;
case ' ':
case '\t':
case '\f':
if (allow_space)
continue;
case ',':
case '=':
case ';':
case '\0':
case '\n':
goto p_out; /* terminator encountered */
case '\r':
continue;
case '\033':
if (cp[1] == '[' || cp[1] == 'O') {
error(_("Cursor or function key string encountered, unable to interpret."));
return(NULL);
}
continue;
}
operand = !operand;
switch (*cp) {
case '|':
if (operand) {
if (abs_count >= ARR_CNT(abs_array)) {
error(_("Too many nested absolute values."));
return(NULL);
}
cur_level += 3;
abs_array[abs_count++] = cur_level;
} else {
if (abs_count <= 0 || cur_level != abs_array[--abs_count]) {
goto syntax_error;
}
cur_level--;
equation[n].level = cur_level;
equation[n].kind = OPERATOR;
equation[n].token.operatr = POWER;
n++;
equation[n].level = cur_level;
equation[n].kind = CONSTANT;
equation[n].token.constant = 2.0;
n++;
cur_level--;
equation[n].level = cur_level;
equation[n].kind = OPERATOR;
equation[n].token.operatr = POWER;
n++;
equation[n].level = cur_level;
equation[n].kind = CONSTANT;
equation[n].token.constant = 0.5;
n++;
cur_level--;
}
operand = !operand;
break;
case '!':
if (operand) {
goto syntax_error;
}
if (cp[1] == '!' && cp[2] != '!') {
warning(_("Multifactorial not implemented, using x!! = (x!)!"));
}
equation[n].level = cur_level;
equation[n].kind = OPERATOR;
equation[n].token.operatr = FACTORIAL;
n++;
equation[n].level = cur_level;
equation[n].kind = CONSTANT;
equation[n].token.constant = 1.0;
n++;
operand = true;
break;
case '^':
parse_power:
if (operand) {
goto syntax_error;
}
equation[n].level = cur_level;
equation[n].kind = OPERATOR;
equation[n].token.operatr = POWER;
n++;
break;
case '*':
if (cp[1] == '*') { /* for "**" */
cp++;
goto parse_power;
}
if (operand) {
goto syntax_error;
}
equation[n].level = cur_level;
equation[n].kind = OPERATOR;
equation[n].token.operatr = TIMES;
n++;
break;
case '/':
if (operand) {
goto syntax_error;
}
if (cp[1] == '/') {
cp++;
equation[n].level = cur_level;
equation[n].kind = OPERATOR;
equation[n].token.operatr = IDIVIDE;
} else {
equation[n].level = cur_level;
equation[n].kind = OPERATOR;
equation[n].token.operatr = DIVIDE;
}
n++;
break;
case '%':
if (operand) {
#if 1
/* Allow Maxima's %e, %i, and %pi by ignoring % in the wrong place. */
if (isalpha(cp[1])) {
operand = false;
break;
}
#endif
goto syntax_error;
}
equation[n].level = cur_level;
equation[n].kind = OPERATOR;
equation[n].token.operatr = MODULUS;
n++;
break;
case '+':
case '-':
if (!operand) {
equation[n].level = cur_level;
equation[n].kind = OPERATOR;
equation[n].token.operatr = ((*cp == '+') ? PLUS : MINUS);
n++;
}
if (strncmp(cp, "+/-", 3) == 0) {
equation[n].level = cur_level;
equation[n].kind = VARIABLE;
next_sign(&equation[n].token.variable);
n++;
equation[n].level = cur_level;
equation[n].kind = OPERATOR;
equation[n].token.operatr = TIMES;
n++;
cp += 2;
operand = false;
break;
}
if (!operand) {
break;
}
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
case '.':
if (!operand) {
operand = true;
equation[n].level = cur_level;
equation[n].kind = OPERATOR;
equation[n].token.operatr = TIMES;
n++;
}
if (*cp == '-') {
equation[n].kind = CONSTANT;
equation[n].token.constant = -1.0;
equation[n].level = cur_level;
n++;
equation[n].kind = OPERATOR;
equation[n].token.operatr = NEGATE;
equation[n].level = cur_level;
n++;
operand = false;
continue;
}
cp1 = cp;
errno = 0;
d = strtod(cp1, &cp);
if (cp == cp1) {
goto syntax_error;
}
if (errno) {
put_up_arrow(cp1 - cp_start, _("Constant out of range."));
return(NULL);
}
equation[n].kind = CONSTANT;
equation[n].token.constant = d;
equation[n].level = cur_level;
n++;
cp--;
break;
case '#':
if (!operand) {
goto syntax_error;
}
cp++;
cp1 = NULL;
switch (*cp) {
case '+':
case '-':
i = strtol(cp, &cp1, 10);
i = cur_equation + i;
break;
default:
i = strtol(cp, &cp1, 10) - 1;
break;
}
if (cp1 == NULL || cp == cp1) {
put_up_arrow(cp - cp_start, _("Error parsing equation space number after #."));
return NULL;
}
if (empty_equation_space(i)) {
put_up_arrow(cp - cp_start, _("No expression available in # specified equation space."));
return NULL;
}
cp = cp1 - 1;
old_n = n;
if (n_rhs[i]) {
n += n_rhs[i];
if (n > n_tokens) {
error_huge();
}
blt(&equation[old_n], rhs[i], n_rhs[i] * sizeof(token_type));
} else {
n += n_lhs[i];
if (n > n_tokens) {
error_huge();
}
blt(&equation[old_n], lhs[i], n_lhs[i] * sizeof(token_type));
}
for (; old_n < n; old_n++) {
equation[old_n].level += cur_level;
}
break;
default:
if (!isvarchar(*cp)) {
put_up_arrow(cp - cp_start, _("Unrecognized character."));
return(NULL);
}
if (!operand) {
operand = true;
equation[n].level = cur_level;
equation[n].kind = OPERATOR;
equation[n].token.operatr = TIMES;
n++;
}
cp1 = cp;
if (strncasecmp(cp, "inf", strlen("inf")) == 0
&& !isvarchar(cp[strlen("inf")])) {
equation[n].kind = CONSTANT;
equation[n].token.constant = INFINITY; /* the infinity constant */
cp += strlen("inf");
} else if (strncasecmp(cp, INFINITY_NAME, strlen(INFINITY_NAME)) == 0
&& !isvarchar(cp[strlen(INFINITY_NAME)])) {
equation[n].kind = CONSTANT;
equation[n].token.constant = INFINITY; /* the infinity constant */
cp += strlen(INFINITY_NAME);
} else {
equation[n].kind = VARIABLE;
cp = parse_var(&equation[n].token.variable, cp);
if (cp == NULL) {
return(NULL);
}
}
if (*cp == '(') {
/* Named functions currently not implemented, except when using m4. */
#if LIBRARY
put_up_arrow(cp1 - cp_start, _("Unknown function."));
#else
put_up_arrow(cp1 - cp_start, _("Unknown function; try using rmath, which allows basic functions."));
#endif
return(NULL);
}
cp--;
equation[n].level = cur_level;
n++;
break;
}
}
p_out:
if (abs_count != 0 || (n && !operand)) {
goto syntax_error;
}
if (cur_level != 1) {
put_up_arrow(cp - cp_start, _("Unmatched parenthesis: missing )"));
return(NULL);
}
while (*cp == '=')
cp++;
*np = n;
if (n) {
give_priority(equation, np);
organize(equation, np);
}
input_column += (cp - cp_start);
return cp;
syntax_error:
put_up_arrow(cp - cp_start, _("Syntax error."));
return(NULL);
}
/*
* Parse an equation string into equation space "n".
* The result will not necessarily be an equation,
* because this can parse any expression or equation.
*
* Returns the new string position, or NULL if error.
* Currently, there can be no more to parse in the string when this returns.
*/
char *
parse_equation(n, cp)
int n; /* equation space number */
char *cp; /* pointer to the beginning of the equation character string */
{
if ((cp = parse_expr(lhs[n], &n_lhs[n], cp, true)) != NULL) {
if ((cp = parse_expr(rhs[n], &n_rhs[n], cp, true)) != NULL) {
if (!extra_characters(cp))
return cp;
}
}
n_lhs[n] = 0;
n_rhs[n] = 0;
return NULL;
}
/*
* Parse an expression (not an equation) string, with equation space pull
* if the string contains "#" followed by an equation number.
*
* Returns the new string position, or NULL if error.
*/
char *
parse_expr(equation, np, cp, allow_space)
token_type *equation; /* where the parsed expression is stored (equation side) */
int *np; /* pointer to the returned parsed expression length */
char *cp; /* string to parse */
int allow_space; /* if true, allow and ignore space characters; if false, space means terminate parsing */
{
if (cp == NULL)
return NULL;
if (!case_sensitive_flag) {
str_tolower(cp);
}
cp = parse_section(equation, np, cp, allow_space);
return cp;
}
/*
* Parse variable name string pointed to by "cp".
* The variable name is converted to Mathomatic format and stored in "*vp".
*
* If the variable is not special and never existed before, it is created.
*
* Return new string position if successful.
* Display error message and return NULL on failure.
*/
char *
parse_var(vp, cp)
long *vp;
char *cp;
{
int i, j;
long vtmp;
char buf[MAX_VAR_LEN+1];
char *cp1;
int len;
int level; /* parentheses level */
int (*strcmpfunc)();
if (case_sensitive_flag) {
strcmpfunc = strcmp;
} else {
strcmpfunc = strcasecmp;
}
if (!isvarchar(*cp) || paren_increment(*cp) < 0) {
error(_("Invalid variable."));
return(NULL); /* variable name must start with a valid variable character */
}
for (level = 0, cp1 = cp, i = 0; *cp1;) {
if (level <= 0 && !isvarchar(*cp1)) {
break;
}
j = paren_increment(*cp1);
level += j;
if (level < 0)
break;
if (i >= MAX_VAR_LEN) {
error(_("Variable name too long."));
return(NULL);
}
buf[i++] = *cp1++;
if (j == -1 && level <= 0)
break;
}
buf[i] = '\0';
if (level > 0) {
error(_("Unmatched parenthesis: missing )"));
return(NULL);
}
if (strcasecmp(buf, NAN_NAME) == 0) {
warning(_("Attempt to enter NaN (Not a Number); Converted to variable."));
}
if (strcasecmp(buf, "inf") == 0 || strcasecmp(buf, INFINITY_NAME) == 0) {
error(_("Infinity cannot be used as a variable."));
return(NULL);
} else if ((*strcmpfunc)(buf, "sign") == 0) {
vtmp = SIGN;
} else {
if (strncasecmp(cp, "i#", 2) == 0) {
*vp = IMAGINARY;
return(cp + 2);
}
if (strncasecmp(cp, "e#", 2) == 0) {
*vp = V_E;
return(cp + 2);
}
if (strncasecmp(cp, "pi#", 3) == 0) {
*vp = V_PI;
return(cp + 3);
}
for (level = 0, cp1 = cp, i = 0; *cp1;) {
if (level <= 0 && !isvarchar(*cp1) && !isdigit(*cp1)) {
break;
}
j = paren_increment(*cp1);
level += j;
if (level < 0)
break;
if (i >= MAX_VAR_LEN) {
error(_("Variable name too long."));
return(NULL);
}
buf[i++] = *cp1++;
if (j == -1 && level <= 0)
break;
}
if (i <= 0) {
error(_("Empty variable name parsed!"));
return(NULL);
}
buf[i] = '\0';
if (level > 0) {
error(_("Unmatched parenthesis: missing )"));
return(NULL);
}
if ((*strcmpfunc)(buf, "i") == 0) {
*vp = IMAGINARY;
return(cp1);
}
if ((*strcmpfunc)(buf, "e") == 0) {
*vp = V_E;
return(cp1);
}
if ((*strcmpfunc)(buf, "pi") == 0) {
*vp = V_PI;
return(cp1);
}
if (is_all(buf)) {
error(_("\"all\" is a reserved word and may not be used as a variable name."));
return(NULL);
}
vtmp = 0;
for (i = 0; var_names[i]; i++) {
if ((*strcmpfunc)(buf, var_names[i]) == 0) {
vtmp = i + VAR_OFFSET;
break;
}
}
if (vtmp == 0) {
if (i >= (MAX_VAR_NAMES - 1)) {
error(_("Maximum number of variable names reached."));
#if !SILENT
printf(_("Please restart or use \"clear all\".\n"));
#endif
return(NULL);
}
len = strlen(buf) + 1;
var_names[i] = (char *) malloc(len);
if (var_names[i] == NULL) {
error(_("Out of memory (can't malloc(3) variable name)."));
return(NULL);
}
blt(var_names[i], buf, len);
vtmp = i + VAR_OFFSET;
var_names[i+1] = NULL;
}
*vp = vtmp;
return cp1;
}
/* for "sign" variables: */
if (isdigit(*cp1)) {
j = strtol(cp1, &cp1, 10);
if (j < 0 || j > MAX_SUBSCRIPT) {
error(_("Maximum subscript exceeded in special variable name."));
return(NULL);
}
if (vtmp == SIGN) {
sign_array[j+1] = true;
}
vtmp += ((long) (j + 1)) << VAR_SHIFT;
} else if (vtmp == SIGN) {
sign_array[0] = true;
}
*vp = vtmp;
return cp1;
}
/*
* Remove trailing spaces from a string.
*/
void
remove_trailing_spaces(cp)
char *cp;
{
int i;
i = strlen(cp) - 1;
while (i >= 0 && isspace(cp[i])) {
cp[i] = '\0';
i--;
}
}
/*
* This should be called for all line input.
* Set point_flag to true if pointing to the input error will work for the passed string.
* Truncate string to the actual content.
*/
void
set_error_level(cp)
char *cp; /* input string */
{
char *cp1;
int len;
point_flag = true;
/* handle comments, line breaks, and the DOS EOF character (control-Z) by truncating the string where found */
cp1 = cp;
while ((cp1 = strpbrk(cp1, ";\n\r\032"))) {
if (cp1 > cp && *(cp1 - 1) == '\\') {
if (*cp1 == ';') {
/* skip backslash escaped semicolon */
point_flag = false;
len = strlen(cp1);
blt(cp1 - 1, cp1, len + 1);
continue;
}
}
break;
}
if (cp1) {
*cp1 = '\0';
}
remove_trailing_spaces(cp);
/* set point_flag to false if non-printable characters encountered */
for (cp1 = cp; *cp1; cp1++) {
if (!isprint(*cp1)) {
point_flag = false;
}
}
}
/*
* Return true if passed variable "v" is a constant.
* Return value of constant in "*dp".
*/
int
var_is_const(v, dp)
long v;
double *dp;
{
if (v == V_E) {
if (dp)
*dp = M_E;
return true;
}
if (v == V_PI) {
if (dp)
*dp = M_PI;
return true;
}
return false;
}
/*
* Substitute E and PI variables with their respective constants.
*
* Return true if anything was substituted (therefore approximated).
*/
int
subst_constants(equation, np)
token_type *equation;
int *np;
{
int i;
int modified = false;
double d;
for (i = 0; i < *np; i += 2) {
if (equation[i].kind == VARIABLE) {
if (var_is_const(equation[i].token.variable, &d)) {
equation[i].kind = CONSTANT;
equation[i].token.constant = d;
modified = true;
}
}
}
return modified;
}
#ifndef my_strlcpy
/*
* A very efficient strlcpy(), which isn't available in some distributions, hence this code.
*
* Copy up to (n - 1) characters from string in src
* to dest and null-terminate the result.
*
* Return length of src.
*/
int
my_strlcpy(dest, src, n)
char *dest, *src;
int n;
{
int len, len_src;
len_src = strlen(src);
if (n > 0) {
len = min(n - 1, len_src);
memmove(dest, src, len);
dest[len] = '\0';
}
return len_src;
}
#endif