1 |
134 |
jt_eaton |
#!/usr/bin/env perl
|
2 |
|
|
# simple-math-parser.pl - Simple math parser written in Perl 5
|
3 |
|
|
# License: C.C. Attribution NonCommercial ShareAlike 3.0 Unported
|
4 |
|
|
# Revision: 120705
|
5 |
|
|
|
6 |
|
|
#---------------------------------------------------------------------
|
7 |
|
|
# important note
|
8 |
|
|
#---------------------------------------------------------------------
|
9 |
|
|
|
10 |
|
|
# This software is provided on an AS IS basis with ABSOLUTELY NO WAR-
|
11 |
|
|
# RANTY. The entire risk as to the quality and performance of the
|
12 |
|
|
# software is with you. Should the software prove defective, you as-
|
13 |
|
|
# sume the cost of all necessary servicing, repair or correction. In
|
14 |
|
|
# no event will any of the developers, or any other party, be liable
|
15 |
|
|
# to anyone for damages arising out of use of the software, or inabil-
|
16 |
|
|
# ity to use the software.
|
17 |
|
|
|
18 |
|
|
#---------------------------------------------------------------------
|
19 |
|
|
# overview
|
20 |
|
|
#---------------------------------------------------------------------
|
21 |
|
|
|
22 |
|
|
my $USAGE_TEXT = << 'END_OF_USAGE_TEXT';
|
23 |
|
|
Usage: simple-math-parser.pl "1.5+(2/3)*pi-sqrt(2)"
|
24 |
|
|
|
25 |
|
|
This is a CLI calculator program that takes a single arithmetic ex-
|
26 |
|
|
pression as an argument, evaluates it, and prints the result to stand-
|
27 |
|
|
ard output.
|
28 |
|
|
|
29 |
|
|
As a general rule, expressions should be quoted as shown here. Other-
|
30 |
|
|
wise, "shell meta-character" problems may occur.
|
31 |
|
|
|
32 |
|
|
The point of the program is to illustrate the structure of a simple
|
33 |
|
|
math parser. A recursive-descent approach is used; the core consists
|
34 |
|
|
of a single recursive routine (ParseMath).
|
35 |
|
|
|
36 |
|
|
Numbers may be integers, ordinary real numbers, or real numbers in
|
37 |
|
|
scientific notation. Examples of scientific notation: 1.3e+0 is equal
|
38 |
|
|
to 1.3, 12e-1 is equal to 1.2, and 5e+1 is equal to 50. The "+" sign
|
39 |
|
|
is optional in this context.
|
40 |
|
|
|
41 |
|
|
Supported operators include + (add), - (subtract or unary minus), *
|
42 |
|
|
(multiply), / (divide), and ** (exponentiate).
|
43 |
|
|
|
44 |
|
|
Six functions are supported: sqrt (square root), cbrt (cube root), log
|
45 |
|
|
(natural logarithm), sin, cos, and tan. The last three functions take
|
46 |
|
|
angles in radians.
|
47 |
|
|
|
48 |
|
|
Three standard constants may be used: e, phi (the Golden Ratio), and
|
49 |
|
|
pi.
|
50 |
|
|
|
51 |
|
|
This is the first public release of the program. Therefore, it should
|
52 |
|
|
be considered alpha and bugs may exist.
|
53 |
|
|
END_OF_USAGE_TEXT
|
54 |
|
|
|
55 |
|
|
#---------------------------------------------------------------------
|
56 |
|
|
# standard module setup
|
57 |
|
|
#---------------------------------------------------------------------
|
58 |
|
|
|
59 |
|
|
require 5.8.1;
|
60 |
|
|
use strict;
|
61 |
|
|
use Carp;
|
62 |
|
|
use warnings;
|
63 |
|
|
# Trap warnings
|
64 |
|
|
$SIG{__WARN__} = sub { die @_; };
|
65 |
|
|
|
66 |
|
|
#---------------------------------------------------------------------
|
67 |
|
|
# basic constants
|
68 |
|
|
#---------------------------------------------------------------------
|
69 |
|
|
|
70 |
|
|
use constant ZERO => 0; # Zero
|
71 |
|
|
use constant ONE => 1; # One
|
72 |
|
|
use constant TWO => 2; # Two
|
73 |
|
|
|
74 |
|
|
use constant FALSE => 0; # Boolean FALSE
|
75 |
|
|
use constant TRUE => 1; # Boolean TRUE
|
76 |
|
|
|
77 |
|
|
#---------------------------------------------------------------------
|
78 |
|
|
# common math constants
|
79 |
|
|
#---------------------------------------------------------------------
|
80 |
|
|
|
81 |
|
|
# This table maps one or more symbol names to associated numeric val-
|
82 |
|
|
# ues.
|
83 |
|
|
|
84 |
|
|
# Note: Symbol names should consist of a letter followed by zero or
|
85 |
|
|
# more alphanumeric characters. Letters should be specified in lower
|
86 |
|
|
# case.
|
87 |
|
|
|
88 |
|
|
my %MathConstants =
|
89 |
|
|
(
|
90 |
|
|
'e' => '2.7182818284590452353603' ,
|
91 |
|
|
'phi' => '1.6180339887498948482046' ,
|
92 |
|
|
'pi' => '3.1415926535897932384626'
|
93 |
|
|
);
|
94 |
|
|
|
95 |
|
|
#---------------------------------------------------------------------
|
96 |
|
|
# program parameters
|
97 |
|
|
#---------------------------------------------------------------------
|
98 |
|
|
|
99 |
|
|
# $IE = Internal-error message prefix
|
100 |
|
|
# $MAXPRE = Maximum precedence level
|
101 |
|
|
# $PURPOSE = Short description of purpose
|
102 |
|
|
# $REVISION = Revision string
|
103 |
|
|
# $USE_LESS = Flag: Use "less" for usage text
|
104 |
|
|
|
105 |
|
|
my $IE = 'Internal error' ;
|
106 |
|
|
my $MAXPRE = 9999 ;
|
107 |
|
|
my $PURPOSE = 'Simple Perl math parser' ;
|
108 |
|
|
my $REVISION = '120705' ;
|
109 |
|
|
my $USE_LESS = TRUE ;
|
110 |
|
|
|
111 |
|
|
#---------------------------------------------------------------------
|
112 |
|
|
# token-related patterns
|
113 |
|
|
#---------------------------------------------------------------------
|
114 |
|
|
|
115 |
|
|
# $PatNumScience = Matches a non-negative number in scientific nota-
|
116 |
|
|
# tion
|
117 |
|
|
# $PatNumRegular = Matches an ordinary non-negative number
|
118 |
|
|
# $PatSymbol = Matches a symbol
|
119 |
|
|
# $PatOperator = Matches a parenthesis or an operator
|
120 |
|
|
|
121 |
|
|
# Note: In this context, exponentiation is represented by the single-
|
122 |
|
|
# character operator "~" as opposed to "**", which is used at a higher
|
123 |
|
|
# level. This simplifies the code.
|
124 |
|
|
|
125 |
|
|
my $PatNumScience = '\b\d+\.?\d*e[\+\-]?\d+' ;
|
126 |
|
|
my $PatNumRegular = '\b\d+\.?\d*' ;
|
127 |
|
|
my $PatSymbol = '\b[a-z]\w+\b' ;
|
128 |
|
|
my $PatOperator = '[\(\)\+\-\*/~]' ;
|
129 |
|
|
|
130 |
|
|
#---------------------------------------------------------------------
|
131 |
|
|
|
132 |
|
|
# @TokenPatterns is a list of all of the patterns that are used to
|
133 |
|
|
# match tokens.
|
134 |
|
|
|
135 |
|
|
my @TokenPatterns =
|
136 |
|
|
( # Note: Order is significant here
|
137 |
|
|
$PatNumScience , $PatNumRegular , $PatSymbol , $PatOperator
|
138 |
|
|
);
|
139 |
|
|
|
140 |
|
|
#---------------------------------------------------------------------
|
141 |
|
|
|
142 |
|
|
# $TokenPatterns is a pattern that matches a token (of any supported
|
143 |
|
|
# type). The pattern omits enclosing parentheses.
|
144 |
|
|
|
145 |
|
|
my $TokenPatterns = join '|', @TokenPatterns;
|
146 |
|
|
|
147 |
|
|
#---------------------------------------------------------------------
|
148 |
|
|
# misc. global variables
|
149 |
|
|
#---------------------------------------------------------------------
|
150 |
|
|
|
151 |
|
|
my $PROGNAME; # Program name (without path)
|
152 |
|
|
$PROGNAME = $0;
|
153 |
|
|
$PROGNAME =~ s@.*/@@;
|
154 |
|
|
|
155 |
|
|
#---------------------------------------------------------------------
|
156 |
|
|
# support routines
|
157 |
|
|
#---------------------------------------------------------------------
|
158 |
|
|
|
159 |
|
|
# Routine: UsageError
|
160 |
|
|
# Purpose: Prints program usage text and exits
|
161 |
|
|
# Usage: &UsageError();
|
162 |
|
|
|
163 |
|
|
# If the global parameter $USE_LESS is TRUE, and if standard output is
|
164 |
|
|
# a terminal, usage text is piped through "less" (with some "less"-
|
165 |
|
|
# related instructions added). Otherwise, usage text is simply sent to
|
166 |
|
|
# standard output.
|
167 |
|
|
|
168 |
|
|
#---------------------------------------------------------------------
|
169 |
|
|
|
170 |
|
|
sub UsageError
|
171 |
|
|
{
|
172 |
|
|
$USAGE_TEXT =~ s@^\s+@@s; # Remove leading white space
|
173 |
|
|
|
174 |
|
|
$USAGE_TEXT = << "END"; # "END" must be double-quoted here
|
175 |
|
|
$PROGNAME $REVISION - $PURPOSE
|
176 |
|
|
|
177 |
|
|
$USAGE_TEXT
|
178 |
|
|
END
|
179 |
|
|
# Adjust trailing white space
|
180 |
|
|
$USAGE_TEXT =~ s@\s*\z@\n@s;
|
181 |
|
|
|
182 |
|
|
if ($USE_LESS && (-t STDOUT) && open (OFD, "|/usr/bin/less"))
|
183 |
|
|
{ # Handle output with "less"
|
184 |
|
|
# "END" must be double-quoted here
|
185 |
|
|
$USAGE_TEXT = << "END";
|
186 |
|
|
To exit this "help" text, press "q" or "Q". To scroll up or down, use
|
187 |
|
|
PGUP, PGDN, or the arrow keys.
|
188 |
|
|
|
189 |
|
|
$USAGE_TEXT
|
190 |
|
|
END
|
191 |
|
|
print OFD $USAGE_TEXT;
|
192 |
|
|
close OFD;
|
193 |
|
|
}
|
194 |
|
|
else
|
195 |
|
|
{ # Handle output without "less"
|
196 |
|
|
print "\n", $USAGE_TEXT, "\n";
|
197 |
|
|
}
|
198 |
|
|
|
199 |
|
|
exit ONE;
|
200 |
|
|
}
|
201 |
|
|
|
202 |
|
|
#---------------------------------------------------------------------
|
203 |
|
|
# parser routine
|
204 |
|
|
#---------------------------------------------------------------------
|
205 |
|
|
|
206 |
|
|
# Routine: ParseMath
|
207 |
|
|
# Purpose: Parses a list of math-related tokens
|
208 |
|
|
|
209 |
|
|
# Usage:
|
210 |
|
|
#
|
211 |
|
|
# my @tokens = ( '1', '+', '2', '/', '3' );
|
212 |
|
|
# my $result = &ParseMath (\@tokens, 0);
|
213 |
|
|
|
214 |
|
|
# Note: "ParseMath" is recursive.
|
215 |
|
|
|
216 |
|
|
# This routine takes two arguments: A reference (i.e., pointer) to a
|
217 |
|
|
# list of tokens and an integer, which should be zero unless the rou-
|
218 |
|
|
# tine happens to be calling itself (in which case it may use other
|
219 |
|
|
# values internally).
|
220 |
|
|
|
221 |
|
|
# Tokens may be non-negative integer or real numbers, plus or minus
|
222 |
|
|
# signs, a multiplication or division or exponentiation operator
|
223 |
|
|
# (*, /, or ~), parentheses, or the names of supported functions or
|
224 |
|
|
# constants.
|
225 |
|
|
|
226 |
|
|
# Note: "~" is used at this level instead of the more usual "**" as a
|
227 |
|
|
# matter of convenience. Higher-level code may map "**" or other char-
|
228 |
|
|
# acters or sequences to "~".
|
229 |
|
|
|
230 |
|
|
# Six functions are supported: sqrt (square root), cbrt (cube root),
|
231 |
|
|
# log (natural logarithm), sin, cos, and tan. The last three functions
|
232 |
|
|
# take angles in radians.
|
233 |
|
|
|
234 |
|
|
# Three standard constants may be used: e, phi (the Golden Ratio),
|
235 |
|
|
# and pi.
|
236 |
|
|
|
237 |
|
|
# Sub-expressions may be parenthesized. PEMDAS (i.e., standard prece-
|
238 |
|
|
# dence) rules are supported.
|
239 |
|
|
|
240 |
|
|
# For numbers, scientific notation is supported. Examples of scienti-
|
241 |
|
|
# fic notation: 1.3e+0 is equal to 1.3, 12e-1 is equal to 1.2, and
|
242 |
|
|
# 5e+1 is equal to 50. The "+" sign is optional in this context.
|
243 |
|
|
|
244 |
|
|
# Minus signs, as in the unary minus operator, must be specified as
|
245 |
|
|
# separate tokens.
|
246 |
|
|
|
247 |
|
|
#---------------------------------------------------------------------
|
248 |
|
|
|
249 |
|
|
sub ParseMath
|
250 |
|
|
{
|
251 |
|
|
# Argument list
|
252 |
|
|
my ($p_tokens, $plevel) = @_;
|
253 |
|
|
my $left; # Left operand (or token)
|
254 |
|
|
my $right; # Right operand
|
255 |
|
|
my $result; # Result
|
256 |
|
|
my $str; # Scratch
|
257 |
|
|
|
258 |
|
|
#---------------------------------------------------------------------
|
259 |
|
|
# Initial setup.
|
260 |
|
|
|
261 |
|
|
$left = shift (@$p_tokens); # Get first token
|
262 |
|
|
# Consistency check
|
263 |
|
|
die "$IE #0001\n" unless defined $left;
|
264 |
|
|
|
265 |
|
|
#---------------------------------------------------------------------
|
266 |
|
|
# Various cases.
|
267 |
|
|
|
268 |
|
|
# This block handles symbolic constants (such as pi), parenthesized
|
269 |
|
|
# sub-expressions, the unary-minus operator, and functions such as
|
270 |
|
|
# "cos" or "sqrt".
|
271 |
|
|
|
272 |
|
|
if (defined ($str = $MathConstants {$left}))
|
273 |
|
|
{ $left = $str; }
|
274 |
|
|
elsif ($left eq '(')
|
275 |
|
|
{ $left = &ParseMath ($p_tokens, ZERO ); }
|
276 |
|
|
elsif ($left eq '-')
|
277 |
|
|
{ $left = &ParseMath ($p_tokens, $MAXPRE); $left = (-$left); }
|
278 |
|
|
elsif ($left =~ m@^(sqrt|cbrt|log|sin|cos|tan)\z@)
|
279 |
|
|
{ # Function
|
280 |
|
|
$str = $left; # Name of function
|
281 |
|
|
# Function argument
|
282 |
|
|
$left = &ParseMath ($p_tokens, TWO);
|
283 |
|
|
|
284 |
|
|
eval
|
285 |
|
|
{ # "eval" traps most errors
|
286 |
|
|
$left = sqrt ($left) if $str eq 'sqrt' ;
|
287 |
|
|
$left = $left ** (1/3) if $str eq 'cbrt' ;
|
288 |
|
|
$left = log ($left) if $str eq 'log' ;
|
289 |
|
|
$left = sin ($left) if $str eq 'sin' ;
|
290 |
|
|
$left = cos ($left) if $str eq 'cos' ;
|
291 |
|
|
$left = sin ($left) / cos ($left) if $str eq 'tan' ;
|
292 |
|
|
};
|
293 |
|
|
|
294 |
|
|
die $@ if $@; # Handle trapped errors
|
295 |
|
|
}
|
296 |
|
|
|
297 |
|
|
#---------------------------------------------------------------------
|
298 |
|
|
# Adjust and/or check intermediate result.
|
299 |
|
|
|
300 |
|
|
# This statement verifies that the current (left) operand has been re-
|
301 |
|
|
# duced to a number.
|
302 |
|
|
|
303 |
|
|
die "Error: Invalid syntax\n"
|
304 |
|
|
unless $left =~ m@^-?($PatNumScience|$PatNumRegular)\z@;
|
305 |
|
|
|
306 |
|
|
# This statement translates numbers that are still in scientific nota-
|
307 |
|
|
# tion to ordinary values (if possible).
|
308 |
|
|
|
309 |
|
|
$left = $left + ZERO if $left =~ m@e@;
|
310 |
|
|
|
311 |
|
|
#---------------------------------------------------------------------
|
312 |
|
|
# Handle binary operators.
|
313 |
|
|
|
314 |
|
|
while (TRUE)
|
315 |
|
|
{
|
316 |
|
|
# Get operator token
|
317 |
|
|
my $op = shift (@$p_tokens);
|
318 |
|
|
# Are we there yet?
|
319 |
|
|
if (!defined ($op) || ($op eq ')')) { $result = $left; last; }
|
320 |
|
|
# Consistency check
|
321 |
|
|
die "$IE #0002: $op\n" unless $op =~ m@[\+\-\*/~]\z@;
|
322 |
|
|
|
323 |
|
|
my $nlevel = ZERO; # Precedence level
|
324 |
|
|
$nlevel = ONE if ($op eq '*') || ($op eq '/');
|
325 |
|
|
$nlevel = TWO if ($op eq '~');
|
326 |
|
|
|
327 |
|
|
# Stop here due to precedence?
|
328 |
|
|
if ($plevel && ($plevel >= $nlevel))
|
329 |
|
|
{ # Yes
|
330 |
|
|
unshift (@$p_tokens, $op);
|
331 |
|
|
$result = $left;
|
332 |
|
|
last;
|
333 |
|
|
}
|
334 |
|
|
|
335 |
|
|
$plevel = $nlevel; # Step to new precedence level
|
336 |
|
|
# Parse right side of sub-expression
|
337 |
|
|
$right = &ParseMath ($p_tokens, $plevel);
|
338 |
|
|
|
339 |
|
|
eval
|
340 |
|
|
{ # Note: "eval" traps most errors
|
341 |
|
|
$result = $left + $right if $op eq '+';
|
342 |
|
|
$result = $left - $right if $op eq '-';
|
343 |
|
|
$result = $left * $right if $op eq '*';
|
344 |
|
|
$result = $left / $right if $op eq '/';
|
345 |
|
|
$result = $left ** $right if $op eq '~';
|
346 |
|
|
};
|
347 |
|
|
|
348 |
|
|
die $@ if $@; # Handle trapped errors
|
349 |
|
|
# Consistency check
|
350 |
|
|
die "$IE #0003: $op\n" unless defined $result;
|
351 |
|
|
|
352 |
|
|
# Are we there yet?
|
353 |
|
|
last unless scalar @$p_tokens;
|
354 |
|
|
|
355 |
|
|
$plevel = ZERO; # We've resolved the left operand
|
356 |
|
|
$left = $result; # Result is left side of another sub-
|
357 |
|
|
# expression
|
358 |
|
|
}
|
359 |
|
|
|
360 |
|
|
#---------------------------------------------------------------------
|
361 |
|
|
# Wrap it up.
|
362 |
|
|
|
363 |
|
|
$result; # Return the result
|
364 |
|
|
}
|
365 |
|
|
|
366 |
|
|
#---------------------------------------------------------------------
|
367 |
|
|
# main routine
|
368 |
|
|
#---------------------------------------------------------------------
|
369 |
|
|
|
370 |
|
|
sub Main
|
371 |
|
|
{
|
372 |
|
|
my $data; # Input string
|
373 |
|
|
my @tokens; # Input tokens
|
374 |
|
|
|
375 |
|
|
#---------------------------------------------------------------------
|
376 |
|
|
# Initial setup.
|
377 |
|
|
|
378 |
|
|
select STDERR; $| = ONE; # Force STDERR flush on write
|
379 |
|
|
select STDOUT; $| = ONE; # Force STDOUT flush on write
|
380 |
|
|
|
381 |
|
|
# Check the command line
|
382 |
|
|
&UsageError() unless scalar (@ARGV) == ONE;
|
383 |
|
|
|
384 |
|
|
$data = shift (@ARGV); # Input string
|
385 |
|
|
$data =~ s@\s+@ @gs; # Adjust white space
|
386 |
|
|
# Check characters used
|
387 |
|
|
die "Invalid character in expression: $1\n"
|
388 |
|
|
if $data =~ m@([^a-z0-9\.\(\)\+\-\*/~ ])@i;
|
389 |
|
|
|
390 |
|
|
#---------------------------------------------------------------------
|
391 |
|
|
# Perform operations.
|
392 |
|
|
|
393 |
|
|
$data = lc ($data); # Map input to lower case
|
394 |
|
|
$data =~ s@\*\*@~@g; # Map "**" (exponentiation) to "~"
|
395 |
|
|
# (this simplifies the code)
|
396 |
|
|
# Put spaces around tokens
|
397 |
|
|
$data =~ s@($TokenPatterns)@ $1 @gi;
|
398 |
|
|
# Split data into a tokens list
|
399 |
|
|
@tokens = split m@\s+@, $data;
|
400 |
|
|
# Discard empty strings
|
401 |
|
|
@tokens = grep { length; } @tokens;
|
402 |
|
|
# Parse tokens and print result
|
403 |
|
|
print &ParseMath (\@tokens, ZERO), "\n";
|
404 |
|
|
undef;
|
405 |
|
|
}
|
406 |
|
|
|
407 |
|
|
#---------------------------------------------------------------------
|
408 |
|
|
# main program
|
409 |
|
|
#---------------------------------------------------------------------
|
410 |
|
|
|
411 |
|
|
&Main(); # Call the main routine
|
412 |
|
|
exit ZERO; # Normal exit
|