URL https://opencores.org/ocsvn/socgen/socgen/trunk
Subversion Repositories socgen

[/] [socgen/] [trunk/] [tools/] [math/] [parser] - Rev 135

Go to most recent revision | Compare with Previous | Blame | View Log
#!/usr/bin/env perl
# simple-math-parser.pl - Simple math parser written in Perl 5
# License: C.C. Attribution NonCommercial ShareAlike 3.0 Unported
# Revision: 120705

#---------------------------------------------------------------------
# important note
#---------------------------------------------------------------------

# This software is provided on an AS IS basis with ABSOLUTELY NO WAR-
# RANTY. The entire risk as to the quality and performance of the
# software is with you. Should the software prove defective, you as-
# sume the cost of all necessary servicing, repair or correction. In
# no event will any of the developers, or any other party, be liable
# to anyone for damages arising out of use of the software, or inabil-
# ity to use the software.

#---------------------------------------------------------------------
# overview
#---------------------------------------------------------------------

my $USAGE_TEXT = << 'END_OF_USAGE_TEXT';
Usage: simple-math-parser.pl "1.5+(2/3)*pi-sqrt(2)"

This is a CLI calculator program that takes a single arithmetic ex-
pression as an argument, evaluates it, and prints the result to stand-
ard output.

As a general rule, expressions should be quoted as shown here. Other-
wise, "shell meta-character" problems may occur.

The point of the program is to illustrate the structure of a simple
math parser. A recursive-descent approach is used; the core consists
of a single recursive routine (ParseMath).

Numbers may be integers, ordinary real numbers, or real numbers in
scientific notation. Examples of scientific notation: 1.3e+0 is equal
to 1.3, 12e-1 is equal to 1.2, and 5e+1 is equal to 50. The "+" sign
is optional in this context.

Supported operators include + (add), - (subtract or unary minus), *
(multiply), / (divide), and ** (exponentiate).

Six functions are supported: sqrt (square root), cbrt (cube root), log
(natural logarithm), sin, cos, and tan. The last three functions take
angles in radians.

Three standard constants may be used: e, phi (the Golden Ratio), and
pi.

This is the first public release of the program. Therefore, it should
be considered alpha and bugs may exist.
END_OF_USAGE_TEXT

#---------------------------------------------------------------------
# standard module setup
#---------------------------------------------------------------------

require 5.8.1;
use strict;
use Carp;
use warnings;
                                # Trap warnings
$SIG{__WARN__} = sub { die @_; };

#---------------------------------------------------------------------
# basic constants
#---------------------------------------------------------------------

use constant ZERO => 0; # Zero
use constant ONE => 1; # One
use constant TWO => 2; # Two

use constant FALSE => 0; # Boolean FALSE
use constant TRUE => 1; # Boolean TRUE

#---------------------------------------------------------------------
# common math constants
#---------------------------------------------------------------------

# This table maps one or more symbol names to associated numeric val-
# ues.

# Note: Symbol names should consist of a letter followed by zero or
# more alphanumeric characters. Letters should be specified in lower
# case.

my %MathConstants =
(
    'e' => '2.7182818284590452353603' ,
    'phi' => '1.6180339887498948482046' ,
    'pi' => '3.1415926535897932384626'
);

#---------------------------------------------------------------------
# program parameters
#---------------------------------------------------------------------

# $IE = Internal-error message prefix
# $MAXPRE = Maximum precedence level
# $PURPOSE = Short description of purpose
# $REVISION = Revision string
# $USE_LESS = Flag: Use "less" for usage text

my $IE = 'Internal error' ;
my $MAXPRE = 9999 ;
my $PURPOSE = 'Simple Perl math parser' ;
my $REVISION = '120705' ;
my $USE_LESS = TRUE ;

#---------------------------------------------------------------------
# token-related patterns
#---------------------------------------------------------------------

# $PatNumScience = Matches a non-negative number in scientific nota-
# tion
# $PatNumRegular = Matches an ordinary non-negative number
# $PatSymbol = Matches a symbol
# $PatOperator = Matches a parenthesis or an operator

# Note: In this context, exponentiation is represented by the single-
# character operator "~" as opposed to "**", which is used at a higher
# level. This simplifies the code.

my $PatNumScience = '\b\d+\.?\d*e[\+\-]?\d+' ;
my $PatNumRegular = '\b\d+\.?\d*' ;
my $PatSymbol = '\b[a-z]\w+\b' ;
my $PatOperator = '[\(\)\+\-\*/~]' ;

#---------------------------------------------------------------------

# @TokenPatterns is a list of all of the patterns that are used to
# match tokens.

my @TokenPatterns =
( # Note: Order is significant here
    $PatNumScience , $PatNumRegular , $PatSymbol , $PatOperator
);

#---------------------------------------------------------------------

# $TokenPatterns is a pattern that matches a token (of any supported
# type). The pattern omits enclosing parentheses.

my $TokenPatterns = join '|', @TokenPatterns;

#---------------------------------------------------------------------
# misc. global variables
#---------------------------------------------------------------------

my $PROGNAME; # Program name (without path)
   $PROGNAME = $0;
   $PROGNAME =~ s@.*/@@;

#---------------------------------------------------------------------
# support routines
#---------------------------------------------------------------------

# Routine: UsageError
# Purpose: Prints program usage text and exits
# Usage: &UsageError();

# If the global parameter $USE_LESS is TRUE, and if standard output is
# a terminal, usage text is piped through "less" (with some "less"-
# related instructions added). Otherwise, usage text is simply sent to
# standard output.

#---------------------------------------------------------------------

sub UsageError
{
    $USAGE_TEXT =~ s@^\s+@@s; # Remove leading white space

    $USAGE_TEXT = << "END"; # "END" must be double-quoted here
$PROGNAME $REVISION - $PURPOSE

$USAGE_TEXT
END
                                # Adjust trailing white space
    $USAGE_TEXT =~ s@\s*\z@\n@s;

    if ($USE_LESS && (-t STDOUT) && open (OFD, "|/usr/bin/less"))
    { # Handle output with "less"
                                # "END" must be double-quoted here
        $USAGE_TEXT = << "END";
To exit this "help" text, press "q" or "Q". To scroll up or down, use
PGUP, PGDN, or the arrow keys.

$USAGE_TEXT
END
        print OFD $USAGE_TEXT;
        close OFD;
    }
    else
    { # Handle output without "less"
        print "\n", $USAGE_TEXT, "\n";
    }

    exit ONE;
}

#---------------------------------------------------------------------
# parser routine
#---------------------------------------------------------------------

# Routine: ParseMath
# Purpose: Parses a list of math-related tokens

# Usage:
#
# my @tokens = ( '1', '+', '2', '/', '3' );
# my $result = &ParseMath (\@tokens, 0);

# Note: "ParseMath" is recursive.

# This routine takes two arguments: A reference (i.e., pointer) to a
# list of tokens and an integer, which should be zero unless the rou-
# tine happens to be calling itself (in which case it may use other
# values internally).

# Tokens may be non-negative integer or real numbers, plus or minus
# signs, a multiplication or division or exponentiation operator
# (*, /, or ~), parentheses, or the names of supported functions or
# constants.

# Note: "~" is used at this level instead of the more usual "**" as a
# matter of convenience. Higher-level code may map "**" or other char-
# acters or sequences to "~".

# Six functions are supported: sqrt (square root), cbrt (cube root),
# log (natural logarithm), sin, cos, and tan. The last three functions
# take angles in radians.

# Three standard constants may be used: e, phi (the Golden Ratio),
# and pi.

# Sub-expressions may be parenthesized. PEMDAS (i.e., standard prece-
# dence) rules are supported.

# For numbers, scientific notation is supported. Examples of scienti-
# fic notation: 1.3e+0 is equal to 1.3, 12e-1 is equal to 1.2, and
# 5e+1 is equal to 50. The "+" sign is optional in this context.

# Minus signs, as in the unary minus operator, must be specified as
# separate tokens.

#---------------------------------------------------------------------

sub ParseMath
{
                                # Argument list
    my ($p_tokens, $plevel) = @_;
    my $left; # Left operand (or token)
    my $right; # Right operand
    my $result; # Result
    my $str; # Scratch

#---------------------------------------------------------------------
# Initial setup.

    $left = shift (@$p_tokens); # Get first token
                                # Consistency check
    die "$IE #0001\n" unless defined $left;

#---------------------------------------------------------------------
# Various cases.

# This block handles symbolic constants (such as pi), parenthesized
# sub-expressions, the unary-minus operator, and functions such as
# "cos" or "sqrt".

    if (defined ($str = $MathConstants {$left}))
        { $left = $str; }
    elsif ($left eq '(')
        { $left = &ParseMath ($p_tokens, ZERO ); }
    elsif ($left eq '-')
        { $left = &ParseMath ($p_tokens, $MAXPRE); $left = (-$left); }
    elsif ($left =~ m@^(sqrt|cbrt|log|sin|cos|tan)\z@)
    { # Function
        $str = $left; # Name of function
                                # Function argument
        $left = &ParseMath ($p_tokens, TWO);

        eval
        { # "eval" traps most errors
            $left = sqrt ($left) if $str eq 'sqrt' ;
            $left = $left ** (1/3) if $str eq 'cbrt' ;
            $left = log ($left) if $str eq 'log' ;
            $left = sin ($left) if $str eq 'sin' ;
            $left = cos ($left) if $str eq 'cos' ;
            $left = sin ($left) / cos ($left) if $str eq 'tan' ;
        };

        die $@ if $@; # Handle trapped errors
    }

#---------------------------------------------------------------------
# Adjust and/or check intermediate result.

# This statement verifies that the current (left) operand has been re-
# duced to a number.

    die "Error: Invalid syntax\n"
        unless $left =~ m@^-?($PatNumScience|$PatNumRegular)\z@;

# This statement translates numbers that are still in scientific nota-
# tion to ordinary values (if possible).

    $left = $left + ZERO if $left =~ m@e@;

#---------------------------------------------------------------------
# Handle binary operators.

    while (TRUE)
    {
                                # Get operator token
        my $op = shift (@$p_tokens);
                                # Are we there yet?
        if (!defined ($op) || ($op eq ')')) { $result = $left; last; }
                                # Consistency check
        die "$IE #0002: $op\n" unless $op =~ m@[\+\-\*/~]\z@;

        my $nlevel = ZERO; # Precedence level
           $nlevel = ONE if ($op eq '*') || ($op eq '/');
           $nlevel = TWO if ($op eq '~');

                                # Stop here due to precedence?
        if ($plevel && ($plevel >= $nlevel))
        { # Yes
            unshift (@$p_tokens, $op);
            $result = $left;
            last;
        }

        $plevel = $nlevel; # Step to new precedence level
                                # Parse right side of sub-expression
        $right = &ParseMath ($p_tokens, $plevel);

        eval
        { # Note: "eval" traps most errors
            $result = $left + $right if $op eq '+';
            $result = $left - $right if $op eq '-';
            $result = $left * $right if $op eq '*';
            $result = $left / $right if $op eq '/';
$result = $left ** $right if $op eq '~';
};

die $@ if $@; # Handle trapped errors
# Consistency check
die "$IE #0003: $op\n" unless defined $result;

# Are we there yet?
last unless scalar @$p_tokens;

$plevel = ZERO; # We've resolved the left operand
        $left = $result; # Result is left side of another sub-
                                # expression
    }

#---------------------------------------------------------------------
# Wrap it up.

    $result; # Return the result
}

#---------------------------------------------------------------------
# main routine
#---------------------------------------------------------------------

sub Main
{
    my $data; # Input string
    my @tokens; # Input tokens

#---------------------------------------------------------------------
# Initial setup.

    select STDERR; $| = ONE; # Force STDERR flush on write
    select STDOUT; $| = ONE; # Force STDOUT flush on write

                                # Check the command line
    &UsageError() unless scalar (@ARGV) == ONE;

    $data = shift (@ARGV); # Input string
    $data =~ s@\s+@ @gs; # Adjust white space
                                # Check characters used
    die "Invalid character in expression: $1\n"
        if $data =~ m@([^a-z0-9\.\(\)\+\-\*/~ ])@i;

#---------------------------------------------------------------------
# Perform operations.

    $data = lc ($data); # Map input to lower case
    $data =~ s@\*\*@~@g; # Map "**" (exponentiation) to "~"
                                # (this simplifies the code)
                                # Put spaces around tokens
    $data =~ s@($TokenPatterns)@ $1 @gi;
                                # Split data into a tokens list
    @tokens = split m@\s+@, $data;
                                # Discard empty strings
    @tokens = grep { length; } @tokens;
                                # Parse tokens and print result
    print &ParseMath (\@tokens, ZERO), "\n";
    undef;
}

#---------------------------------------------------------------------
# main program
#---------------------------------------------------------------------

&Main(); # Call the main routine
exit ZERO; # Normal exit
Go to most recent revision | Compare with Previous | Blame | View Log
Browse

Tools

Subversion Repositories socgen

[/] [socgen/] [trunk/] [tools/] [math/] [parser] - Rev 135