JSDoc: Source: parser.js

module.exports = parser

// we're going to use the FALSE node in various places,
// so I'm making it a global.
let FALSE = {
    type: "bool",
    value: false
}

let TRUE = {
    type: 'bool',
    value: true
}

/*
 * 
 * maybeBinary(left, my_prec) is used to compose 
 * binary expressions like 1 + 2 * 3. The trick to 
 * parse them properly is to correctly define 
 * the operator precedence, so we'll start with that:
 */
let PRECEDENCE = {
    "=": 1,
    "||": 2,
    "&&": 3,
    "<": 7,
    ">": 7,
    "<=": 7,
    ">=": 7,
    "==": 7,
    "!=": 7,
    "<<": 8,
    ">>": 8,
    "+": 10,
    "-": 10,
    "*": 20,
    "/": 20,
    "%": 20,
}

/**
 * 
 * @param {TokenStream} input operate tokens 
 * @returns {Object} ast
 */
function parser(input) {
    return parseToplevel()

    function isPunc(str) {
        let token = input.peek()
        return token && token.type === 'punc' && (!str || token.value === str) && token
    }

    function isKeyword(kw) {
        let token = input.peek()
        return token && token.type === 'kw' && (!kw || token.value === kw) && token
    }

    function isOp(op) {
        let token = input.peek()
        return token && token.type === 'op' && (!op || token.value === op) && token
    }

    function skipPunc(str) {
        // !bug: cannot Identity `;`
        if (isPunc(str)) {
            input.next()
        } else {
            input.croak(`Expecting punctuation: "${str}"`)
        }
    }

    function skipKeyward(kw) {
        if (isKeyword(kw)) {
            input.next()
        } else {
            input.croak(`Expecting keyword: "${kw}"`)
        }
    }

    function skipOp(op) {
        if (isOp(op)) {
            input.next()
        } else {
            input.croak(`Expecting operator: "${op}"`)
        }
    }

    function unexpected() {
        input.croak(`Unexpected token: ${JSON.stringify(input.peek())}`)
    }

    /**
     * 
     * delimited is a bit lower-level:
     * @param {String} start 
     * @param {String} stop 
     * @param {String} separator 
     * @param {function} parser 
     * @return {Array} arr, store a series of tokens which returns by parser
     */
    function delimited(start, stop, separator, parser) {
        let arr = []
        first = true
        // skip the punctuation
        skipPunc(start)
        while (!input.eof()) {
            if (isPunc(stop)) {
                break
            }
            if (first) {
                first = false
            } else {
                skipPunc(separator)
            }
            // the last separator can be missing
            if (isPunc(stop)) {
                break
            }
            arr.push(parser())
        }
        skipPunc(stop)
        return arr
    }

    /**
     * 
     * These `maybe*` functions check what follows after 
     * an expression in order to decide whether to wrap 
     * that expression in another node, or just return it as is.
     * @param {function} expression which will parse
     * @returns {Object} ast
     */
    function maybeCall(expression) {
        expr = expression()
        return isPunc('(') ? parseCall(expr) : expr
    }

    /**
     * 
     * If it's an operator that has a higher precedence than ours, 
     * then it wraps left in a new "binary" node, and for the right 
     * side it repeats the trick at the new precedence level (*):
     * 
     * since myPrecedence is initially zero, any operator will trigger
     * the building of a "binary" node (or "assign" when the operator is =)
     * @param {Object} left 
     * @param {Number} myPrecedence
     * @return {Object} op ast
     */
    function maybeBinary(left, myPrecedence) {
        let token = isOp()
        if (token) {
            let hisPrecedence = PRECEDENCE[token.value]
            // need to read more token
            if (hisPrecedence > myPrecedence) {
                input.next()
                // check whether the right operand is a binary operation
                // firstly do the operation which is higher precedence
                // after finishing recursive, it returns the right operand
                let right = maybeBinary(parseAtom(), hisPrecedence) // (*)
                let binary = {
                    type: token.value === '=' ? 'assign' : 'binary',
                    operator: token.value,
                    left: left,
                    right: right
                }
                return maybeBinary(binary, myPrecedence)
            }
        }
        return left
    }

    /**
     * 
     * parseAtom() does the main dispatching job, 
     * depending on the current token:
     * @return {Object} ast
     */
    function parseAtom() {

        return maybeCall(function () {
            if (isPunc('(')) {
                input.next()
                let expression = parseExpression()
                skipPunc(')')
                return expression
            }
            if (isPunc('{')) {
                return parseProg()
            }
            if (isKeyword('if')) {
                return parseIf()
            }
            if (isKeyword('true') || isKeyword('false')) {
                return parseBool()
            }
            if (isKeyword('let')) {
                return parseLet()
            }

            if (isKeyword('lambda') || isKeyword('λ')) {
                return parseLambda()
            }

            let token = input.next()
            if (token.type === 'var' || token.type === 'num' || token.type === 'str') {
                return token
            }
            unexpected()
        })
    }

    /**
     * 
     * @returns {Object} bool ast
     */
    function parseBool() {
        return {
            type: 'bool',
            value: input.next().value === 'true'
        }
    }

    /**
     * 
     * Contrary to parseAtom(), this one will extend
     *  an expression as much as possible to the right
     *  using maybeBinary(), which is explained below.
     * @return {Object} ast
     */
    function parseExpression() {
        return maybeCall(function () {
            return maybeBinary(parseAtom(), 0)
        })
    }

    /**
     * 
     * @returns {any} basic type's value
     */
    function parseVarname() {
        let name = input.next()
        if (name.type !== 'var') {
            input.croak('Expecting variable name')
        }
        return name.value
    }

    /**
     * 
     * This function will be invoked when the lambda keyword 
     * has already been seen and “eaten” from the input, 
     * so all it cares for is to parse the argument names;
     * but they're in parentheses and delimited by commas.
     * @returns {Object} lambda ast 
     */
    function parseLambda() {
        input.next()
        return {
            type: 'lambda',
            // optional lambda name
            name: input.peek().type === 'var' ? input.next() : null,
            vars: delimited('(', ')', ',', parseVarname),
            body: parseExpression()
        }
    }

    /**
     * 
     * IIFE, optional name
     *  The function's argument names are the variables 
     * defined in let, and the "call" will take care to 
     * send the values in args. And the body of the function is, 
     * of course, fetched with parse_expression()
     * @returns {Object} let ast
     */
    function parseLet() {
        skipKeyward('let')
        if (input.peek().type === 'var') {
            let name = input.next().value
            let defs = delimited('(', ')', ',', parseVardef)
            return {
                type: 'call',
                func: {
                    type: 'lambda',
                    name: name,
                    vars: defs.map(function (def) {
                        return def.name
                    }),
                    body: parseExpression()
                },
                args: defs.map(function (def) {
                    return def.def || FALSE
                })
            }
        }
        return {
            type: 'let',
            vars: delimited('(', ')', ',', parseVardef),
            body: parseExpression()
        }
    }

    function parseVardef() {
        let name = parseVarname()
        let def = null
        if (isOp('=')) {
            // skip the '='
            input.next()
            def = parseExpression()
        }
        return {
            name,
            def
        }
    }

    /**
     * 
     * it will be called firstly
     * @returns {Object} prog ast
     */
    function parseToplevel() {
        let prog = []
        while (!input.eof()) {
            prog.push(parseExpression())
            if (!input.eof()) {
                skipPunc(';')
            }
        }
        return {
            type: 'prog',
            prog: prog
        }
    }

    /**
     * 
     * @returns {Object} if epxression ast
     */
    function parseIf() {
        // this throws an error if the current token is not the given keyword
        skipKeyward('if')
        // parse the condition
        let cond = parseExpression()
        // if the consequent branch doesn't 
        // start with a { we require the keyword then to be present
        if (!isPunc('{')) {
            skipKeyward('then')
        }
        let then = parseExpression()
        let ret = {
            type: 'if',
            cond: cond,
            then: then
        }
        // when there is a 'else'
        if (isKeyword('else')) {
            input.next()
            ret.else = parseExpression()
        }
        return ret
    }

    /**
     * 
     * It will do some minor optimization at this point — 
     * if the prog is empty, then it just returns FALSE. 
     * If it has a single expression, it is returned 
     * instead of a "prog" node. Otherwise it returns
     * a "prog" node containing the expressions.
     * @returns {Object} prog ast
     */
    function parseProg() {
        let prog = delimited('{', '}', ';', parseExpression)
        if (prog.length === 0) {
            return FALSE
        }
        if (prog.length === 1) {
            return prog[0]
        }
        return {
            type: 'prog',
            prog: prog
        }
    }

    /**
     * 
     * @param {any} func which is called
     * @returns {Object} call ast 
     */
    function parseCall(func) {
        return {
            type: 'call',
            func: func,
            args: delimited('(', ')', ',', parseExpression)
        }
    }
}