CategoryJavaScript

How to customize MorganJS

MorganJS is easy to install and works nicely out of the box.

var Express = require('express');
var app = Express();
// ...
var Morgan = require('morgan');
app.use(Morgan('dev'));
// ...
app.listen(port);
console.log('Magic happens on port ' + port + ' since ' + (new Date));

Here is what it looks like. Highlighted HTTP status codes are quite useful.

Screen Shot 2015-11-08 at 14.44.07

Thankfully, it’s possible to customize MorganJS by adding tokens, which are template symbols, like this:

Morgan.token('current-user', function(req, res) {
    var result = req.currentUser ? req.currentUser.name : 'Anonymous';
    return result;
});

which can be later used like this:

// ...
var Morgan = require('morgan');
app.use(Morgan(':date[iso] :current-user :method :url :status :response-time ms - :res[content-length] bytes'));
// ...

to produce something like this:

Screen Shot 2015-11-08 at 14.52.16

Uh-oh!! Where are my colors?

I delve into MorganJS code…

/**
 * dev (colored)
 */

morgan.format('dev', function developmentFormatLine(tokens, req, res) {
  // get the status code if response written
  var status = res._header
    ? res.statusCode
    : undefined;

  // get status color
  var color = status >= 500 ? 31 // red
    : status >= 400 ? 33 // yellow
    : status >= 300 ? 36 // cyan
    : status >= 200 ? 32 // green
    : 0; // no color

  // get colored function
  var fn = developmentFormatLine[color];

  if (!fn) {
    // compile
    fn = developmentFormatLine[color] = compile('\x1b[0m:method :url \x1b['
      + color + 'm:status \x1b[0m:response-time ms - :res[content-length]\x1b[0m');
  }

  return fn(tokens, req, res);
})

As you may have noticed, the above code is hard to understand and quite hard-coded too.

  • Hard-coded because, even if the ´dev´ template is documented as ´:method :url :status :response-time ms – :res[content-length]´, it’s really embedded into the code and mixed up with extraneous bits rather than being declared into some option and used like any other MorganJS template is.
  • Hard to understand because the function object is being used as a cache for its own executions which entail a compilation step whose raison d’être I still have to grasp. I could be wrong, but this one could be a clear example of over-engineering.

However my biggest disappointment was that there is no way of reusing the colored ´:status´ token nor the coloring functionality, neither directly, by calling a method, nor indirectly, by copy-pasting some code. A total fail. :-(

Googling “terminal colors” I eventually got to this Unix StackExchange answer, which I used to write this:

function ColorFactory() {
    var result = {};

    /* beautify preserve:start */
    result.DEFAULT      = '\x1b[0m';
    result.WHITE        = '\x1b[1;37m';
    result.BLACK        = '\x1b[0;30m';
    result.BLUE         = '\x1b[0;34m';
    result.LIGHT_BLUE   = '\x1b[1;34m';
    result.GREEN        = '\x1b[0;32m';
    result.LIGHT_GREEN  = '\x1b[1;32m';
    result.CYAN         = '\x1b[0;36m';
    result.LIGHT_CYAN   = '\x1b[1;36m';
    result.RED          = '\x1b[0;31m';
    result.LIGHT_RED    = '\x1b[1;31m';
    result.PURPLE       = '\x1b[0;35m';
    result.LIGHT_PURPLE = '\x1b[1;35m';
    result.BROWN        = '\x1b[0;33m';
    result.YELLOW       = '\x1b[1;33m';
    result.GRAY         = '\x1b[0;30m';
    result.LIGHT_GRAY   = '\x1b[0;37m';
    /* beautify preserve:end */

    // ['DEFAULT', 'WHITE', 'BLACK', ...]
    result.allNames = Object.keys(result);

    // ['\x1b[0m', '\x1b[1;37m', '\x1b[0;30m', ...]
    result.allValues = result.allNames.map(function(name) {
        return result[name];
    });

    var anyValue = result.allValues.map(function(value) {
        return value.replace(/\W/g, '\\$&');
    }).join('|');
    var anySequenceOfValues = new RegExp('(?:' + result.anyValue + ')+(' + result.anyValue + ')', 'g');
    var lastValueOfSequence = '$1';

    function paint(COLOR) {
        return function(string) {
            var colored = COLOR + string + result.DEFAULT;
            var simplified = colored.replace(anySequenceOfValues, lastValueOfSequence);
            return simplified;
        }
    }

    /* beautify preserve:start */
    result.Default     = paint(result.DEFAULT);
    result.White       = paint(result.WHITE);
    result.Black       = paint(result.BLACK);
    result.Blue        = paint(result.BLUE);
    result.LightBlue   = paint(result.LIGHT_BLUE);
    result.Green       = paint(result.GREEN);
    result.LightGreen  = paint(result.LIGHT_GREEN);
    result.Cyan        = paint(result.CYAN);
    result.LightCyan   = paint(result.LIGHT_CYAN);
    result.Red         = paint(result.RED);
    result.LightRed    = paint(result.LIGHT_RED);
    result.Purple      = paint(result.PURPLE);
    result.LightPurple = paint(result.LIGHT_PURPLE);
    result.Brown       = paint(result.BROWN);
    result.Yellow      = paint(result.YELLOW);
    result.Gray        = paint(result.GRAY);
    result.LightGray   = paint(result.LIGHT_GRAY);
    /* beautify preserve:end */

    return result;
}

A nice collateral about my ´ColorFactory´ function is that I can use it also in the console like this:

var color = ColorFactory();
console.log('Magic happens on port ' + color.Purple(port) + ' since ' + (new Date));

to get something like this:

Screen Shot 2015-11-08 at 16.12.17

Finally, I was able to customize Morgan with this:

function MorganFactory() {
    var color = ColorFactory();

    var Morgan = require('morgan');

    Morgan.token('current-user', function(req, res, type) {
        var result = req.currentUser ? req.currentUser.name : 'Anonymous';
        if ('colored' == type) {
            result = req.currentUser ? color.LightBlue(req.currentUser.name) : color.LightRed('Anonymous');
        }
        return result;
    });

    var defaultStatusToken = Morgan['status'];
    Morgan.token('status', function(req, res, type) {
        var status = defaultStatusToken(req, res);
        if ('colored' == type) {
            /* beautify preserve:start */
            var result =
                  status >= 500 ? color.Red(status)
                : status >= 400 ? color.Yellow(status)
                : status >= 300 ? color.Cyan(status)
                : status >= 200 ? color.Green(status)
                : status;
            /* beautify preserve:end */            
        } else {
            result = status;
        }
        return result;
    });

    return Morgan.apply(null, arguments);
}

and use it like this:

// ...
var Morgan = MorganFactory;
app.use(Morgan(':date[iso] :current-user[colored] :method :url :status[colored] :response-time ms - :res[content-length] bytes'));
// ...

to get something like this:

Screen Shot 2015-11-08 at 16.51.54

 

How to improve filters with promises

I had been programming a filters setup for the node API of a MEAN stack app.

Having this ´User´ model:

// user.model.js (complete)

var mongoose = require('mongoose');

var schema   = new mongoose.Schema({
    name: String,
    admin: Boolean
});

module.exports = mongoose.model('User', schema);

It allowed a ´User´ controller like this:

// user.controller.js (complete)

var fields = [
    'name', 
    function (admin) { 
        return !!admin.length; 
    }
];

var Item = require('./user.model');
var Controller = require(global.absPath + '/app/shared/CRUD.controller');
module.exports = Controller(Item, fields);

The meaning should be straightforward: copy the ´name´ field as is and make the ´admin´ field a proper boolean. That was made possible by this:

// CRUD.controller.js (excerpt)

module.exports = CRUD_Controller;

function CRUD_Controller(Item, fields) {
    //...
    function Create(req, res) {

        var item = new Item();

        CopyFields(fields, req.body, item);

        item.save(function(err) {

            if (err) {
                return res.send(err);
            }

            res.json({
                message: 'Item created!'
            });

        });

    }


    function CopyFields(fields, data, item) {

        (fields || []).forEach(function(field) {

            switch (typeof field) {

                case 'string':
                    item[field] = data[field];
                    break;

                case 'function':
                    var matches = String(field).match(/^function\s*\(\s*(\w+)\s*\)/);
                    if (!(matches && matches[1])) {
                        console.log('Expected a function with only one argument.');
                        return;
                    }
                    var name = matches[1];
                    item[name] = field(data[name]);
                    break;

            }

        });

    }
    //...
}

Then I wanted to add a ´password´ field to the ´User´ model. For storing it I decided to go with Strong Password Hashing with Node.js Standard Library. Properly translated to JavaScript and slightly tweaked I got this:

// hash.js (complete)

var crypto = require('crypto');

module.exports = Hash;

return;

function Hash(options, callback) {

    // Default options.plaintext to a random 8-character string
    if (!options.plaintext) {
        return crypto.randomBytes(8, function(err, buf) {
            if (err) {
                return callback(err);
            }
            options.plaintext = buf.toString('base64');
            Hash(options, callback);
        });
    }

    // Default options.salt to a random 64-character string (512 bits)
    if (!options.salt) {
        return crypto.randomBytes(64, function(err, buf) {
            if (err) {
                return callback(err);
            }
            options.salt = buf.toString('base64');
            Hash(options, callback);
        });
    } 

    // Default options.iterations to 10k
    if (!options.iterations) {
        options.iterations = 10000;
    }

    // Default options.digest to sha1
    if (!options.digest) {
        options.digest = 'sha1';
    }

    crypto.pbkdf2(options.plaintext, options.salt, options.iterations, 64, options.digest, function(err, key) {
        if (err) {
            return callback(err);
        }
        options.algorithm = 'PBDFK2';
        options.key = key.toString('base64');
        callback(null, options);
    });

}

So my ´User´ model became this:

// user.model.js (complete)

var mongoose = require('mongoose');

var schema   = new mongoose.Schema({
    name: String,
    password: {
        algorithm:  String,
        digest:     String,
        iterations: Number,
        salt:       String,
        key:        String
    },
    admin: Boolean
});

module.exports = mongoose.model('User', schema);

Have you noticed that the ´Hash´ function relies on the asynchronous´crypto.pbkdf2´ function? That’s just standard, so I wasn’t going to use the synchronous version on a second thought.

Then my problem was:

How do I make these filters work with deferred values?

Ta-da! Promises:

// user.controller.js (complete)

var Promise = require('es6-promise').Promise;
var fields = [
    'name', 
    function (password) { 
        return new Promise(function (resolve, reject) {
            var Hash = require(global.absPath + '/app/components/auth/hash');
            Hash({plaintext: password}, function (error, result) {
                if (error) {
                    reject(Error(error));
                } else {
                    delete result.plaintext;
                    resolve(result);
                }
            });
        });
    }, 
    function (admin) { 
        return !!admin.length; 
    }
];

var Item = require('./user.model');
var Controller = require(global.absPath + '/app/shared/CRUD.controller');
module.exports = Controller(Item, fields);

To make that work I had to change a bit the ´CRUD´ controller.

The first change was to separate the filtering from the assignment, so that I could later use the ´Promise.all´ method which allows to synchronize promises and values as well. That implied to pass from a ´CopyFields´ function which filters and assigns each value in turn to a ´FilterFields´ function which filters all values at once, thus making the assignments directly in the ´Create´ function.

// CRUD.controller.js (broken excerpt) 
 
module.exports = CRUD_Controller; 
 
function CRUD_Controller(Item, fields) { 
    //... 
    function Create(req, res) {

        FilterFields(fields, req.body, function (fFields) {
            var item = new Item();

            fFields.forEach(function (fField) {
                item[fField.name] = fField.value;
            });

            item.save(function(err) {

                if (err) {
                    return res.send(err);
                }

                res.json({
                    message: 'Item created!'
                });

            });
        });

    }


    function FilterFields(fields, data, callback) {

        Promise
            .all((fields || []).map(Filter))
            .then(callback)
            .catch(function (error) {
                console.log(error);
            });


        function Filter(field) {
            var result;

            switch (typeof field) {

                case 'string':
                    result = {
                        name: field,
                        value: data[field]
                    };
                    break;

                case 'function':
                    var matches = String(field).match(/^function\s*\(\s*(\w+)\s*\)/);
                    if (!(matches && matches[1])) {
                        console.log('Expected a function with only one argument.');
                        return;
                    }
                    result = {
                        name: matches[1],
                        value: field(data[matches[1]])
                    };
                    break;

            }

            return result;
        }

    }
    //... 
}

The second change was to add a needed special treatment for my promises. You may have noticed that, in the ´case ‘function’:´ above, ´result.value´ can be a promise BUT that won’t make ´result´ a promise itself!! So the code above wouldn’t work yet, because it would complete ´Promise.all´ before getting the hashed password. Finally, I got this:

// CRUD.controller.js (working excerpt)

module.exports = CRUD_Controller; 
 
function CRUD_Controller(Item, fields) { 
    //... 
    function Create(req, res) {

        FilterFields(fields, req.body, function (fFields) {
            var item = new Item();

            fFields.forEach(function (fField) {
                item[fField.name] = fField.value;
            });

            item.save(function(err) {

                if (err) {
                    return res.send(err);
                }

                res.json({
                    message: 'Item created!'
                });

            });
        });

    }


    function FilterFields(fields, data, callback) {

        Promise
            .all((fields || []).map(Filter))
            .then(callback)
            .catch(function (error) {
                console.log(error);
            });


        function Filter(field) {
            var result;

            switch (typeof field) {

                case 'string':
                    result = {
                        name: field,
                        value: data[field]
                    };
                    break;

                case 'function':
                    var matches = String(field).match(/^function\s*\(\s*(\w+)\s*\)/);
                    if (!(matches && matches[1])) {
                        console.log('Expected a function with only one argument.');
                        return;
                    }
                    result = {
                        name: matches[1],
                        value: field(data[matches[1]])
                    };
                    if (stuff.isPromise(result.value)) {
                        var promise = new Promise(function (resolve, reject) {
                            var name = result.name;
                            result.value.then(function (value) {
                                resolve({
                                    name: name,
                                    value: value
                                });
                            }).catch(function (error) {
                                reject(Error(error));
                            });
                        });
                        result = promise;
                    }
                    break;

            }

            return result;
        }

    }
    //...
}

The added lines make ´result´ a promise if ´result.value´ is one: ´result´ will eventually resolve to the expected result. BTW, the ´stuff.isPromise´ method is the classical ´object.then && typeof object.then == ‘function’´.

How to scrape AJAX trees

DISCO, European Dictionary of Skills and Competences, offers the user a tree to be searched or browsed. Inspecting the tree nodes, we see that concepts are contained in LI elements with an liItem class. Executing $(‘.liItem’).length in the console we get 676. They claim instead to collect more than 104000 concepts. A bold claim?

A better look at the tree structure reveals that some concepts have a data-loaded attribute set to true and some set to false. In particular, true denotes readily available nodes (downloaded with the initial page load) and false denotes nodes that require an AJAX call before being displayed. Leaf nodes are always of the former kind, but internal nodes can be of both kinds. Would we get those 104000 concepts if we unfolded all the false nodes?

We’ll try. Along the way we’ll also store all nodes into a different structure, something more portable than bare HTML. JSON seems a good option. Ironically, DISCO uses getJSON to download HTML snippets. To summarize, we are now going to store all the HTML tree of DISCO into a JSON structure.

As you probably understood by reading some of my last articles, I had decided to scrape the DISCO tree by means of the support provided by jsFiddle. That was before I discovered the existence of Custom JavaScript Snippets in Google Chrome Developer Tools. Apparently they’ve been there for quite some time, now almost two years !

Screen Shot 2014-06-21 at 14.44.43

Too bad for me. At least it was fun to make DISCO’s page behave outside of DISCO’s server.

Here is the snippet I came up with:

(function() {
    
    no_frills();
        
    var limit = 0;              // a guard to safely try things out
    var node_count = 0;         // a counter of visited nodes
    var pending_responses = 0;  // a counter for tracing ajax calls

    $('body')
        .prepend('<div>Limit: <input id="limit" type="text" size="5" /> <a id="run" href="#">Run</a></div>');
    
    $('#run')
        .click(function (e) {
            e.preventDefault();

            // when the execution ends, window.disco contains all nodes
            window.disco = {nodes: {}};

            limit = $('#limit').val() * 1 || 1;
            node_count = 0;

            var horizontal_skills = $('.rootList > li').get(0);
            console.log(clean_text($(horizontal_skills).text()));
            download(horizontal_skills);

            var vertical_skills = $('.rootList > li').get(2);
            console.log(clean_text($(vertical_skills).text()));
            download(vertical_skills);
        });

    //---

    // outputs window.disco as soon as there are no pending responses
    function output_result_if_done() {
        if (0 < pending_responses) return;
        console.log('(result)', window.disco);
    }

    // downloads a node
    // node is one li.liItem DOM element
    function download(node) {
        if (limit-- <= 0) return;
        grab_contents(node);
        if (has_children(node)) {
            if (children_are_ready(node)) {
                var children = $(node).next().find('> ul > .liItem').get();
                download_children(node, children);
            }
            else {
                var url = get_children_url(node);
                ++pending_responses;
                $.getJSON(url, function(data) {
                    var children = $(data.html).find('.liItem').get();
                    download_children(node, children);
                    --pending_responses;
                    output_result_if_done();
                });
            }
        }
    }

    // downloads a node's children
    // children is an array of li.liItem DOM elements
    function download_children(node, children) {
        grab_children(node, children);
        for (var i = 0, iTop = children.length; i < iTop; i++) {
            download(children[i]);
        }
    }

    // returns the URL to download a node's contents
    function get_contents_url(node) {
        var template = 'ajax/ajaxCalls.php?ajaxFunction=loadTermData&term_id=--ID--&lang_id=0';
        var id = get_id(node);
        var result = template.replace('--ID--', id);
        return result;
    }

    // returns the URL to download a node's children
    function get_children_url(node) {
        var template = 'ajax/ajaxCalls.php?ajaxFunction=loadNode&prefix=node_&node=--ID--&lang_id=0&documents=false';
        var id = get_id(node);
        var result = template.replace('--ID--', id);
        return result;
    }

    // returns the id of a node
    function get_id(node) {
        var result = $(node).data('termid').match(/node_(\d+)_0/)[1] * 1;
        return result;
    }

    // adds a key / value pair to an id position into window.disco
    function store(id, key, value) {
        if ('label' == key) {
            console.log('new node ' + (++node_count));
        }
        console.log('storing ' + key + ' for ' + id);
        window.disco.nodes[id] = window.disco.nodes[id] || {};
        window.disco.nodes[id][key] = value;
    }

    // true if node has children, indipendently from the fact that they are ready or not
    function has_children(node) {
        var result = $(node).children(':first').is('a.dummyLink');
        return result;
    }

    // true if node's children are ready to be visited
    function children_are_ready(node) {
        var next$ = $(node).next();
        if (! (next$.is('li.noBulletsLi'))) return false;

        var child$ = next$.children(':first');
        if (! (child$.is('ul.innerUl'))) return false;

        var siblings = child$.siblings();
        if (! (siblings.length == 0)) return false;

        return true;
    }

    // grabs what we want to store about a node's contents elements
    function grab_contents(node) {

        var id = get_id(node);
        var label = clean_text($(node).find('.itemToBeAdded').text());
        store(id, 'label', label);

        var url = get_contents_url(node);
        ++pending_responses;
        $.getJSON(url, function(data) {

            var html$ = $('<div>' + data.html + '</div>');  // why .wrapAll doesn't work here?

            var stuff = {};
            stuff['term']         = {
                items$: html$.find('#infoTerm'),
                key:    null,
                value:  function(li$) { return clean_text(li$.find('.infoItemTranslation').text()); }
            };
            stuff['synonyms']     = {
                items$: html$.find('h1:contains("synonyms:")').next().find('li'),
                key:    null,
                value:  function(li$) { return clean_text(li$.text()); }
            };
            stuff['translations'] = {
                items$: html$.find('#termTranslations li'),
                key:    function(li$) { return clean_text(li$.find('.langIdentifier').text()); },
                value:  function(li$) { return clean_text(li$.find('.infoItemTranslation').text()); }
            };
            stuff['phrases']      = {
                items$: html$.find('h1:contains("attached phrases:")').next().find('li'),
                key:    null,
                value:  function(li$) { return clean_text(li$.text()); }
            };
            stuff['related']      = {
                items$: html$.find('h1:contains("related terms:")').next().find('li'),
                key:    null,
                value:  function(li$) { return clean_text(li$.text()); }
            };

            var contents = {id: id};
            $.each(stuff, function(stuff_key, stuff_value) {
                if (stuff_value.key) {
                    contents[stuff_key] = {};
                    stuff_value.items$.each(function() {
                        var this$ = $(this);
                        contents[stuff_key][stuff_value.key(this$)] = stuff_value.value(this$);
                    });
                }
                else {
                    contents[stuff_key] = [];
                    stuff_value.items$.each(function() {
                        var this$ = $(this);
                        contents[stuff_key].push(stuff_value.value(this$));
                    });
                }
            });

            store(id, 'contents', contents);

            --pending_responses;
            output_result_if_done();

        });
    }

    // grabs what we want to store about a node's children elements
    function grab_children(node, data) {
        var children = $.map(data, get_id);
        var id = get_id(node);
        store(id, 'children', children);
    }

    // cleans up a text (no leading nor trailing spaces, no trailing colon)
    function clean_text(text) {
        return text.replace(/^\s+|\s+$/, '').replace(/:$/, '');
    }

    // removes decoration elements
    function no_frills() {
        $('.mainNavi, .subNavi, #SpalteMitte, #SpalteRechts, .searchWrapper, .footer, .bannerWrapper, .contentHeader')
            .remove();
    }

})();

There are only a few things to note:

  • I’ve put a limit as a guard to safely try things out
    • with recursive structures –like trees– it’s very useful to limit actions to a small amount of nodes before going full monty
    • this limit is just how many nodes to visit, you can start with a low number like 20 or 50 and see how it works
    • you should get quite a long list of messages output to the console, and if all was fine, the last message will be the result
  • The result is a hash of node ids as keys and node objects as values
    • for example, window.disco.nodes[16901] is
      {
         "label":"aesthetic sensitivity",
         "contents":{
            "id":16091,
            "term":[
               "aesthetic sensitivity"
            ],
            "synonyms":[
               "aesthetic sense",
               "sense of aesthetics"
            ],
            "translations":{
               "CZ":"estetické cítění",
               "DE":"ästhetisches Empfinden",
               "ES":"sensibilidad estética",
               "FR":"sensibilité esthétique",
               "HU":"esztétikai érzék",
               "IT":"sensibilità estetica",
               "LT":"estetinis jautrumas",
               "PL":"wrażliwość estetyczna",
               "SK":"estetické cítenie",
               "SE":"estetisk känsla"
            },
            "phrases":[
      
            ],
            "related":[
               "tolerance of change and uncertainty"
            ]
         },
         "children":[
            15215,
            15213
         ]
      }

      which corresponds to this node in DISCO’s page
      Screen Shot 2014-06-21 at 19.09.54
  • The functions download(node) and download_children(node, children) are mutually recursive
    • their arguments are coherent, i.e. node is an LI element and children is an array of LI elements
    • the latter is not integrated into the former because we need to provide the same treatment to both children readily available and those that will be in the future
    • they start visiting from the two roots –horizontal_skills and vertical_skills– and drill down into the tree structure
  • The UI is never updated by the snippet, instead all the state is automatically kept in memory by the recursive descent
    • if you unfolded aesthetic sensitivity (node 16091) in the tree yourself between two executions with a small number of nodes (say 20), you’d get two different results
    • the first result would (probably) not show aesthetic sensitivity children while the second result would (probably) not show the last two nodes of the first result, thus keeping the number of nodes stuck to the given limit
    • if you want to go back to the initial mint state, a simple reloading won’t be enough without deleting first the session cookie
  • Finally, you can run JSON.stringify(window.disco) and get a nice JSON string which you can copy and paste somewhere and save to a file
    • the hash to string conversion is gonna need some minutes… so many in fact that I left the browser working “indefinitely” (half an hour?)
    • the resulting string is humungous too: 3.785.133 bytes (3,8 MB on disk).

Conclusions

The execution of the above snippet with a limit of 105000 nodes takes around 3 minutes on my MacBook Air with 4GB RAM. At the end, you’ll discover that the last node was number 7380 !!

Wow, that’s a huge difference from the claimed “more than 104000 concepts”. How can it be?

Even considering that they provide a multilingual thesauri with 11 languages and they could have inflated 7380 * 11 times = 81180, there is still around 28% of missing concepts. Could they have added the number of phrases? No, because they separately claim “approximately 36000 example phrases”. They could have instead added the number of synonyms.

$.map(window.disco.nodes, function(v) { 
    return v.contents.synonyms.length; 
}).reduce(function(a, b) {
    return a + b;
});

Running the above code we get 3443 synonyms, which added to 7380 concepts make for 10823 terms, which inflated 11 times make for 119053 terms in all languages.

  • 7380 * 11 + 3443 + X * 10 = 104000
  • X = (104000 – 84623) / 10
  • X = 1937.7 = 56% * 3443 = 26% * 7380

Hm, I don’t know. It seems to me that they “confused” concepts with terms and at the same time, while English synonyms count is around 47% of concepts, in all other languages synonyms count is around 26% of concepts, which is ostensibly much less.

All in all, 7380 concepts is a good number but it’s only the 7% of what they claim.

How to build, traverse and map simple trees

Trees have funny definitions like this:

A tree is either nothing or something creating more trees.

Here is some terminology.

Node.A structure holding some data.
Edge.A connection from a parent node to a child node.
Parent of a node X.Any node that creates X.
Child of a node X.Any node that is created by X.
Degree of a node X.Number of children of X.
Sibling of a node X.Any node which is another child of a parent of X.
Ancestor of a node X.Any node that is a parent of X or an ancestor of a parent of X.
Descendant of a node X.Any node that is a child of X or a descendant of a child of X.
Root node.Any node without parents.
Leaf node.Any node without children.
Internal node.Any node which is not a root nor a leaf.
Level of a node X.1 + Number of edges from a root to X.
Height of a node X.Number of edges from X to a leaf.

We call simple trees those trees where there is only one root and all other nodes have only one parent. There are many things that would be better represented by trees with multiple roots and multiple parents. Unfortunately their complexity is not really different from directed acyclic graphs, so the former are seen as a special case of the latter and treated accordingly.

Building

In JavaScript, we can build simple trees like this:

function Node(contents, children) {
    return {
        contents: contents || {},
        children: children || []
    };
}

var tree =
    Node(1, [
        Node(2, [
            Node(5),
            Node(6)
        ]),
        Node(3),
        Node(4, [
            Node(7)
        ])
    ]);

console.log(tree);

whose console output is:

simple tree

Usually we create trees for storing data together with their hierarchical relationships to other data. For example, a thesaurus easily fits into a tree. In fact, we can even define a thesaurus as a tree-like structure:

A thesaurus is either nothing or something creating a narrower thesaurus, and relating to some other thesaurus.

Thus a thesaurus is just a tree together with an additional generic relationship between its nodes.

Traversing

A tree traversal is maybe the most common operation for a tree, regarded as a collection of nodes. Each node is visited once, and there are many possible ways to do it orderly. One of the most common ways is to traverse in depth first order, i.e. starting from the root, we visit a node and each of its children. For example, in our tree (1(2(5,6),3,4(7))), a depth first traversal would visit in order: 1,2,5,6,3,4,7. It’s easy to understand why it’s a very common traversal. It’s not a coincidence that the order is the same we would get by erasing the structure.

Here is a bit of JavaScript code for traversing a tree in depth first order.

function TreeTraversal(tree) {

    var _limit = 0;

    var _index = 0;

    var _self = {
        limit: limit,
        each:  each,
        map:   map,
        tree:  tree
    };

    return _self;

    //---

    // reads / writes the maximum number of nodes to visit
    // 0 means unlimited, i.e. visit all nodes
    function limit(at) {
        if (typeof at == 'undefined') {
            return _limit;
        }
        _limit = Math.abs(at) || 0;
        return _self;
    }

    // true if the limit is still to be reached
    function before_limit() {
        return _limit == 0 || _index < _limit;
    }

    // visits each node in a pre-order depth-first traversal
    // the callback gets 'this' set to the current node 
    // the callback gets an argument set to the zero-based index of the node during the traversal
    // the callback can return false to immediately stop the traversal
    function each(callback) {
        var once_more = true;
        _index = 0;
        var remaining_nodes = [tree];

        while( once_more && before_limit() && remaining_nodes.length ) {
            var current_node = remaining_nodes.shift();
            remaining_nodes = current_node.children.concat(remaining_nodes);

            once_more = callback.call(current_node, _index++) !== false;
        }

        return _self;
    }

}

var logit = function (index) {
    console.log(index + ': ' + this.contents + ', rest ' + this.children.length + '.');
};

var myTreeNodes = TreeTraversal(tree);

console.log('--');
myTreeNodes.each(logit);

console.log('--');
myTreeNodes.limit(5).each(logit);

console.log('--');
myTreeNodes.limit(0).each(logit);

The console output is:

traversal-each

Mapping

There are many possible things we can do while traversing a tree. One of the most common is to map nodes’ contents to some transformation so that we get another tree with same structure but different data.

Here is an updated version of TreeTraversal with a new map method.

function TreeTraversal(tree) {

    var _limit = 0;

    var _index = 0;

    var _self = {
        limit: limit,
        each:  each,
        map:   map,
        tree:  tree
    };

    return _self;

    //---

    // reads / writes the maximum number of nodes to visit
    // 0 means unlimited, i.e. visit all nodes
    function limit(at) {
        if (typeof at == 'undefined') {
            return _limit;
        }
        _limit = Math.abs(at) || 0;
        return _self;
    }

    // true if the limit is still to be reached
    function before_limit() {
        return _limit == 0 || _index < _limit;
    }

    // visits each node in a pre-order depth-first traversal
    // the callback gets 'this' set to the current node 
    // the callback gets an argument set to the zero-based index of the node during the traversal
    // the callback can return false to immediately stop the traversal
    function each(callback) {
        var once_more = true;
        _index = 0;
        var remaining_nodes = [tree];

        while( once_more && before_limit() && remaining_nodes.length ) {
            var current_node = remaining_nodes.shift();
            remaining_nodes = current_node.children.concat(remaining_nodes);

            once_more = callback.call(current_node, _index++) !== false;
        }

        return _self;
    }

    // visits each node in a pre-order depth-first traversal
    // the callback gets 'this' set to the current node 
    // the callback gets an argument set to the zero-based index of the node during the traversal
    // the callback gets an argument set to the contents of the current node
    // the callback must return a transformation of the contents of the current node
    // the callback can be left undefined so that no transformation will be applyed
    function map(callback) {

        if (typeof callback == 'undefined') {
            callback = function(index, contents) {
                return contents;
            }
        }
        var nodes_to_create = [];
        var new_tree = [];

        each(function(index) {
            //logit(this);

            var contents = callback.call(this, index, this.contents);
            var new_node = Node(contents, []);
            node_created();

            var degree = this.children.length;
            if (degree) {
                // internal node
                new_tree.push(new_node);
                nodes_to_create.push(degree);
            }
            else {
                // leaf node
                add_child(new_node);
                if (is_last_child()) {
                    nodes_to_create.pop();
                    var last = new_tree.pop();
                    add_child(last);
                }
            }

        });

        return new_tree[0];

        //---

        function logit(node) {
            console.log(JSON.stringify({
                nodes_to_create: nodes_to_create,
                new_tree: new_tree
            }));
        }

        function last(array, value) {
            return array[array.length - 1];
        }

        function node_created() {
            var rest = nodes_to_create.pop();
            if (rest) {
                nodes_to_create.push(--rest);
            }
        }

        function is_last_child() {
            return last(nodes_to_create) == 0;
        }

        function add_child(new_node) {
            last(new_tree).children.push(new_node);
        }

    }

}

var myTreeNodes = TreeTraversal(tree);

console.log('--');
var new_tree = myTreeNodes.map();

console.log('--');
console.log(new_tree);

The console output is exactly the same as the one we got when building tree.

Note that both .each() and .map() of TreeTraversal share the same signature and functionality with .each() and .map() of jQuery respectively.

Here is a jsFiddle if you want to play around.

How to know whether two values are equal or not

JavaScript’s abstract (==) and strict (===) equality operators work fine for scalars like 1, 2, and ‘2’:

1 == 2     || false
2 == '2'   && true
2 === '2'  || false

Objects are another story. Two distinct objects are never equal for either strictly or abstract comparisons. An expression comparing objects is only true if the operands reference the same object. In other words, JavaScript’s equality operators applied to objects only compare their references. That is not very useful.

So I wrote a jQuery plugin to fix that issue.

$.equals = function( a, b, options ) {

    options = $.extend({

        // if true, in case of inequality, a trace of the first difference is logged to the console
        verbose: false,

        // use 'strict' for ===, 'abstract' for ==
        comparison: 'strict',

        // if true, an item is compared, otherwise it is ignored
        // receives a value and a key, returns true or false
        filter: function () {
            return true;
        }

    }, options || {});

    var initial_step = false;
    if (typeof options.trace == 'undefined') {
        initial_step = true;
        options.trace = [];
    }

    if (options.comparison == 'strict' ? a === b : a == b) {
        return true;
    }

    var a_type = $.type(a);
    var b_type = $.type(b);
    if (! (a_type == b_type)) {
        log(['types', a_type, b_type, a, b]);
        return false;
    }

    var a_keys = decompose(a, options.filter).keys;
    var b_keys = decompose(b, options.filter).keys;

    var a_length = a_keys.length;
    var b_length = b_keys.length;
    if (! (a_length == b_length)) {
        log(['lengths', a_length, b_length, a, b]);
        return false;
    }

    var length = a_length; // == b_length
    if (length == 0) { // a and b are scalars
        return a === b;
    }

    // compare corresponding elements
    for (var i = 0; i < length; i++) {
        var key = a_keys[i];
        var a_value = a[key];
        var b_value = b[key];
        var equal = $.equals( a_value, b_value, options );
        if (! equal) {
            log(['values at key "' + key + '"', a_value, b_value, a, b]);
            return false;
        }
    }

    if (initial_step) {
        log(['values', null, null, a, b]);
    }
    return true;

    //---

    // returns true if the given object is a scalar (not in JavaScript terms, though...)
    function isScalarValue(object) {
        return object === null || /undefined|boolean|number|string/.test(typeof object);
    }

    // returns a plain object with the given object's items correspondingly split into keys and values arrays
    function decompose(object, filter) {
        if (isScalarValue(object)) {
            return {
                keys: [],
                values: filter(object) ? object : null
            };
        }
        var keys = [];
        var values = [];
        for (var key in object) {
            var value = object[key];
            if (filter(value, key)) {
                keys.push(key);
                values.push(value);
            }
        }
        return {keys: keys, values: values};
    }

    // returns a plain object with the given decomposed's keys and values arrays correspondingly joined into items
    // note that
    //     (1) compose(decompose(plain_object)) == plain_object
    //     (2) compose(decompose(non_plain_object)) != non_plain_object
    function compose(decomposed) {
        var result = {};
        for (var i = 0, iTop = decomposed.keys.length; i < iTop; i++) {
            result[decomposed.keys[i]] = decomposed.values[i];
        }
        return result;
    }

    // logs given data to the console if it's the initial_step log, otherwise it only remembers
    function log(data) {
        if (! options.verbose) return;
        options.trace.push(data);
        if (initial_step) {
            console_log('');
            while (options.trace.length) {
                console_log('  ');
            }
        }

        //---

        function console_log(prefix) {
            var top = options.trace.pop();
            var data = compose({keys: ['difference', 'a_attribute', 'b_attribute', 'a', 'b'], values: top});
            if ('values' == data.difference) {
                console.log(' ($.equals)', prefix + 'comparing(', data.a, ', ', data.b, ')\n',
                    '($.equals)', prefix + '    found different values');
            }
            else {
                console.log(' ($.equals)', prefix + 'comparing(', data.a, ', ', data.b, ')\n',
                    '($.equals)', prefix + '    found different', data.difference, data.a_attribute, '!=', data.b_attribute);
            }
        }
    }
};

Here are some simple use cases

var count = 0;

console.log('--\n Case ' + (++count) + ': two undefined values');
console.log($.equals(undefined, undefined, {verbose: true}));

console.log('--\n Case ' + (++count) + ': two null values');
console.log($.equals(null, null, {verbose: true}));

console.log('--\n Case ' + (++count) + ': undefined and null values');
console.log($.equals(undefined, null, {verbose: true}));

console.log('--\n Case ' + (++count) + ': undefined and null values -- abstract comparison');
console.log($.equals(undefined, null, {verbose: true, comparison: 'abstract'}));

console.log('--\n Case ' + (++count) + ': two NaN values');
console.log($.equals(NaN, NaN, {verbose: true}));

console.log('--\n Case ' + (++count) + ': two different true values');
console.log($.equals(1, true, {verbose: true}));

console.log('--\n Case ' + (++count) + ': two different false values -- abstract comparison');
console.log($.equals(0, false, {verbose: true, comparison: 'abstract'}));

console.log('--\n Case ' + (++count) + ': same integer 123, 123');
console.log($.equals(123, 123, {verbose: true}));

console.log('--\n Case ' + (++count) + ': same number 123, Number(123)');
console.log($.equals(123, Number(123), {verbose: true}));

console.log('--\n Case ' + (++count) + ': same numeric value 123, 123.0');
console.log($.equals(123, 123.0, {verbose: true}));

console.log('--\n Case ' + (++count) + ': same numeric value 123, "123"');
console.log($.equals(123, "123", {verbose: true}));

console.log('--\n Case ' + (++count) + ': same numeric value 123, "123" -- abstract comparison');
console.log($.equals(123, "123", {verbose: true, comparison: 'abstract'}));

console.log('--\n Case ' + (++count) + ': two different strings');
console.log($.equals('a string', 'another string', {verbose: true}));

console.log('--\n Case ' + (++count) + ': two variables referencing the same object');
var test1 = {a: 1, b: 2}, test2 = test1;
console.log($.equals(test1, test2, {verbose: true}));

console.log('--\n Case ' + (++count) + ': an object copy and pasted from another one');
var test1 = {a: 1, b: 2}, test2 = {a: 1, b: 2};
console.log($.equals(test1, test2, {verbose: true}));

console.log('--\n Case ' + (++count) + ': two objects with the same keys and values, but in a different order');
var test1 = {a: 1, b: 2}, test2 = {b: 2, a: 1};
console.log($.equals(test1, test2, {verbose: true}));

console.log('--\n Case ' + (++count) + ': two objects with the same keys and values, but one different value');
var test1 = {a: 1, b: 2}, test2 = {b: 2, a: '1'};
console.log($.equals(test1, test2, {verbose: true}));

console.log('--\n Case ' + (++count) + ': two objects with the same keys and values, but one different value -- abstract comparison');
var test1 = {a: 1, b: 2}, test2 = {b: 2, a: '1'};
console.log($.equals(test1, test2, {verbose: true, comparison: 'abstract'}));

whose console output is

--
 Case 1: two undefined values
true
--
 Case 2: two null values
true
--
 Case 3: undefined and null values
 ($.equals) comparing( undefined ,  null )
 ($.equals)     found different types undefined != null
false
--
 Case 4: undefined and null values -- abstract comparison
true
--
 Case 5: two NaN values
false
--
 Case 6: two different true values
 ($.equals) comparing( 1 ,  true )
 ($.equals)     found different types number != boolean
false
--
 Case 7: two different false values -- abstract comparison
true
--
 Case 8: same integer 123, 123
true
--
 Case 9: same number 123, Number(123)
true
--
 Case 10: same numeric value 123, 123.0
true
--
 Case 11: same numeric value 123, "123"
 ($.equals) comparing( 123 ,  123 )
 ($.equals)     found different types number != string
false
--
 Case 12: same numeric value 123, "123" -- abstract comparison
true
--
 Case 13: two different strings
false
--
 Case 14: two variables referencing the same object
true
--
 Case 15: an object copy and pasted from another one
 ($.equals) comparing( Object {a: 1, b: 2} ,  Object {a: 1, b: 2} )
 ($.equals)     found different values
true
--
 Case 16: two objects with the same keys and values, but in a different order
 ($.equals) comparing( Object {a: 1, b: 2} ,  Object {b: 2, a: 1} )
 ($.equals)     found different values
true
--
 Case 17: two objects with the same keys and values, but one different value
 ($.equals) comparing( Object {a: 1, b: 2} ,  Object {b: 2, a: "1"} )
 ($.equals)     found different values at key "a" 1 != 1
 ($.equals)   comparing( 1 ,  1 )
 ($.equals)       found different types number != string
false
--
 Case 18: two objects with the same keys and values, but one different value -- abstract comparison
 ($.equals) comparing( Object {a: 1, b: 2} ,  Object {b: 2, a: "1"} )
 ($.equals)     found different values
true

Advanced usage

That’s all well and fun, but comparing non-plain objects is where $.equals shines.

With that in mind, I wrote two symmetrical operations for jQuery: scatter and gather. In Maths multiplication distributes over addition, allowing you to scatter a x (b + c) to a x b + a x c, and gather back a x b + a x c to a x (b + c). Likewise with these two operations, you can scatter jQuery over its elements: $:[ e1, e2 ] to [ $:e1, $:e2 ] and gather back [ $:e1, $:e2 ] to $:[ e1, e2 ].

//scatter( $:[ e1, e2, ... ] ) == [ $:e1, $:e2, ... ]
$.fn.scatter = function () {
    return $.map(this, function (el) { return $(el); });
};

//gather( [ $:e1, $:e2, ... ] ) == $:[ e1, e2, ... ]
$.gather = function(array) {
    return $($.map(array, function ($el) { return $el.get(); }));
};

Here are some advanced use cases

var object_$ = $('*');
var array_$_toArray = object_$.toArray();
var object_$_toArray = $(array_$_toArray);
var array_$_scatter = object_$.scatter();
var object_gather_$_scatter = $.gather(array_$_scatter);

function isDomElement(item) {
    return (item instanceof HTMLElement);
}

console.log('--\n Case ' + (++count) + ': object_$, $(object_$.toArray())');
console.log($.equals(object_$, object_$_toArray, {verbose: true}));

console.log('--\n Case ' + (++count) + ': object_$, $(object_$.toArray()) -- DOM Elements only');
console.log($.equals(object_$, object_$_toArray, {verbose: true, filter: isDomElement}));

console.log('--\n Case ' + (++count) + ': object_$, $.gather(object_$.scatter())');
console.log($.equals(object_$, object_gather_$_scatter, {verbose: true}));

console.log('--\n Case ' + (++count) + ': object_$, $.gather(object_$.scatter()) -- DOM Elements only');
console.log($.equals(object_$, object_gather_$_scatter, {verbose: true, filter: isDomElement}));

console.log('--\n Case ' + (++count) + ': $(object_$.toArray()), $.gather(object_$.scatter())');
console.log($.equals(object_$_toArray, object_gather_$_scatter, {verbose: true}));

console.log('--\n Case ' + (++count) + ': $("body"), $("head") -- DOM Elements only');
console.log($.equals($("body"), $("head"), {verbose: true, filter: isDomElement}));

whose console output is

--
 Case 19: object_$, $(object_$.toArray())
 ($.equals) comparing( 
[html, head, meta, title, script, link, style, script, body, p, div, span, prevObject: jQuery.fn.init[1], context: document, selector: "*", jquery: "1.11.2-pre ... ]
 ,  
[html, head, meta, title, script, link, style, script, body, p, div, span, jquery: "1.11.2-pre ... ]
 )
 ($.equals)     found different lengths 163 != 161
false
--
 Case 20: object_$, $(object_$.toArray()) -- DOM Elements only
 ($.equals) comparing( 
[html, head, meta, title, script, link, style, script, body, p, div, span, prevObject: jQuery.fn.init[1], context: document, selector: "*", jquery: "1.11.2-pre ... ]
 ,  
[html, head, meta, title, script, link, style, script, body, p, div, span, jquery: "1.11.2-pre ... ]
 )
 ($.equals)     found different values
true
--
 Case 21: object_$, $.gather(object_$.scatter())
 ($.equals) comparing( 
[html, head, meta, title, script, link, style, script, body, p, div, span, prevObject: jQuery.fn.init[1], context: document, selector: "*", jquery: "1.11.2-pre ... ]
 ,  
[html, head, meta, title, script, link, style, script, body, p, div, span, jquery: "1.11.2-pre ... ]
 )
 ($.equals)     found different lengths 163 != 161
false
--
 Case 22: object_$, $.gather(object_$.scatter()) -- DOM Elements only
 ($.equals) comparing( 
[html, head, meta, title, script, link, style, script, body, p, div, span, prevObject: jQuery.fn.init[1], context: document, selector: "*", jquery: "1.11.2-pre ... ]
 ,  
[html, head, meta, title, script, link, style, script, body, p, div, span, jquery: "1.11.2-pre ... ]
 )
 ($.equals)     found different values
true
--
 Case 23: $(object_$.toArray()), $.gather(object_$.scatter())
 ($.equals) comparing( 
[html, head, meta, title, script, link, style, script, body, p, div, span, jquery: "1.11.2-pre ... ]
 ,  
[html, head, meta, title, script, link, style, script, body, p, div, span, jquery: "1.11.2-pre ... ]
 )
 ($.equals)     found different values
true
--
 Case 24: $("body"), $("head") -- DOM Elements only
 ($.equals) comparing( 
[body, prevObject: jQuery.fn.init[1], context: document, selector: "body", jquery: "1.11.2-pre ... ]
 ,  
[head, prevObject: jQuery.fn.init[1], context: document, selector: "head", jquery: "1.11.2-pre ... ]
 )
 ($.equals)     found different values at key "0" <body>​…​</body>​ != <head>​…​</head>​
 ($.equals)   comparing( <body>​…​</body>​ ,  <head>​…​</head>​ )
 ($.equals)       found different values at key "lastElementChild" <div>​…​</div>​ != <script type=​"text/​javascript">​…​</script>​
 ($.equals)   comparing( <div>​…​</div>​ ,  <script type=​"text/​javascript">​…​</script>​ )
 ($.equals)       found different lengths 6 != 3
false

As you see, even if by looking at the console you can easily spot outstanding differences thanks to the great object inspection offered by the browser, my plugin is dumber by design, and in general it finds different differences. The important thing is that you get a false or a true when you really expect a false or a true, respectively.

In particular,

  • case 19 and 20 tell us that object_$ and $(object_$.toArray()) are not identical but they contain the same DOM elements
  • case 21 and 22 tell us that object_$ and $.gather(object_$.scatter()) are not identical but they contain the same DOM elements
  • case 23 tells us that $(object_$.toArray()) and $.gather(object_$.scatter()) are identical (but still different objects)
  • case 24 tells us that $(“body”) and $(“head”) are different… as expected :-)
    • please notice that they are both jQuery objects containing only one DOM element, but given the way the filter is written, $.equals tries to compare also lastElementChild. We could have taken the key into consideration like filter: function(value, key) { … }

Here is a jsFiddle if you want to play around.

 

© 2017 Notes Log

Theme by Anders NorenUp ↑