IE+JavaScript Performance Recommendations Part 2: JavaScript Code Inefficiencies

IEBlog

Internet Explorer Team Blog

IE+JavaScript Performance Recommendations Part 2: JavaScript Code Inefficiencies

  • Comments 77

Hello again, this is Peter Gurevich, Performance PM for IE. We have gotten a lot of good feedback from our first post on IE + JavaScript Performance Recommendations so I am eager to hear what you think of our second installment. This is the first of two posts concerning inefficiencies in Javascript code itself.

JScript Code Inefficiencies – Chapter One

There are plenty of ways to use JScript (and other Javascript engines) inefficiently since the interpreter performs almost zero optimizations when interpreting code. Most of the recommendations in the previous post are where a rather large inefficiency shows itself in many different forms. Here we’ll be concentrating on specific inefficiencies related to string manipulation and dynamic code evaluation.

Optimize String Manipulations by Avoiding Intermediate Results

Most JScript code used for manipulating the IE DOM requires some sort of string manipulation. While most of the performance that is lost in string manipulation is due to name resolution and binding of the properties on objects you are working with, much of it can also be explained away by making the wrong assumptions about how strings are handled.

The first piece of erroneous code comes in using the standard string concatenation operator + when working with strings. Strings are stored in Jscript on the heap and a pointer to them is located within the garbage collector. Each intermediate string manipulation results in another temporary allocation of memory. Most languages have some construct for intermediate string results, such as a StringBuilder in .NET or a buffered string class in C++. For the JScript language buffered strings are created using the Array.join function. The following sample creates a larger string by concatenating together many smaller strings.

var smallerStrings = new Array();
// Fill the array with smaller strings
var myRatherLargeString = "";
var length = smallerStrings.length;
for(var index = 0; index<length; index++)
{
    myRatherLargeString += smallerStrings[index];
}

Performance articles often note this type of construct is slow. Looks can be deceiving here since the extra overhead of obtaining the smaller, intermediate strings is making this slow. The above code sample would work much better when rewritten using Array.Join.

var myRatherLargeString = smallerStrings.join(‘’);

Not only is the code simpler, but it is also much faster for this case. When the number of values being concatenated is small enough the concatenation operator still wins. The cross-over point is not entirely deterministic, but exists somewhere in the neighborhood of 7-13 values. For larger strings the count may be much smaller. You’ll have to do performance testing when you choose this approach and make sure it fits your algorithm. The following code sample is optimized to point out the cut-off point.

<script>
var markupBuilder = new Array();
markupBuilder.push("<div id=\"");
markupBuilder.push("sampleID");
markupBuilder.push("\">");
markupBuilder.push("sampleText");
markupBuilder.push("</div>");
var markupBuilder2 = new Array();
markupBuilder2.push("<div id=\"");      // 0
markupBuilder2.push("sampleID");        // 1
markupBuilder2.push("\">");             // 2
markupBuilder2.push("sampleText");      // 3
markupBuilder2.push("</div>");          // 4
markupBuilder2.push("<div id=\"");      // 5
markupBuilder2.push("sampleID");        // 6
markupBuilder2.push("\">");             // 7
markupBuilder2.push("sampleText");      // 8
markupBuilder2.push("</div>");          // 9
markupBuilder2.push("<div id=\"");      // 10
markupBuilder2.push("sampleID");        // 11
markupBuilder2.push("\">");             // 12
markupBuilder2.push("sampleText");      // 13
markupBuilder2.push("</div>");          // 14
markupBuilder2.push("<div id=\"");      // 15
markupBuilder2.push("sampleID");        // 16
markupBuilder2.push("\">");             // 17
markupBuilder2.push("sampleText");      // 18
markupBuilder2.push("</div>");          // 19
function buildDivJoin(id, innerText, short)

    if ( short ) 
   {
        markupBuilder[1] = id;
        markupBuilder[3] = innerText;
        return markupBuilder.join(""); 
   }
     else 
   {
        markupBuilder2[1] = id;
        markupBuilder2[3] = innerText;
        markupBuilder2[6] = id;
        markupBuilder2[8] = innerText; 
        markupBuilder2[11] = id;
        markupBuilder2[13] = innerText;
        markupBuilder2[16] = innerText;
        markupBuilder2[18] = innerText; 
        return markupBuilder2.join(""); 
   }
}
function buildDivConcat(id, innerText, short)

    if ( short ) 
   {
        return "<div id=\"" + id + "\">" + innerText + "</div>"; 
    }
    else 
    { 
        return "<div id=\"" + id + "\">" + innerText + "</div>" +
            "<div id=\"" + id + "\">" + innerText + "</div>" +
            "<div id=\"" + id + "\">" + innerText + "</div>" +
            "<div id=\"" + id + "\">" + innerText + "</div>"; 
    }
}
function perfTestDivs()
{
    var start = (new Date()).getTime();
    for(var i = 0; i < 25000; i++) { buildDivJoin("foo", "innerText", true); } 
    var end = (new Date()).getTime();
    output.innerHTML += "Join short " + (end - start) + "<br>"; 
    start = (new Date()).getTime();
    for(var i = 0; i < 25000; i++) { buildDivConcat("foo", "innerText", true); }
    end = (new Date()).getTime();
    output.innerHTML += "Concat short " + (end - start) + "<br>";
    var start = (new Date()).getTime();
    for(var i = 0; i < 10000; i++) { buildDivJoin("foo", "innerText", false); }
    var end = (new Date()).getTime(); 
    output.innerHTML += "Join long " + (end - start) + "<br>";
    start = (new Date()).getTime();
    for(var i = 0; i < 10000; i++) { buildDivConcat("foo", "innerText", false); }
    end = (new Date()).getTime(); 
    output.innerHTML += "Concat long " + (end - start) + "<br>";
}
function perfTest()

    output.innerHTML = ""; 
    perfTestDivs();
}
</script>
<body>
<button onclick="perfTest()">Performance Test</button>
<div id="output"></div>
</body>

Even with the concatenation operator winning every now and then it isn’t always best to use it when you are unsure about the destination of your operation. As noted in the symbolic look-up section in the previous post , it can be very expensive to work on a DOM property  such as innerHTML and concatenate text a little bit at a time. Even worse, property operations are not guaranteed to round trip so operations performed bit by bit against the DOM may not be the same as performing them all at once.

<head>
<script>
function InjectMangle()
{
      var strContent = "<object>Fallback Text</object>";
      var oDiv = document.getElementById("injectSpot");
      oDiv.innerHTML = strContent;
      setTimeout("CheckInject()", 1000);
}
function CheckInject()
{
      var oDiv = document.getElementById("injectSpot");
      alert(oDiv.innerHTML);
}
</script>
</head>
<body>
<button onclick="InjectMangle();">Perform Injection Check</button>
<div id="injectSpot"></div>
</body>

The general wisdom here is to first create your final strings in local variables if possible and then copy them into the properties on your objects. Especially if the properties you are working with may change their values if you do your work in parts. Concatenation of large strings is done much faster if you use Array.join and you’ll also save a lot of memory that would have been used for storing intermediate string results. Finally, never use the concatenation operator if you are working with properties on objects within IE or any other ActiveX control. If at all possible it is best to avoid the extra name look-ups and potentially expensive property accesses.

Running Code Using the ‘eval’ Statement is Expensive

The title explains it all. The eval statement in JScript is both expensive in terms of performance and prone to error if the site is generating dynamically the script to be run. If you can remove it from your application you should do so. You’ll need to replace it with functional equivalents where the dynamically evaluated code can be simulated by changing the input parameters. This general wisdom not only leads to faster code in most cases but can also make your program more reliable and easy to understand. If arbitrary code is executing on the client machine it becomes hard to determine how it failed.

For those interested in the “why”, execution of an arbitrary string of script involves the construction of a new script runtime, copying the context of the currently executing script, manipulation of the runtime stack, and a bit of other work. This isn’t much different from running other code, but isn’t nearly as fast as writing the inline branching code to handle all of the possibilities of the dynamically generated code. There are obviously good uses for the eval statement, just examine your project thoroughly before taking a dependency on it.

Requirements of Eval for JSON Expressions

Now that we’ve warned against eval let’s talk about when it can be useful! JSON is becoming more and more popular as a data protocol written on top of the JScript language. Along with any protocol it has concerns for security and tampering and therein lies a problem. Security means validating large chunks of data for potentially malicious code and of course all of this takes time. The purpose behind JSON was to quickly and easily transfer data into in memory usable structures, something faster than parsing, and in some cases it can actually be slower so you have to be careful. Those cases where it tends to be slower are when your data strings begin to get extremely large. The evaluation time of the security regular expression and creating a temporary copy of the script to be evaluated (both properties of the current standard JSON parser) start to add up. When your JSON requests start generating 200K character strings you might want to think about your process.

What can you do here? Well, if you use JSON follow the standards and don’t try to change the published protocols since that could negatively affect the performance in one or more browsers that you don’t test against. Make sure you put constraints on your data sizes so that the JSON protocol evaluation doesn’t take too long and hang the browser. Remember, while those structures are being built, nothing else can be done.

Switch Blocks are Linear Evaluation Tables

The switch statement doesn’t know that the values are sorted in some way that lends itself to effectively a nested if structure – most languages will heavily optimize this, as Justin talked about on his blog a couple of years ago, but JavaScript doesn’t. If you have a large switch it is best to break this down into a series of nested if statement comparisons, an array where you can perform a binary search, or my personal favorite a sparse array (hash-table). The following code sample translates id values to their string identifiers. This is a common feature to reduce the amount of data transferred between the client and the server.

<script>
function mapIdToString(id)

    switch(id) 
    { 
        case 0: 
            return "User Record"; 
        case 1: 
            return "Management Record"; 
        case 2: 
            return "Product Record"; 
        case 3: 
            return "Sale Record"; 
        default: 
            return "Product Identifier " + id; 
    }
}
</script>

As you can see we have a couple of common cases and a default clause that maps anything left over. We might be calculating a few thousand of these over and over again and running through the same switch. As we add more known values the number of comparisons increases linearly. At some point the switch becomes increasingly slower than writing the expanded set of if statements with range checks (a static binary search of the data). In fact, a simple optimization depending on the data, might just be to add a check for any value that would end up in the default clause and prevent doing all of the extra known value checks just to create a new product identifier.

For larger data sets you can use an array to map id’s to names based on indexing into the array. If the id is a sparse value (not incrementing) then you might need to sort the array and perform a binary search for the id of interest. This requires maintaining some structure that can hold both the id and the string value and is outside of the scope of the article, implementing the sort algorithm for this structure, and creating a binary search routine in JScript. While we could add that type of information here the next option is much easier to implement and provides nearly all of the same benefits.

JScript maintains an associative array (also known as a hash-table) that can be used for any mapping operations. A simple HTML tag parser might take advantage of this by inserting functions for handling a particular tag type into a hash-table indexed by tag name. This would be significantly more robust and surprisingly faster than the equivalent switch statement. As a technical note on the script itself, we’ve placed a try…catch block to catch malformed input. Where this block should reside and what happens on error is up to the algorithm. The normal switch statement is already robust to malformed input.

<script>
function Parser_Div(tokenizer, perf)

      if ( !perf ) 
            output.innerHTML += "Parsing a DIV<br>";
}
function Parser_Span(tokenizer, perf)

      if ( !perf ) 
            output.innerHTML += "Parsing a SPAN<br>";
}
function Parser_Para(tokenizer, perf)

      if ( !perf ) 
            output.innerHTML += "Parsing a P<br>";
}
var o = new Array();
o["DIV"] = Parser_Div;
o["SPAN"] = Parser_Span;
o["P"] = Parser_Para;
function Parser(perf)

      if ( !perf ) 
            output.innerHTML = ""; // Clear our output 
      var tokenizer = source.innerText.split(' '); 
      for(var i = 0; i < tokenizer.length; i++) 
      { 
            o[tokenizer[i]](tokenizer, perf); 
      }
}
function SwitchParser(perf)

      var tokenizer = source.innerText.split(' '); 
      for(var i = 0; i < tokenizer.length; i++) 
      { 
            switch(tokenizer[i]) 
            { 
                  case "DIV": Parser_Div(tokenizer, perf); 
                  case "SPAN": Parser_Span(tokenizer, perf); 
                  case "P": Parser_Para(tokenizer, perf); 
            } 
      }
}
function Perf()

      output.innerHTML = ""; 
      var start = (new Date()).getTime(); 
      for(var i = 0; i < 5000; i++) { Parser(true); } 
      var end = (new Date()).getTime(); 
      output.innerHTML += "Hash Parser " + (end - start) + "<br>"; 
      start = (new Date()).getTime(); 
      for(var i = 0; i < 5000; i++) { SwitchParser(true); } 
      end = (new Date()).getTime(); 
      output.innerHTML += "Switch Parser " + (end - start) + "<br>"; 
      start = (new Date()).getTime(); 
      for(var i = 0; i < 5000; i++) { Parser(true); } 
      end = (new Date()).getTime(); 
      output.innerHTML += "Hash Parser " + (end - start) + "<br>"; 
      start = (new Date()).getTime();       for(var i = 0; i < 5000; i++) { SwitchParser(true); } 
      end = (new Date()).getTime(); 
      output.innerHTML += "Switch Parser " + (end - start) + "<br>";
}
</script>
<button onclick="Parser(false);">Parse!</button>
<button onclick="Perf();">Perf!</button>
<div id="source">DIV SPAN P DIV SPAN P DIV SPAN P DIV SPAN P DIV SPAN P DIV SPAN P DIV SPAN P DIV SPAN P DIV SPAN P DIV SPAN P DIV SPAN P DIV SPAN P</div>
<div id="output"></div>

For such a small number the switch is likely faster, as we start growing the number of tags handled the hash-table will eventually win. Depending on the format of your data make use of the appropriate constructs and avoid the switch when a large number of comparisons are going to be needed.

That’s all for Part 2.

Thanks,

Peter Gurevich
Program Manager

Justin Rogers
Software Development Engineer

  • Loading...