Splitting String to Words Array

I’m in the process of creating an ActionScript (or any script) color highlighter in Flash, which can be used in forums to post ActionScript code.
As the user types a property or keyword, it will be highlighted in the specified color.

I’m planning to set the .htmlText property every time the user types something.
That is, I will find all the keywords and wrap them in the appropriate <font> tags.
This process becomes expensive when the string is long (too much looping). To avoid this, I’m planning to split the string into words and non-words and put them into an Array.

For example

txt="Huh! Multimedia?"
will be kept in the words list like this:
wordsArr=["Huh", "! ", "Multimedia", "?"]
in which the first element is a word, the second is a non-word, and so on.

Once I have this array, I can get the HTML formatting just by replacing the keywords in the array with <font color="#0000DD">+keyword+</font>
and finally setting .htmlText to wordsArr.join('');
When the user continues typing, I will use the caret position to find the appropriate element of the array and update it, or append a new element to the array.
For splitting the text into words I use the following function.

[UPDATE] Included the ActionScript 3 version as well 🙂 It became a lot simpler with regular expressions.

/*
 ************************************************************
   Developed by R.Arul Kumaran [[email protected]]
   for more code keep visiting [www.luracast.com/all/blog]
 ************************************************************
 */

//getWords returns Array of words and non words from a string

//gets the words in to array elements
/**
 * Splits a string into consecutive runs of word characters (\w) and
 * non-word characters (\W), preserving their original order.
 *
 * Every character of the input ends up in exactly one token, so joining
 * the returned array with an empty separator reproduces the input.
 * Tokens alternate between word runs and non-word runs; the first token
 * is a non-word run when the text starts with a non-word character.
 *
 * @param text The string to split.
 * @return Array of String tokens; empty when text is null or empty.
 */
function getWords(text:String):Array {
    var a:Array = [];
    if (text == null) {
        return a;
    }
    // Each match is one maximal run of either word or non-word characters.
    // Matching the two kinds as alternatives (instead of word/non-word/word
    // triples) also keeps text that contains no non-word character at all.
    var r:RegExp = /\w+|\W+/g;
    var o:Object;
    // With the g flag, exec() resumes from lastIndex on every call and
    // returns null once the end of the text is reached.
    while ((o = r.exec(text)) != null) {
        a.push(o[0]);
    }
    return a;
}
/*
Usage:-
 txt="Huh! Multimedia?"
 //get the words array
 wordsArr=getWords(txt);
 //wordsArr now contains ["Huh", "! ", "Multimedia", "?"]
 //in which first element is the word and second is the nonword and so on
*/
/*
/*
 ************************************************************
   Developed by R.Arul Kumaran [[email protected]]
   for more code keep visiting [www.luracast.com/all/blog]
 ************************************************************
 */

//getWords returns Array of words and non words from a string

//gets the words in to array elements
/*
   Splits txt into an Array that alternates word / non-word entries.

   Entries are always pushed in pairs, so even indexes hold words and odd
   indexes hold the run of non-word characters that follows that word.
   - When txt starts with a non-word character the first word is ''.
   - When txt ends with a word the last non-word entry is ''.
   Characters are classified with isWord(charCode).
   The resulting array length is traced before the array is returned.
 */
_global.getWords = function(txt) {
    var tokens = [];
    var total = txt.length;
    var wordStart = 0;
    var pos = 0;
    var sep;
    while (pos < total) {
        if (!isWord(txt.charCodeAt(pos))) {
            sep = txt.charAt(pos);
            if (wordStart < pos) {
                // a word ends right before this separator
                tokens.push(txt.substring(wordStart, pos));
                tokens.push(sep);
            } else if (tokens.length == 0) {
                // text starts with a separator: keep the pairing with an empty word
                tokens.push('');
                tokens.push(sep);
            } else {
                // consecutive separators are merged into the previous non-word entry
                tokens[tokens.length - 1] += sep;
            }
            wordStart = pos + 1;
        }
        pos++;
    }
    if (wordStart != total) {
        // text ends with a word: add it together with an empty non-word entry
        tokens.push(txt.substring(wordStart, total));
        tokens.push('');
    }
    trace(tokens.length);
    return tokens;
};

/*
   Tells whether the char code c belongs to a word.

   Returns false for these codes, true for everything else:
     9, 10, 13    (tab, line feed, carriage return)
     32 to 44
     46 to 47
     58 to 64
     91 to 96
     123 to 126
   Code 45 ('-') is not in the list, so it counts as a word character.
 */
_global.isWord = function(c) {
    if (c == 10 || c == 9 || c == 13) {
        return false;
    }
    if (c >= 32 && c <= 44) {
        return false;
    }
    if (c >= 46 && c <= 47) {
        return false;
    }
    if (c >= 58 && c <= 64) {
        return false;
    }
    if (c >= 91 && c <= 96) {
        return false;
    }
    if (c >= 123 && c <= 126) {
        return false;
    }
    return true;
};
//HTML Encodes the char
/*
   HTML-encodes a single character.

   chr     - the character to encode
   returns - the matching HTML entity for ' ', '<', '>', '&', '"' and "'";
             any other value is returned unchanged.

   The space is encoded as &nbsp; like the other cases that map to an
   entity; mapping it to a plain space would leave it unencoded.
 */
_global.htmlEncode = function(chr) {
    switch (chr) {
        case ' ':
            return "&nbsp;";
        case '<':
            return "&lt;";
        case '>':
            return "&gt;";
        case '&':
            return "&amp;";
        case '"':
            return '&quot;';
        case "'":
            return "&apos;";
    }
    return chr;
};
/*
   Usage:-
   txt="Huh! Multimedia?"
   //get the words array
   wordsArr=getWords(txt);
   //wordsArr now contains ["Huh", "! ", "Multimedia", "?"]
   //in which first element is the word and second is the nonword and so on
 */

One comment

Leave a Reply