2013-11-06
A C++ Detokenizer

I wrote this as an example for some of my colleagues who are not as familiar with modern C++. This C++ file contains a few examples of ways to implement a string detokenize(vector<string> tokens, string delim) function, which concatenates the tokens in (tokens) with (delim) inserted between them, ensuring that no trailing (delim) is appended after the last token.

For example:

detokenize( {"aaa", "bbb"}, "/" ) --> "aaa/bbb"

Here is the C++ source (reformatted to fit a narrower window than I normally use):

#include <algorithm>
#include <iterator>
#include <string>
#include <vector>

using namespace std;

//  This is the basic version: it builds the detokenized string by
//  appending each token followed by the delimiter, and then trims the
//  one surplus delimiter off the end.
//
//  tokens  The strings to join, in order.
//  delim   Text inserted between adjacent tokens (default: "").
//
//  Returns the joined string, or an empty string if tokens is empty.

static std::string detokenizeV1(
    const std::vector< std::string >&   tokens,
    const std::string&                  delim = "")
{
    std::string result;
    if (!tokens.empty())
    {
        // Work out the final size (including the surplus trailing
        // delim) and reserve it once, so that the appends below never
        // have to reallocate the buffer.
        std::string::size_type  totalSize = 0;
        for (const auto& token : tokens)
        {
            totalSize += token.size() + delim.size();
        }
        result.reserve(totalSize);

        // Iterate by const reference: a plain "auto token" would copy
        // every string just to read it.
        for (const auto& token : tokens)
        {
            result += token;
            result += delim;
        }

        // Strip the excess delim from the end. At least one delim was
        // appended above, so this subtraction cannot underflow.
        result.resize(result.size() - delim.size());
    }

    return result;
}



#include <sstream>


//  This version streams the tokens into an ostringstream and never
//  writes a trailing delimiter, so there is nothing to strip
//  afterwards: every token except the last is written followed by
//  delim, and the last token is written on its own.
//
//  tokens  The strings to join, in order.
//  delim   Text inserted between adjacent tokens (default: "").
//
//  Returns the joined string, or an empty string if tokens is empty.

static std::string detokenizeV2(
    const std::vector< std::string >&   tokens,
    const std::string&                  delim = "")
{
    std::string                 result;

    if (!tokens.empty())
    {
        std::ostringstream  buf;
        const auto          lastElementIter(tokens.end() - 1);

        // Capture delim by reference: capturing it by value would copy
        // the delimiter string into the closure for no benefit, since
        // the lambda never outlives this call.
        std::for_each(tokens.begin(), lastElementIter,
            [&delim, &buf](const std::string& token)
            {
                buf << token << delim;
            });
        buf << *lastElementIter;
        result = buf.str();
    }

    return result;
}


//  Here's the same idea again, but now we let the std::copy()
//  algorithm write the tokens straight into an ostream_iterator
//  (declared in <iterator>).
//
//  tokens  The strings to join, in order.
//  delim   Text inserted between adjacent tokens (default: "").
//          NOTE: ostream_iterator takes its delimiter as a
//          NUL-terminated C string, so a delim containing an embedded
//          '\0' is cut short at that character.
//
//  Returns the joined string, or an empty string if tokens is empty.

static std::string detokenizeV3(
    const std::vector< std::string >&   tokens,
    const std::string&                  delim = "") // Default delim is ""
{
    if (tokens.empty())
    {
        return std::string();
    }

    std::ostringstream  buf;

    // ostream_iterator writes its delimiter after every element it is
    // given, so copy() is only handed the tokens before the last one;
    // the last token is then written without a delimiter after it.
    std::copy(
        tokens.begin(),
        tokens.end() - 1,
        std::ostream_iterator< std::string >(buf, delim.c_str()));
    buf << tokens.back();

    return buf.str();
}


#include <iostream>

//  Here's the signature of a detokenizer spelled out by hand. Note
//  that this names a function type rather than a pointer type; a
//  function parameter declared with a function type is adjusted by
//  the language to a pointer to that function.

using detokenizer_signature_t =
    std::string(const std::vector< std::string >&, const std::string&);

//  The C++11 alternative: rather than writing the signature out, have
//  the compiler derive it with decltype from one of the detokenizer
//  functions we already have. The result is that function's type.

using detokenizer_t = decltype(detokenizeV1);


//   So I don't have to write tests for each of my detokenizers, I'll
//   write a generic tester and pass in a function pointer.

static void exerciser(
    const string& title,
    detokenizer_t detokenizerFunction)
{
    vector< string >        tokenList({"A", "BB", "CCC", "DDDD"});
    vector< string >        singleList({"A"});
    vector< string >        emptyList({});

    std::cout << title << "(tokenList)=" <<
        detokenizerFunction(tokenList, "////") << endl << endl;
    std::cout << title << "(singleList)=" <<
        detokenizerFunction(singleList, "") << endl << endl;
    std::cout << title << "(emptyList)=" <<
        detokenizerFunction(emptyList, "") << endl << endl;
}


//  Program entry point: runs the shared exerciser once for each of the
//  three detokenizer implementations, passing the implementation's
//  name as the title printed with its results.
//
//  argc and argv are accepted but not used. Always returns 0.
int main(int argc, const char * argv[])
{
    exerciser("detokenizeV1", detokenizeV1);
    exerciser("detokenizeV2", detokenizeV2);
    exerciser("detokenizeV3", detokenizeV3);

    return 0;
}