A C++ Detokenizer
I wrote this as an example for some of my colleagues who are less familiar with
modern C++. This C++ file contains a few examples of ways to implement a
string detokenize(vector<string> tokens, string delim)
function, which
concatenates the tokens in (tokens) with (delim) inserted between them,
ensuring that no trailing (delim) is appended.
For example:
detokenize( {"aaa", "bbb"}, "/" ) --> "aaa/bbb"
Here is the C++ source (reformatted to fit a narrower window than I normally use):
#include <algorithm>
#include <iterator>
#include <string>
#include <vector>
using namespace std;
// Naive version: builds the result by appending each token followed by
// the delimiter, then trims the one surplus delimiter left at the end.
// The result string may have to reallocate several times as it grows.
//
// tokens: the strings to join, in order.
// delim:  text inserted between consecutive tokens (default: "").
// Returns the joined string; an empty token list yields an empty string.
static std::string detokenizeV1(
    const std::vector<std::string>& tokens,
    const std::string& delim = "")
{
    std::string result;
    if (!tokens.empty())
    {
        // Bind by const reference; a plain `auto token` would copy every
        // string just to append it.
        for (const auto& token : tokens)
        {
            result += token;
            result += delim;
        }
        // Strip the excess delim from the end. The loop ran at least once,
        // so the result is guaranteed to end with a full delim.
        result.resize(result.size() - delim.size());
    }
    return result;
}
#include <sstream>
// Stream-based version: writes into an ostringstream and emits the
// delimiter after every token except the last one, so there is no
// trailing delimiter to remove afterwards.
//
// tokens: the strings to join, in order.
// delim:  text inserted between consecutive tokens (default: "").
// Returns the joined string; an empty token list yields an empty string.
static std::string detokenizeV2(
    const std::vector<std::string>& tokens,
    const std::string& delim = "")
{
    if (tokens.empty())
    {
        return std::string();
    }

    std::ostringstream buf;
    const auto lastElementIter = tokens.end() - 1;
    // Both captures are by reference: the lambda never outlives this
    // call, and capturing delim by value would copy the string.
    std::for_each(tokens.begin(), lastElementIter,
        [&delim, &buf](const std::string& token)
        {
            buf << token << delim;
        });
    // The final token is written on its own, without a delimiter.
    buf << *lastElementIter;
    return buf.str();
}
// Same idea as the stream-based version, but the loop is replaced by
// std::copy() writing into a std::ostream_iterator, which appends the
// delimiter after each element it writes. Requires <iterator>.
//
// tokens: the strings to join, in order.
// delim:  text inserted between consecutive tokens (default: "").
// Returns the joined string; an empty token list yields an empty string.
//
// NOTE: std::ostream_iterator takes its delimiter as a C string, so a
// delim containing an embedded '\0' is cut off at that character here.
static std::string detokenizeV3(
    const std::vector<std::string>& tokens,
    const std::string& delim = "") // Default delim is ""
{
    if (tokens.empty())
    {
        return std::string();
    }

    std::ostringstream buf;
    // Copy everything except the last token; each one is followed by delim.
    std::copy(
        tokens.begin(),
        tokens.end() - 1,
        std::ostream_iterator<std::string>(buf, delim.c_str()));
    // The last token is written directly so no delimiter trails it.
    buf << tokens.back();
    return buf.str();
}
#include <iostream>
// The traditional approach: spell the signature out by hand in a typedef.
// This names a function *type* (tokens, delim) -> string; a pointer to
// such a function is written detokenizer_signature_t*.
typedef std::string detokenizer_signature_t(
    const std::vector<std::string>& tokens,
    const std::string& delim);
// The C++11 approach: rather than repeating the signature, let the
// compiler deduce the function type from one of the existing
// detokenizer functions.
using detokenizer_t = decltype(detokenizeV1);
// Generic test driver, so each detokenizer does not need its own tests:
// runs the given detokenizer over a multi-token list, a single-token
// list and an empty list, printing each result to std::cout.
//
// title:               label printed in front of each result.
// detokenizerFunction: the detokenizer under test. detokenizer_t is a
//                      function type, so as a parameter it is adjusted
//                      to a pointer to function.
//
// Every case uses the same non-empty delimiter. With an empty delimiter
// a stray leading or trailing delimiter in the single-token and
// empty-list cases would not show up in the output.
static void exerciser(
    const std::string& title,
    detokenizer_t detokenizerFunction)
{
    const std::vector<std::string> tokenList{"A", "BB", "CCC", "DDDD"};
    const std::vector<std::string> singleList{"A"};
    const std::vector<std::string> emptyList{};
    const std::string delim("////");

    // Each result is followed by a blank line. The single std::endl
    // flushes once per case instead of twice.
    std::cout << title << "(tokenList)="
              << detokenizerFunction(tokenList, delim) << '\n' << std::endl;
    std::cout << title << "(singleList)="
              << detokenizerFunction(singleList, delim) << '\n' << std::endl;
    std::cout << title << "(emptyList)="
              << detokenizerFunction(emptyList, delim) << '\n' << std::endl;
}
int main(int argc, const char * argv[])
{
exerciser("detokenizeV1", detokenizeV1);
exerciser("detokenizeV2", detokenizeV2);
exerciser("detokenizeV3", detokenizeV3);
return 0;
}