There is a performance problem in the simplified implement of std::function
we have talked about at the former post, that is a dynamic allocation is called for no matter what wrapped in the function object, and when the copy-constructor is called, which often happens when the function object is saved into a container, another dynamic allocation is fired again. This probably results in heavy performance problem.
So we are now going to see if there are some approaches to avoid dynamic allocations.
We want to get rid of new
but we also need polymorphism, so the solution is to maintenance a set of "virtual function" by ourselves. Well this is where we need function pointers.
template <typename Ret, typename Arg0, typename Arg1>
class function<Ret(Arg0, Arg1)> {
// since we don't know what exact type the wrapped object is
// store it as void*
void* callable_ptr;
// function pointers to replace the virtual functions
Ret (* call_fn)(function*, Arg0, Arg1);
void* (* clone_fn)(function const*);
void (* destruct_fn)(function*);
// The following static template functions are for the function pointers above.
// Their generally usage is to cast the 'callable_ptr' into correct type.
// By specializing these template functions with a certain 'Functor' type
// and assigning the specialized functions to the pointers above,
// we can get the stored functor object working properly.
template <typename Functor>
static Ret call(function* self, Arg0 arg0, Arg1 arg1)
{
return (*static_cast<Functor*>(self->callable_ptr))(arg0, arg1);
}
template <typename Functor>
static void* clone(function const* src)
{
new new Functor(*static_cast<Functor const*>(src->callable_ptr));
}
template <typename Functor>
static void destruct(function* self)
{
delete static_cast<Functor*>(self->callable_ptr);
}
public:
// when this function object is instantiated with a certain object
// use the type of this object to specialize above template functions
template <typename F>
function(F f)
: call_fn(call<F>)
, clone_fn(clone<F>)
, destruct_fn(destruct<F>)
, callable_ptr(new F(f))
{}
function(function const& rhs)
: call_fn(rhs.call_fn)
, clone_fn(rhs.clone_fn)
, destruct_fn(rhs.destruct_fn)
, callable_ptr(clone_fn(rhs.callable_ptr)) // when copy-constructing, use the 'clone_fn'
{}
function& operator=(function const& rhs)
{
destruct_fn(callable_ptr);
call_fn = rhs.call_fn;
clone_fn = rhs.clone_fn;
destruct_fn = rhs.destruct_fn;
callable_ptr = clone_fn(rhs.callable_ptr); // and so is copy-assigning
}
// and similar situations we make use of 'destruct_fn' and 'call_fn'
~function()
{
destruct_fn(callable_ptr);
}
Ret operator()(Arg0 arg0, Arg1 arg1)
{
return call_fn(callable_ptr, arg0, arg1);
}
};
By doing all the thing above we eliminated inheritance but still have dynamic allocation new
in it. Now we are going to fix it.
We are using a void*
to point to the functor object, right? How about fit the functor object into the space of that void*
? Yes, you may guess it. The trick to do is placement allocation. For those objects whose sizes are smaller than the size of a pointer, we specialize function templates that performs placement new and delete.
To do this, we also need a template class for tag dispatching, a class that converting compile time boolean to type for overloading. So here is the code
// template class for tag dispatching
template <bool placement>
struct Placement {};
template <>
class function<Ret(Arg0, Arg1)> {
// ...
private:
// two templates for placement new and dynamic allocation
template <typename F>
void init(F f, Placement<false>)
{
// if the size of a pointer is not enough for the functor
// same thing happens
call_fn = call<F>;
clone_fn = clone<F>;
destruct_fn = destruct<F>;
callable_ptr = new F(f);
}
template <typename F>
void init(F f, Placement<true>)
{
// otherwise, use placement function set
// see the implement of '_placement' below
call_fn = call_placement<F>;
clone_fn = clone_placement<F>;
destruct_fn = destruct_placement<F>;
new (&this->callable_ptr) F(f);
}
public:
template <typename F>
function(F f)
{
// determine which 'init' to use by the size of the functor object
init(f, Placement<(sizeof(F) <= sizeof(callable_ptr))>());
}
private:
// use these 2 functions to obtain the address of 'callable_ptr' of a function object
static void* addr_of_callable(function* f)
{
return &f->callable_ptr;
}
static void const* addr_of_callable(function const* f)
{
return &f->callable_ptr;
}
// implements for '_placement' functions
template <typename Functor>
static Ret call_placement(function* self, Arg0 arg0, Arg1 arg1)
{
// call the function in a similar way
return (*static_cast<Functor*>(addr_of_callable(self)))(arg0, arg1);
}
template <typename Functor>
static void destruct_placement(function* self)
{
// call the destructor without touching the storage space itself
static_cast<Functor*>(addr_of_callable(self))->~Functor();
}
template <typename Functor>
static void* clone_placement(function const* src)
{
// ??? not figured out yet
}
};
There is a small problem, that is how the 'clone_placement' is supposed to work. Well, in this situation, we definately cannot placement new an object and returns its address. We need some changes.
template <>
class function<Ret(Arg0, Arg1)> {
// ...
// clone function does not return anything any more,
// but to take the address of a destination function object
void (* clone_fn)(function*, function const*);
// the dynamic allocation approach is changed like this
// the 'callable_ptr' of destination is overwritten as the `new`ed object
template <typename Functor>
static void clone(function* dst, function const* src)
{
dst->callable_ptr = new Functor(*static_cast<Functor const*>(src->callable_ptr));
}
// and this is for placement new
template <typename Functor>
static void clone_placement(function* dst, function const* src)
{
new (addr_of_callable(dst)) Functor(*static_cast<Functor const*>(addr_of_callable(src)));
}
};
So this is how we solve this problem.
The following code is a complete implement along with working example.
#include <iostream>
template <bool placement>
struct Placement {};
template <typename T>
class function;
template <typename Ret, typename Arg0, typename Arg1>
class function<Ret(Arg0, Arg1)> {
typedef Ret (* CallFn)(function*, Arg0, Arg1);
typedef void (* CloneFn)(function*, function const*);
typedef void (* DestructFn)(function*);
CallFn call_fn;
CloneFn clone_fn;
DestructFn destruct_fn;
void* callable_ptr;
template <typename Functor>
static Ret call(function* self, Arg0 arg0, Arg1 arg1)
{
return (*static_cast<Functor*>(self->callable_ptr))(arg0, arg1);
}
template <typename Functor>
static void clone(function* dst, function const* src)
{
dst->callable_ptr = new Functor(*static_cast<Functor const*>(src->callable_ptr));
}
template <typename Functor>
static void destruct(function* self)
{
delete static_cast<Functor*>(self->callable_ptr);
}
static void* addr_of_callable(function* f)
{
return &f->callable_ptr;
}
static void const* addr_of_callable(function const* f)
{
return &f->callable_ptr;
}
template <typename Functor>
static Ret call_placement(function* self, Arg0 arg0, Arg1 arg1)
{
return (*static_cast<Functor*>(addr_of_callable(self)))(arg0, arg1);
}
template <typename Functor>
static void clone_placement(function* dst, function const* src)
{
new (addr_of_callable(dst)) Functor(*static_cast<Functor const*>(addr_of_callable(src)));
}
template <typename Functor>
static void destruct_placement(function* self)
{
static_cast<Functor*>(addr_of_callable(self))->~Functor();
}
template <typename F>
void init(F f, Placement<false>)
{
call_fn = call<F>;
clone_fn = clone<F>;
destruct_fn = destruct<F>;
callable_ptr = new F(f);
}
template <typename F>
void init(F f, Placement<true>)
{
call_fn = call_placement<F>;
clone_fn = clone_placement<F>;
destruct_fn = destruct_placement<F>;
new (&this->callable_ptr) F(f);
}
public:
template <typename F>
function(F f)
{
init(f, Placement<(sizeof(F) <= sizeof(callable_ptr))>());
}
function(function const& rhs)
: call_fn(rhs.call_fn)
, clone_fn(rhs.clone_fn)
, destruct_fn(rhs.destruct_fn)
{
clone_fn(this, &rhs);
}
function& operator=(function const& rhs)
{
destruct_fn(this);
call_fn = rhs.call_fn;
clone_fn = rhs.clone_fn;
destruct_fn = rhs.destruct_fn;
clone_fn(this, &rhs);
}
~function()
{
destruct_fn(this);
}
Ret operator()(Arg0 arg0, Arg1 arg1)
{
return call_fn(this, arg0, arg1);
}
};
int fn_ptr(int x, int y)
{
return x + y;
}
struct TwoInts {
TwoInts(int m_, int n_)
: m(m_)
, n(n_)
{}
int operator()(int x, int y)
{
return x + y + m + n;
}
int m;
int n;
};
struct FourInts {
FourInts(int m_, int n_, int p_, int q_)
: m(m_)
, n(n_)
, p(p_)
, q(q_)
{}
int operator()(int x, int y)
{
return x + y + m + n + p + q;
}
int m;
int n;
int p;
int q;
};
int main()
{
// 以函数指针构造
::function<int(int, int)> f(fn_ptr);
std::cout << f(1, 2) << std::endl;
// 以函数对象构造
::function<int(int, int)> g(TwoInts(10, 20));
std::cout << g(1, 2) << std::endl;
// 复制构造
::function<int(int, int)> h(g);
std::cout << h(3, 4) << std::endl;
// 赋值算符
h = f;
std::cout << h(3, 4) << std::endl;
// 以超过指针尺寸的函数对象构造的 function 赋值
h = ::function<int(int, int)>(FourInts(3, 4, 5, 6));
std::cout << h(1, 2) << std::endl;
return 0;
}
For more information:
The implement above is very similar to the implement shipped with GCC in Ubuntu 16.04 LTS, but not exactly the same. The GCC version is more complicated like it combines destruct_fn
and clone_fn
into one function which saves the size of one pointer but the storage space is larger than a regular pointer in x64, and it can fit a pointer to member function. Yes a pointer to member function takes 16 bytes in x64. As a result the size of a function
is 32 bytes, no matter how it is specialized.