Bit Focus http://iblog.bitfoc.us How std::function is implemented [2/2] http://iblog.bitfoc.us/p/7 http://iblog.bitfoc.us/p/7 May 21 2018 - 06:40:18 +0000

The simplified implementation of std::function discussed in the previous post has a performance problem: a dynamic allocation is performed no matter what is wrapped in the function object, and whenever the copy constructor is called, which often happens when the function object is stored in a container, another dynamic allocation is performed. This can hurt performance badly.

So we are now going to see if there are some approaches to avoid dynamic allocations.

We want to get rid of new but we still need polymorphism, so the solution is to maintain a set of "virtual functions" by ourselves. This is where function pointers come in.

Code Snippet 0-0

// Primary template. Only the Ret(Arg0, Arg1) partial specialization below is
// defined; the declaration is repeated here to keep the snippet self-contained.
template <typename T>
class function;

// A type-erasing wrapper for any callable that can be invoked as
// Ret(Arg0, Arg1). Polymorphism is implemented by hand with three function
// pointers instead of virtual functions. The wrapped object is still
// allocated with 'new' in this version.
template <typename Ret, typename Arg0, typename Arg1>
class function<Ret(Arg0, Arg1)> {
    // function pointers to replace the virtual functions
    Ret (* call_fn)(function*, Arg0, Arg1);
    void* (* clone_fn)(function const*);
    void (* destruct_fn)(function*);

    // since we don't know what exact type the wrapped object is
    // store it as void*
    // (members are initialized in declaration order, so this one is declared
    // last to match the order of the constructors' initializer lists)
    void* callable_ptr;

    // The following static template functions are the targets of the function
    // pointers above. Their general job is to cast 'callable_ptr' back to the
    // correct type. By instantiating them with a certain 'Functor' type and
    // assigning the instantiations to the pointers above, the stored functor
    // object is handled with its real type.

    // Invokes the functor stored in 'self' with the two arguments.
    template <typename Functor>
    static Ret call(function* self, Arg0 arg0, Arg1 arg1)
    {
        return (*static_cast<Functor*>(self->callable_ptr))(arg0, arg1);
    }

    // Returns a newly allocated copy of the functor stored in 'src'.
    template <typename Functor>
    static void* clone(function const* src)
    {
        return new Functor(*static_cast<Functor const*>(src->callable_ptr));
    }

    // Deletes the functor stored in 'self'.
    template <typename Functor>
    static void destruct(function* self)
    {
        delete static_cast<Functor*>(self->callable_ptr);
    }
public:
    // when this function object is instantiated with a certain object
    // use the type of this object to instantiate the template functions above
    template <typename F>
    function(F f)
        : call_fn(call<F>)
        , clone_fn(clone<F>)
        , destruct_fn(destruct<F>)
        , callable_ptr(new F(f))
    {}

    // when copy-constructing, use the 'clone_fn' of the source
    function(function const& rhs)
        : call_fn(rhs.call_fn)
        , clone_fn(rhs.clone_fn)
        , destruct_fn(rhs.destruct_fn)
        , callable_ptr(rhs.clone_fn(&rhs))
    {}

    // and so is copy-assigning
    function& operator=(function const& rhs)
    {
        if (this != &rhs) {
            // clone before destroying the current functor, so that *this is
            // left untouched if copying the functor throws
            void* cloned = rhs.clone_fn(&rhs);
            destruct_fn(this);
            call_fn = rhs.call_fn;
            clone_fn = rhs.clone_fn;
            destruct_fn = rhs.destruct_fn;
            callable_ptr = cloned;
        }
        return *this;
    }

    // in similar situations we make use of 'destruct_fn' and 'call_fn'
    ~function()
    {
        destruct_fn(this);
    }

    Ret operator()(Arg0 arg0, Arg1 arg1)
    {
        return call_fn(this, arg0, arg1);
    }
};

By doing all of the above we have eliminated inheritance, but the dynamic allocation (new) is still there. Now we are going to fix that.

We are using a void* to point to the functor object, right? How about fitting the functor object into the space of that void* itself? As you may have guessed, the trick is placement new. For objects whose size is not larger than the size of a pointer, we instantiate function templates that use placement new and an explicit destructor call instead of new and delete.

To do this, we also need a class template for tag dispatching: a class that converts a compile-time boolean into a type, so that it can be used for overloading. Here is the code.

Code Snippet 0-1

// Class template for tag dispatching: it turns a compile-time boolean into a
// distinct type, so that overload resolution can choose between the two
// 'init' members below.
template <bool placement>
struct Placement {};

// NOTE(review): the 'template <>' header and the members behind '// ...' are
// abbreviated in this excerpt; presumably they are the same as in the
// previous snippet.
template <>
class function<Ret(Arg0, Arg1)> {
    // ...
private:
    // two 'init' overloads: one for dynamic allocation, one for placement new
    template <typename F>
    void init(F f, Placement<false>)
    {
        // the functor does not fit into the space of a pointer:
        // allocate it with 'new', the same as before
        call_fn = call<F>;
        clone_fn = clone<F>;
        destruct_fn = destruct<F>;
        callable_ptr = new F(f);
    }

    template <typename F>
    void init(F f, Placement<true>)
    {
        // otherwise, use the '_placement' function set
        // (see the implementations of the '_placement' functions below)
        call_fn = call_placement<F>;
        clone_fn = clone_placement<F>;
        destruct_fn = destruct_placement<F>;
        // construct the copy of 'f' directly in the bytes of 'callable_ptr'
        new (&this->callable_ptr) F(f);
    }
public:
    template <typename F>
    function(F f)
    {
        // determine which 'init' to use by the size of the functor object
        init(f, Placement<(sizeof(F) <= sizeof(callable_ptr))>());
    }
private:
    // use these 2 functions to obtain the address of the 'callable_ptr'
    // member of a function object (not the address stored in it)
    static void* addr_of_callable(function* f)
    {
        return &f->callable_ptr;
    }

    static void const* addr_of_callable(function const* f)
    {
        return &f->callable_ptr;
    }

    // implementations of the '_placement' functions
    template <typename Functor>
    static Ret call_placement(function* self, Arg0 arg0, Arg1 arg1)
    {
        // call the functor in a similar way, but the functor lives in the
        // storage of 'callable_ptr' itself
        return (*static_cast<Functor*>(addr_of_callable(self)))(arg0, arg1);
    }

    template <typename Functor>
    static void destruct_placement(function* self)
    {
        // call the destructor without touching the storage space itself
        static_cast<Functor*>(addr_of_callable(self))->~Functor();
    }

    template <typename Functor>
    static void* clone_placement(function const* src)
    {
        // ??? not figured out yet (intentionally left without a body here;
        // as written this function returns nothing)
    }
};

There is a small problem: how is 'clone_placement' supposed to work? In this situation we definitely cannot placement-new an object and return its address, because the copy has to be constructed inside the destination function object. We need some changes.

Code Snippet 0-2

// NOTE(review): the 'template <>' header and the members behind '// ...' are
// abbreviated in this excerpt; only the members that change are shown.
template <>
class function<Ret(Arg0, Arg1)> {
    // ...

    // the clone function does not return anything any more;
    // instead it takes the address of the destination function object
    // as its first parameter
    void (* clone_fn)(function*, function const*);

    // the dynamic allocation approach is changed like this:
    // the 'callable_ptr' of the destination is overwritten with the address
    // of the `new`ed copy
    template <typename Functor>
    static void clone(function* dst, function const* src)
    {
        dst->callable_ptr = new Functor(*static_cast<Functor const*>(src->callable_ptr));
    }

    // and this is for placement new: the copy is constructed directly in the
    // storage of the destination's 'callable_ptr'
    template <typename Functor>
    static void clone_placement(function* dst, function const* src)
    {
        new (addr_of_callable(dst)) Functor(*static_cast<Functor const*>(addr_of_callable(src)));
    }
};

So this is how we solve this problem.

The following code is a complete implementation along with a working example.

Code Snippet 0-3

#include <iostream>
#include <new>

// Tag type for tag dispatching: converts a compile-time boolean into a
// distinct type so that overload resolution can pick an 'init' overload.
template <bool placement>
struct Placement {};

// Primary template; only the Ret(Arg0, Arg1) specialization is defined.
template <typename T>
class function;

// A type-erasing wrapper for any copyable callable that can be invoked as
// Ret(Arg0, Arg1).
//
// Functors whose size does not exceed the size of a pointer are constructed
// directly inside the bytes of 'callable_ptr' (placement new); larger ones
// are allocated with 'new' and 'callable_ptr' points to them. Which of the
// two strategies is in use is encoded in the three function pointers.
template <typename Ret, typename Arg0, typename Arg1>
class function<Ret(Arg0, Arg1)> {
    typedef Ret (* CallFn)(function*, Arg0, Arg1);
    typedef void (* CloneFn)(function*, function const*);
    typedef void (* DestructFn)(function*);

    CallFn call_fn;
    CloneFn clone_fn;
    DestructFn destruct_fn;

    // either a pointer to the heap-allocated functor,
    // or raw storage that holds the functor itself
    void* callable_ptr;

    // ---- function set for heap-allocated functors ----

    template <typename Functor>
    static Ret call(function* self, Arg0 arg0, Arg1 arg1)
    {
        return (*static_cast<Functor*>(self->callable_ptr))(arg0, arg1);
    }

    // Stores a newly allocated copy of the functor of 'src' into 'dst'.
    template <typename Functor>
    static void clone(function* dst, function const* src)
    {
        dst->callable_ptr = new Functor(*static_cast<Functor const*>(src->callable_ptr));
    }

    template <typename Functor>
    static void destruct(function* self)
    {
        delete static_cast<Functor*>(self->callable_ptr);
    }

    // ---- function set for functors stored in place ----

    // Address of the 'callable_ptr' member itself (not the address stored in it).
    static void* addr_of_callable(function* f)
    {
        return &f->callable_ptr;
    }

    static void const* addr_of_callable(function const* f)
    {
        return &f->callable_ptr;
    }

    template <typename Functor>
    static Ret call_placement(function* self, Arg0 arg0, Arg1 arg1)
    {
        return (*static_cast<Functor*>(addr_of_callable(self)))(arg0, arg1);
    }

    // Copy-constructs the functor of 'src' inside the storage of 'dst'.
    template <typename Functor>
    static void clone_placement(function* dst, function const* src)
    {
        new (addr_of_callable(dst)) Functor(*static_cast<Functor const*>(addr_of_callable(src)));
    }

    // Runs the destructor only; the storage belongs to the function object.
    template <typename Functor>
    static void destruct_placement(function* self)
    {
        static_cast<Functor*>(addr_of_callable(self))->~Functor();
    }

    // Functor does not fit into a pointer: allocate it dynamically.
    template <typename F>
    void init(F f, Placement<false>)
    {
        call_fn = call<F>;
        clone_fn = clone<F>;
        destruct_fn = destruct<F>;
        callable_ptr = new F(f);
    }

    // Functor fits into a pointer: construct it in place.
    template <typename F>
    void init(F f, Placement<true>)
    {
        call_fn = call_placement<F>;
        clone_fn = clone_placement<F>;
        destruct_fn = destruct_placement<F>;
        new (&this->callable_ptr) F(f);
    }
public:
    // Wraps a copy of 'f'; the storage strategy is chosen by sizeof(F).
    template <typename F>
    function(F f)
    {
        init(f, Placement<(sizeof(F) <= sizeof(callable_ptr))>());
    }

    function(function const& rhs)
        : call_fn(rhs.call_fn)
        , clone_fn(rhs.clone_fn)
        , destruct_fn(rhs.destruct_fn)
        , callable_ptr(0)
    {
        clone_fn(this, &rhs);
    }

    // Replaces the wrapped functor with a copy of the one wrapped by 'rhs'.
    // If copying the functor throws, *this is left without a live functor.
    function& operator=(function const& rhs)
    {
        // self-assignment must not destroy the functor we are about to copy
        if (this != &rhs) {
            destruct_fn(this);
            call_fn = rhs.call_fn;
            clone_fn = rhs.clone_fn;
            destruct_fn = rhs.destruct_fn;
            clone_fn(this, &rhs);
        }
        return *this;
    }

    ~function()
    {
        destruct_fn(this);
    }

    Ret operator()(Arg0 arg0, Arg1 arg1)
    {
        return call_fn(this, arg0, arg1);
    }
};

// Returns the sum of its two arguments.
int fn_ptr(int x, int y)
{
    int const sum = x + y;
    return sum;
}

// Functor that adds two stored ints to the sum of its call arguments.
struct TwoInts {
    int m;
    int n;

    TwoInts(int first, int second)
        : m(first), n(second)
    {}

    // Returns x + y + m + n.
    int operator()(int x, int y)
    {
        int total = x;
        total += y;
        total += m;
        total += n;
        return total;
    }
};

// Functor that adds four stored ints to the sum of its call arguments.
struct FourInts {
    int m;
    int n;
    int p;
    int q;

    FourInts(int first, int second, int third, int fourth)
        : m(first), n(second), p(third), q(fourth)
    {}

    // Returns x + y + m + n + p + q.
    int operator()(int x, int y)
    {
        int total = x;
        total += y;
        total += m;
        total += n;
        total += p;
        total += q;
        return total;
    }
};

int main()
{
    // 以函数指针构造
    ::function<int(int, int)> f(fn_ptr);
    std::cout << f(1, 2) << std::endl;

    // 以函数对象构造
    ::function<int(int, int)> g(TwoInts(10, 20));
    std::cout << g(1, 2) << std::endl;

    // 复制构造
    ::function<int(int, int)> h(g);
    std::cout << h(3, 4) << std::endl;

    // 赋值算符
    h = f;
    std::cout << h(3, 4) << std::endl;

    // 以超过指针尺寸的函数对象构造的 function 赋值
    h = ::function<int(int, int)>(FourInts(3, 4, 5, 6));
    std::cout << h(1, 2) << std::endl;

    return 0;
}

For more information:

The implementation above is very similar to the one shipped with GCC in Ubuntu 16.04 LTS, but not exactly the same. The GCC version is more complicated: for example, it combines destruct_fn and clone_fn into one function, which saves the size of one pointer, but its storage space is larger than a regular pointer on x64 so that it can hold a pointer to member function. Yes, a pointer to member function takes 16 bytes on x64. As a result the size of a std::function is 32 bytes, no matter how it is specialized.

]]>
Troubleshooting : Upgrade pip3 in Ubuntu 16.04 http://iblog.bitfoc.us/p/6 http://iblog.bitfoc.us/p/6 May 02 2018 - 00:42:13 +0000

Generally this post is about to setup a Python3 environment on Ubuntu 16.04. However since there seems a problem in pip so let me name it in the title directly.

I started with a fresh new Ubuntu 16.04 droplet at DigitalOcean and have run these commands.

(As non-root with sudo)

Code Snippet 0-0

$ sudo apt-get update
$ sudo apt-get install python3-pip -y
$ sudo pip3 install -U pip

So pip 10 is installed and things seem fine until here. However when I attempt to run pip3, it crashes with an unclear Python stacktrace.

Code Snippet 0-1

$ pip3 --version

Traceback (most recent call last):
  File "/usr/bin/pip3", line 9, in <module>
    from pip import main
ImportError: cannot import name 'main'

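It seems that after the apt-installed pip upgrades itself to the PyPI version, the /usr/bin/pip3 script installed by apt is broken and no longer works, because it still tries to import main from pip. The idea is not to use the system-wide pip at all, but to use a pip inside a venv instead.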

So the solution is like this.

Code Snippet 0-2

$ sudo apt-get install python3-venv
$ python3 -m venv $HOME/.venv # or any directory you like
$ export PATH=$HOME/.venv/bin:$PATH

Then the pip in the virtualenv will work.

Code Snippet 0-3

$ which python
/home_directory/.venv/bin/python
$ python --version
python 3.5.2
$ which pip
/home_directory/.venv/bin/pip

Now upgrade pip with itself.

Code Snippet 0-4

$ pip install -U pip
...
$ pip --version
pip 10.0.1 from /home_directory/.venv/lib/python3.5/site-packages/pip (python 3.5)

Also, you can add export PATH=$HOME/.venv/bin:$PATH to your .bashrc or .zshrc or anything like that.

]]>
Unicode troubleshooting : strftime in Jinja2 http://iblog.bitfoc.us/p/5 http://iblog.bitfoc.us/p/5 Apr 24 2018 - 14:16:28 +0000

Say, in Python2, if we need to format a datetime object with some unicode in the format, what shall we do?

The following code looks perfect

Code Snippet 0-0

# encoding=utf-8

import jinja2
import datetime

now = datetime.datetime.now()

print(jinja2.Template(u'''{{ date.strftime('%Y 年 %m 月') }}''').render(date=now))

Except that it raises a UnicodeEncodeError.

Code Snippet 0-1

Traceback (most recent call last):
  File "test.py", line 8, in <module>
    print(jinja2.Template(u'''{{ date.strftime('%Y 年 %m 月') }}''').render(date=now))
  File "/usr/local/lib/python2.7/dist-packages/jinja2/environment.py", line 1008, in render
    return self.environment.handle_exception(exc_info, True)
  File "/usr/local/lib/python2.7/dist-packages/jinja2/environment.py", line 780, in handle_exception
    reraise(exc_type, exc_value, tb)
  File "<template>", line 1, in top-level template code
UnicodeEncodeError: 'ascii' codec can't encode character u'\u5e74' in position 3: ordinal not in range(128)

So what's wrong with that? The reason is that many standard libraries in Python2 don't have good support to unicode. What a cruel fact.

Well, since we have declared that the file is encoded with UTF-8, how about directly using a str instead of a unicode?

If we use only the standard library it actually works.

Code Snippet 0-2

# encoding=utf-8

import datetime

now = datetime.datetime.now()

print(now.strftime('%Y 年 %m 月'))

This would produce a desired output. So you may think let's remove the prefix u and the template rendering becomes fine, right?

Unfortunately, to do so will get you a UnicodeDecodeError like this

Code Snippet 0-3

Traceback (most recent call last):
  File "test.py", line 8, in <module>
    print(jinja2.Template('''{{ date.strftime('%Y 年 %m 月') }}''').render(date=now))
  File "/usr/local/lib/python2.7/dist-packages/jinja2/environment.py", line 945, in __new__
    return env.from_string(source, template_class=cls)
  File "/usr/local/lib/python2.7/dist-packages/jinja2/environment.py", line 880, in from_string
    return cls.from_code(self, self.compile(source), globals, None)
  File "/usr/local/lib/python2.7/dist-packages/jinja2/environment.py", line 579, in compile
    source = self._parse(source, name, filename)
  File "/usr/local/lib/python2.7/dist-packages/jinja2/environment.py", line 497, in _parse
    return Parser(self, source, name, encode_filename(filename)).parse()
  File "/usr/local/lib/python2.7/dist-packages/jinja2/parser.py", line 40, in __init__
    self.stream = environment._tokenize(source, name, filename, state)
  File "/usr/local/lib/python2.7/dist-packages/jinja2/environment.py", line 528, in _tokenize
    source = self.preprocess(source, name, filename)
  File "/usr/local/lib/python2.7/dist-packages/jinja2/environment.py", line 522, in preprocess
    self.iter_extensions(), text_type(source))
UnicodeDecodeError: 'ascii' codec can't decode byte 0xe5 in position 21: ordinal not in range(128)

Well, this is because Jinja2 doesn't take a str with unicode as input. It's a sad paradox that using str will make Jinja2 unhappy while using unicode will make strftime unhappy.

So the only solution is to adapt strftime to take unicode as input. And to do this we need a customized strftime implement, like this

Code Snippet 0-4

# encoding=utf-8

import jinja2
import datetime

def strftime(dt, fmt):
    """Format ``dt`` with ``fmt``, where ``fmt`` may contain non-ASCII text.

    :param dt: an object with a ``strftime`` method (e.g. ``datetime.datetime``)
    :param fmt: the format, either text or UTF-8 encoded bytes
    :return: the formatted date as text (``unicode`` on Python 2)
    """
    if str is bytes:
        # Python 2: strftime only copes with byte strings, so hand it UTF-8
        # bytes and decode the result back to unicode.
        if not isinstance(fmt, bytes):
            fmt = fmt.encode('utf-8')
        return dt.strftime(fmt).decode('utf-8')
    # Python 3: strftime takes text directly; only bytes need decoding.
    if isinstance(fmt, bytes):
        fmt = fmt.decode('utf-8')
    return dt.strftime(fmt)

now = datetime.datetime.now()

print(jinja2.Template(u'''{{ strftime(date, '%Y 年 %m 月') }}''')
                        .render(date=now, strftime=strftime))

Or to be more "jinjaic", declare a filter for it.

Code Snippet 0-5

# encoding=utf-8

import jinja2
import datetime

def strftime(dt, fmt):
    """Jinja2 filter: format ``dt`` with ``fmt``, which may contain non-ASCII text.

    :param dt: an object with a ``strftime`` method (e.g. ``datetime.datetime``)
    :param fmt: the format, either text or UTF-8 encoded bytes
    :return: the formatted date as text (``unicode`` on Python 2)
    """
    if str is bytes:
        # Python 2: strftime only copes with byte strings, so hand it UTF-8
        # bytes and decode the result back to unicode.
        if not isinstance(fmt, bytes):
            fmt = fmt.encode('utf-8')
        return dt.strftime(fmt).decode('utf-8')
    # Python 3: strftime takes text directly; only bytes need decoding.
    if isinstance(fmt, bytes):
        fmt = fmt.decode('utf-8')
    return dt.strftime(fmt)

env = jinja2.Environment(loader=jinja2.DictLoader(
            {'test': u'''{{ date|strftime('%Y 年 %m 月') }}'''}))
env.filters['strftime'] = strftime
t = env.get_template('test')
print(t.render(date=datetime.datetime.now()))

NOTE: This will NOT reproduce in Python3 since all the strings are unicodes then.

]]>
How std::function is implemented [1/2] http://iblog.bitfoc.us/p/4 http://iblog.bitfoc.us/p/4 Apr 12 2018 - 07:22:50 +0000

This article is about how std::function is implemented, and provides some implementations that compile in pre-C++11.

The std::function in C++11 is very fantastic as in a static compiling language like C++ it provides a set of interfaces to wrap any kind of callable objects. A more fantastic fact is that the only C++11 feature that std::function involves is variadic template.

So if we implement a simplified version of std::function with a fixed number of template parameters (say, 3 parameters: 1 for the return type and 2 for the parameter types), it can be done in pre-C++11, so we don't have to learn the C++11 features right now.

Let us get down to the implements.

0. One pointer version

The std::function could be implemented as a class with only one pointer as its only data member, and of course several virtual functions.

The key point is to declare a virtual base class which could be used to wrap any kinds of callable object, like this.

Code Snippet 0-0

template <typename Ret, typename Arg0, typename Arg1>
class function<Ret(Arg0, Arg1)> {
    // the virtual base class for callables
    struct callable_base {
        virtual Ret operator()(Arg0 arg0, Arg1 arg1) = 0;
        virtual ~callable_base() {}
    };

    callable_base* callable_ptr;
public:
    Ret operator()(Arg0 arg0, Arg1 arg1)
    {
        // a call to the function is routed to the member pointer.
        return (*callable_ptr)(arg0, arg1);
    }
private:
    // here is the sub-template-class that inherits callable_base
    template <typename F>
    struct callable
        : callable_base
    {
        // it can store and make use of any callable object since it is a template class
        F functor;

        callable(F functor)
            : functor(functor)
        {}

        virtual Ret operator()(Arg0 arg0, Arg1 arg1)
        {
            return functor(arg0, arg1);
        }
    };
public:
    // so that create a 'function' instance is to specialize the 'callable' template with the type of callable object
    template <typename F>
    function(F f)
        : callable_ptr(new callable<F>(f))
    {}
};

The implementation above can be instantiated and called, but apparently it leaks resources, since there is a new in the constructor but no delete in a destructor.

Before we implement the destructor, we must be aware that the copy constructor and the copy assignment operator have to be implemented along with the destructor.

So let's consider about them.

The basic idea of a copy constructor is to copy each member recursively. However, the problem here is that we don't know how to copy an object through a pointer to its abstract base. To solve this, we can add a "copy constructor" as a virtual function and implement it in the subclass.

Code Snippet 0-1

template <typename Ret, typename Arg0, typename Arg1>
class function<Ret(Arg0, Arg1)> {

    // ...
    struct callable_base {
        virtual Ret operator()(Arg0 arg0, Arg1 arg1) = 0;
        // add a clone virtual function
        virtual callable_base* clone() const = 0;
        virtual ~callable_base() {}
    };


    template <typename F>
    struct callable
        : callable_base
    {
        F functor;

        // ...

        // subclass implements it to make a copy of itself
        virtual callable_base* clone() const
        {
            return new callable<F>(functor);
        }
    };

    callable_base* callable_ptr;
public:
    // use clone to copy a function object
    function(function const& rhs)
        : callable_ptr(rhs.callable_ptr->clone())
    {}

    function& operator=(function const& rhs)
    {
        delete callable_ptr;
        callable_ptr = rhs.callable_ptr->clone();
    }

    // and delete the pointer when destructed
    ~function()
    {
        delete callable_ptr;
    }

This is an oversimplification, though it runs. Real-world implementations (like the one shipped with GCC) use more tricks to avoid heap allocation in some situations. Let me explain them in the next post.

]]>
Configurations Trouble Shooting for Postgres 9 on CentOS 7 http://iblog.bitfoc.us/p/3 http://iblog.bitfoc.us/p/3 Apr 01 2018 - 07:12:26 +0000 0. Change the Data Directory

For some reason the -D or --pgdata arguments passed to initdb command doesn't work. The solution is modifying /etc/init.d/postgres-X.Y, changing this line

PGDATA=path

And run initdb again (of course -D or --pgdata is not necessary any more).

1. Access Control

The network access is set as ident after installation. If you want to use username/password to login the database, which is much easier (but may have some security problems as you need to keep the password carefully), you need to change ... well the file name is not explicitly documented. You are supposed to execute the following command to find it out.

show hba_file

Postgres will give you the path where the configure file locates (usually a pg_hba.conf file). Open that file, modifying this line

# host  all    all     127.0.0.1/32   ident
# change it to
host    all    all     127.0.0.1/32   password

Clarification: you must use the postgres user (the UNIX user) to login your localhost Postgres server to execute show hba_file, as all other users are required to provide the ident in this case.

You might have noticed that the IP address is 127.0.0.1 in that file. This address means all connections from 127.0.0.1 (the localhost) are required to provide the username/password. If you have several hosts in your internal network, you may need change that address into this (assume your internal network has addresses in the pattern of 192.168.*.*)

host    all    all     192.168.0.0/16   password

However by default the Postgres binds only the localhost so clients from other hosts are unable to connect in the first place. In short, there is another file postgresql.conf, which is usually in the same directory of the hba_file, should be taken care of. Change this line in postgresql.conf

listen_addresses = 'localhost,192.168.x.x'

Use comma to separate adapter addresses, and replace 192.168.x.x to the address of the Postgres server.

]]>
Traits in Generic Programming http://iblog.bitfoc.us/p/2 http://iblog.bitfoc.us/p/2 Jan 10 2012 - 12:32:17 +0000 bools
template <bool AllowDuplicate, bool SortElements, bool CheckOutOfRange>
struct just_another_container;
To use that code will become some sort of nightmare, since every parameter is a bool, if you make a mistake on the order of arguments, you could hardly discover it before the program goes mad.
Another approach is, to merge all bools to a single unsigned, like
template <unsigned Policy>
struct just_another_container;
But that is not good too, since in the first place we have to define some flags
// one bit per policy flag, so that flags can be combined with bitwise-or
enum {
    ALLOW_DUP_MASK = 1 << 0,
    SORT_ELE_MASK = 1 << 1,
    CHECK_OUT_OF_RANGE_MASK = 1 << 2,
};
and then to use those flags, say, consider we add an insert interface to the container, which is concerned about whether or not allows duplicated elements in the container, the code may look like
// Public entry point: masks the duplicate-policy bit out of 'Policy' and
// forwards to the '_insert' instantiation for that value.
void insert(element_type e)
{
    _insert<Policy & ALLOW_DUP_MASK>(e);
}

// intended variant for containers that do not allow duplicates (bit cleared)
template <>
void _insert<0>(element_type e);

// intended variant for containers that allow duplicates (bit set)
template <>
void _insert<ALLOW_DUP_MASK>(element_type e);
However unfortunately that won't compile, because C++ forbid specialize template functions (whether partial or not). So, we have to put that _insert into a template struct, like
// Helper struct that carries the insert logic as a static member, so that
// the variant can be selected by specializing the struct.
// This primary template is used for every value without a specialization.
template <unsigned AllowDuplicate>
struct insert_s
{
    static void insert(just_a_container& container, element_type& e);
};

// Specialization selected when the ALLOW_DUP_MASK bit is set.
template <>
struct insert_s<ALLOW_DUP_MASK>
{
    static void insert(just_a_container& container, element_type& e);
};
That looks really weird, and for struct insert_s, It should be granted public access to just_a_container.
Besides, in the code there would be full of bitwise-and here and there like
// Example of a member that has to call the policy-dependent helpers:
// every call site must mask 'Policy' with the right flag by hand.
void another_member_function()
{
    element_type ele;
    /* wanna call insert function of policy = Policy */
    insert_s<Policy & ALLOW_DUP_MASK>::insert(*this, ele);
    /* ... */
    find_s<Policy & SORT_ELE_MASK>::insert(*this, ele);
    /* ... */
}
It requires great carefulness to use such code or the program will become out of control if any & is omitted or wrong flag is used.

But luckily, generic programming has its own way --- multiple inheritance.
If you hate object-oriented programming, please don't feel depressed. This time it's not about polymorphism. It's all about traits.

Ok, let's get down to the code. Change just_another_container into
template <typename Policy>
struct just_another_container;
Then define this kinds of empty structures
// Empty tag types; each policy derives from the common base 'policy_base'.
// (struct inheritance is public by default)
struct policy_base {};
struct allow_dup : policy_base {};
struct sort_ele : policy_base {};
struct check_out_of_range : policy_base {};
Now we are going to combine all kinds of desired traits by multiple inheritance, like
struct my_policy : public allow_dup, public sort_ele {};
just_another_container<my_policy> my_container;
Then, change insert function signature. This time, function overloading will be our friend.
void insert(element_type e, policy_base);
void insert(element_type e, allow_dup);
According to the C++ function overloading rules, an argument of a derived type is converted to the base type that is closest to it in the inheritance hierarchy. So if we call insert by passing an instance of my_policy, the overload void insert(element_type e, allow_dup) will be called. For example
// Example call site: the policy is passed as a temporary tag object and the
// matching 'insert' overload is picked by overload resolution.
void another_member_function()
{
    element_type ele;
    /* wanna call insert function of policy = my_policy */
    insert(ele, my_policy());
}
You don't have to worry about the efficiency though there is one more runtime argument, as the compiler will help optimize it.

In STL, each iterator (among std::vector::iterator, std::list::iterator, etc.) will have an iterator_category defined, which would be one of the following
InputIterator
OutputIterator
ForwardIterator
BidirectionalIterator
RandomAccessIterator
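InputIterator and OutputIterator are the 2 base types; ForwardIterator inherits InputIterator, then follows BidirectionalIterator, which inherits ForwardIterator, and RandomAccessIterator, a sub-class of BidirectionalIterator. (In the standard library these category types are spelled std::input_iterator_tag, std::output_iterator_tag, std::forward_iterator_tag, std::bidirectional_iterator_tag and std::random_access_iterator_tag.)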
Some STL algorithm will be concerned about iterator_category, and use it to match the most efficient overload. For example, std::distance will
  • simply use minus, if category is RandomAccessIterator;
  • count element one by one, if category is exactly ForwardIterator;
  • still count element one by one, if BidirectionalIterator, the sub-type of ForwardIterator is passed.
]]>
Leap Year Determination http://iblog.bitfoc.us/p/1 http://iblog.bitfoc.us/p/1 Jan 10 2012 - 12:31:39 +0000 a straightforward way to tell whether a year is leap or not. Well here is a more "compact" way (pseudocode)
func is_leap_year(year)
    return (year MOD 4 = 0) XOR (year MOD 100 = 0) XOR (year MOD 400 = 0)
where MOD is modulo, = is an equality test that yields a boolean, and XOR is exclusive or.
]]>
Circles Intersection http://iblog.bitfoc.us/p/0 http://iblog.bitfoc.us/p/0 Jan 10 2012 - 12:30:40 +0000 int intersect(struct circle_t const circles[], struct point_t intersections[]);
where point_t and circle_t are
/* A point in the plane. */
struct point_t {
    double x;
    double y;
};

/* A circle given by its center and its radius r. */
struct circle_t {
    struct point_t center; /* C needs the 'struct' keyword here */
    double r;
};
    The function intersect takes 2 circles as its parameter, returns the number of points of intersection, and puts the details of those points into the parameter intersections. Since 2 circles may have up to 2 points of intersection, we may call that function in the following way:
#include <stdio.h>

int main(void)
{
    struct circle_t circles[2]; // 2 circles
    struct point_t points[2]; // and up to 2 points

    /* Read both circles from stdin as six whitespace-separated numbers:
     *   x0 y0 r0
     *   x1 y1 r1
     * scanf returns the number of fields it converted; with fewer than 6
     * some of the fields below would be left uninitialized.
     */
    if (scanf("%lf%lf%lf%lf%lf%lf",
              &circles[0].center.x, &circles[0].center.y, &circles[0].r,
              &circles[1].center.x, &circles[1].center.y, &circles[1].r) != 6)
    {
        fputs("Invalid input: expected 6 numbers.\n", stderr);
        return 1;
    }

    /* 2 circles are the same */
    /* NOTE: since x, y and r are all of type double,
     *       use == to compare them may cause problem.
     */
    if (circles[0].center.x == circles[1].center.x
     && circles[0].center.y == circles[1].center.y
     && circles[0].r == circles[1].r)
    {
       puts("The circles are the same.");
       return 0;
    }

    /* we call intersect here.
     * both circles and points are arrays of 2 elements.
     * if there is only one point of intersection, it is stored in points[0].
     */
    switch (intersect(circles, points)) {
        case 0:
            puts("No intersection.");
            break;
        case 1:
            printf("(%.3lf %.3lf)\n", points[0].x, points[0].y);
            break;
        case 2:
            printf("(%.3lf %.3lf) (%.3lf %.3lf)\n",
                   points[0].x, points[0].y,
                   points[1].x, points[1].y);
            break;
        default:
            break;
    }
    return 0;
}
First, let's consider a simple situation: there is no intersection
#include <math.h> // we need //sqrt// and //fabs//

/* Returns the squared Euclidean distance between the points a and b. */
double distance_sqr(struct point_t const* a, struct point_t const* b)
{
    double const dx = a->x - b->x;
    double const dy = a->y - b->y;
    return dx * dx + dy * dy;
}

/* Returns the Euclidean distance between the points a and b. */
double distance(struct point_t const* a, struct point_t const* b)
{
    double const squared = distance_sqr(a, b);
    return sqrt(squared);
}

/* First step of intersect: handles only the case without any intersection.
 * circles is read as an array of 2 circles; intersections is not written
 * in this step.
 */
int intersect(struct circle_t const circles[], struct point_t intersections[])
{
    /* distance between the centers of the two circles */
    double d = distance(&circles[0].center, &circles[1].center);
    /* the centers are further apart than the sum of the radii,
     * or closer than the difference of the radii
     */
    if (d > circles[0].r + circles[1].r
     || d < fabs(circles[0].r - circles[1].r))
    {
        return 0;
    }
    /* more code goes here; as written, the other cases return nothing */
}
To solve the rest of this problem, besides the usual way of representing circles like
  • $(x - x_0)^2 + (y - y_0)^2 = r^2$, where $(x_0, y_0)$ is the center and $r$ is the radius
we will also use the parametric equations
  • $x = r \cos\theta + x_0$
  • $y = r \sin\theta + y_0$

Now, let $(x_a, y_a)$ be one of the points of intersection; then for circle 0 (that is, center $(x_0, y_0)$ and radius $r_0$), there is some $\theta_a$ that satisfies
  • $x_a = r_0 \cos\theta_a + x_0$
  • $y_a = r_0 \sin\theta_a + y_0$
then substitute $(x_a, y_a)$ into the equation of circle 1
$(r_0 \cos\theta_a + x_0 - x_1)^2 + (r_0 \sin\theta_a + y_0 - y_1)^2 = r_1^2$

Don't panic, though the equation is full of horrible trigonometric functions. We expand the left-hand side and get
$$
\begin{aligned}
\text{LEFT} &= \big((r_0 \cos\theta_a) + (x_0 - x_1)\big)^2 + \big((r_0 \sin\theta_a) + (y_0 - y_1)\big)^2 \\
&= (r_0 \cos\theta_a)^2 + (x_0 - x_1)^2 + 2 (r_0 \cos\theta_a)(x_0 - x_1) + (r_0 \sin\theta_a)^2 + (y_0 - y_1)^2 + 2 (r_0 \sin\theta_a)(y_0 - y_1) \\
&= (r_0 \cos\theta_a)^2 + (r_0 \sin\theta_a)^2 + (x_0 - x_1)^2 + (y_0 - y_1)^2 + 2 r_0 (x_0 - x_1) \cos\theta_a + 2 r_0 (y_0 - y_1) \sin\theta_a \\
&= r_0^2 + (x_0 - x_1)^2 + (y_0 - y_1)^2 + 2 r_0 (x_0 - x_1) \cos\theta_a + 2 r_0 (y_0 - y_1) \sin\theta_a \\
&= r_1^2 = \text{RIGHT}
\end{aligned}
$$

Then let
  • $a = 2 r_0 (x_0 - x_1)$
  • $b = 2 r_0 (y_0 - y_1)$
  • $c = r_1^2 - \big(r_0^2 + (x_0 - x_1)^2 + (y_0 - y_1)^2\big)$
the equation will be transformed to
    $a \cos\theta_a + b \sin\theta_a = c$
and then represent $\sin\theta_a$ as $\pm\sqrt{1 - \cos^2\theta_a}$
    $b \cdot \big(\pm\sqrt{1 - \cos^2\theta_a}\big) = c - a \cos\theta_a$
square both sides (then the annoying ± is gone)
    $b^2 (1 - \cos^2\theta_a) = c^2 + a^2 \cos^2\theta_a - 2 a c \cos\theta_a$
 $\Rightarrow\ b^2 - b^2 \cos^2\theta_a = c^2 + a^2 \cos^2\theta_a - 2 a c \cos\theta_a$
 $\Rightarrow\ (a^2 + b^2) \cos^2\theta_a - (2 a c) \cos\theta_a + (c^2 - b^2) = 0$

See, we are about to solve a quadratic equation in $\cos\theta_a$, which won't be very hard
    $\cos\theta_a = \dfrac{\pm\sqrt{q^2 - 4 p s} - q}{2 p}$
where
  • $p = a^2 + b^2$
  • $q = -2 a c$
  • $s = c^2 - b^2$

You may notice the sign ±, so how do we determine which one it should really be? We have 2 choices here: one is to carry the calculation further, until we find out whether it should be positive or negative; the other is to just verify the solution, using the magical computing power of the computer. In the latter way, what we do is substitute the following 4 candidate pairs $(\cos\theta_a, \sin\theta_a)$
  • $\left(\dfrac{\sqrt{q^2 - 4 p s} - q}{2 p},\ \sqrt{1 - \cos^2\theta_a}\right)$
  • $\left(\dfrac{-\sqrt{q^2 - 4 p s} - q}{2 p},\ \sqrt{1 - \cos^2\theta_a}\right)$
  • $\left(\dfrac{\sqrt{q^2 - 4 p s} - q}{2 p},\ -\sqrt{1 - \cos^2\theta_a}\right)$
  • $\left(\dfrac{-\sqrt{q^2 - 4 p s} - q}{2 p},\ -\sqrt{1 - \cos^2\theta_a}\right)$
into the original equation, to see which of them satisfy it.
By the way, if there is only one point of intersection, $\sqrt{q^2 - 4 p s}$ will be 0 and the sign is not important.

Here is the complete code
#include <stdio.h>
#include <math.h>

/* A point in the plane, given by its Cartesian coordinates. */
struct point_t {
    double x; /* X coordinate */
    double y; /* Y coordinate */
};

/* A circle, described by its center point and its radius. */
struct circle_t {
    struct point_t center; /* center of the circle */
    double r;              /* radius of the circle */
};

/* Squared Euclidean distance between the points *a and *b.
 * No square root is taken, so this is the cheap variant to use when only
 * the squared value is needed.
 */
double distance_sqr(struct point_t const* a, struct point_t const* b)
{
    double const delta_x = a->x - b->x;
    double const delta_y = a->y - b->y;

    return delta_x * delta_x + delta_y * delta_y;
}

/* Euclidean distance between the points *a and *b. */
double distance(struct point_t const* a, struct point_t const* b)
{
    double const squared = distance_sqr(a, b);

    return sqrt(squared);
}

int intersect(struct circle_t const circles[], struct point_t intersections[])
{
    double a, b, c, p, q, s; // we have talked about those a, b, c, p, q, s
    double cos_value[2], sin_value[2]; // values for cosθa sinθa
    double d // distance between centers of the two circles
             = distance(&circles[0].center, &circles[1].center);

    if (d > circles[0].r + circles[1].r
        || d < fabs(circles[0].r - circles[1].r))
    {
        return 0;
    }

    a = 2.0 * circles[0].r * (circles[0].center.x - circles[1].center.x);
    b = 2.0 * circles[0].r * (circles[0].center.y - circles[1].center.y);
    c = circles[1].r * circles[1].r - circles[0].r * circles[0].r
        - distance_sqr(&circles[0].center, &circles[1].center);
    p = a * a + b * b;
    q = -2.0 * a * c;

    // there is only one point of intersection
    if (d == circles[0].r + circles[1].r
     || d == fabs(circles[0].r - circles[1].r))
    {
        cos_value[0] = -q / p / 2.0;
        sin_value[0] = sqrt(1 - cos_value[0] * cos_value[0]);

        intersections[0].x = circles[0].r * cos_value[0] + circles[0].center.x;
        intersections[0].y = circles[0].r * sin_value[0] + circles[0].center.y;

        // verify the solution here, if it is not, negate sinθa
        if (distance_sqr(&intersections[0], &circles[1].center)
            != circles[1].r * circles[1].r)
        {
            intersections[0].y = circles[0].center.y
                                 - circles[0].r * sin_value[0];
        }
        return 1;
    }

    s = c * c - b * b;
    cos_value[0] = (sqrt(q * q - 4.0 * p * s) - q) / p / 2.0;
    cos_value[1] = (-sqrt(q * q - 4.0 * p * s) - q) / p / 2.0;
    sin_value[0] = sqrt(1 - cos_value[0] * cos_value[0]);
    sin_value[1] = sqrt(1 - cos_value[1] * cos_value[1]);

    intersections[0].x = circles[0].r * cos_value[0] + circles[0].center.x;
    intersections[1].x = circles[0].r * cos_value[1] + circles[0].center.x;
    intersections[0].y = circles[0].r * sin_value[0] + circles[0].center.y;
    intersections[1].y = circles[0].r * sin_value[1] + circles[0].center.y;

    // verify and correct both solutions, still by negating sinθa
    if (distance_sqr(&intersections[0], &circles[1].center)
        != circles[1].r * circles[1].r)
    {
        intersections[0].y = circles[0].center.y - circles[0].r * sin_value[0];
    }
    if (distance_sqr(&intersections[1], &circles[1].center)
        != circles[1].r * circles[1].r)
    {
        intersections[1].y = circles[0].center.y - circles[0].r * sin_value[1];
    }

    /* if we got 2 same solutions, pick one and negate its Y coord
     * it is because cosθa is used to represent X coord of the solutions,
     * so when we got same solutions the problem must be on the Y coord.
     */
    if (intersections[0].y == intersections[1].y
     && intersections[0].x == intersections[1].x)
    {
        intersections[1].y = -intersections[1].y;
    }
    return 2;
}

/* Reads two circles from standard input as six numbers, in the order
 * "x0 y0 r0 x1 y1 r1", and prints their point(s) of intersection.
 *
 * Returns 0 on success and 1 when the input cannot be parsed.
 */
int main(void)
{
    struct circle_t circles[2];
    struct point_t points[2];

    // scanf returns the number of fields converted; with fewer than 6 some
    // of the doubles below would be read uninitialized
    if (scanf("%lf%lf%lf%lf%lf%lf",
              &circles[0].center.x, &circles[0].center.y, &circles[0].r,
              &circles[1].center.x, &circles[1].center.y, &circles[1].r) != 6)
    {
        fputs("Invalid input: expected 6 numbers (x0 y0 r0 x1 y1 r1).\n",
              stderr);
        return 1;
    }

    // identical circles share infinitely many points, handle them here
    if (circles[0].center.x == circles[1].center.x
     && circles[0].center.y == circles[1].center.y
     && circles[0].r == circles[1].r)
    {
       puts("The circles are the same.");
       return 0;
    }

    switch (intersect(circles, points)) {
        case 0:
            puts("No intersection.");
            break;
        case 1:
            printf("(%.3lf %.3lf)\n", points[0].x, points[0].y);
            break;
        case 2:
            printf("(%.3lf %.3lf) (%.3lf %.3lf)\n",
                   points[0].x, points[0].y,
                   points[1].x, points[1].y);
            break;
    }
    return 0;
}
]]>