@STL: std::function still has overhead, which makes it less suitable for small tasks such as a scope guard class or special callbacks.

Take this code, compile it in Release mode for Win32, and look at the generated disassembly in the debugger.

#include <iostream>
#include <functional>

using namespace std;

// Free-function callable used by the benchmark: writes "func" to standard
// output, then a newline with a flush (std::endl).
inline void func() {
    std::cout << "func" << std::endl;
}

// Function-object counterpart of func(): calling an instance writes "s_func"
// to standard output, then a newline with a flush (std::endl).
struct s_func {
    // Defined inside the class body, so it is implicitly inline.
    void operator()() {
        std::cout << "s_func" << std::endl;
    }
};

// Calls the given callable exactly once.
// The callable's concrete type is a template parameter, so the call is made
// directly on the by-value copy `f` with no type-erasing wrapper in between.
template <class Functor>
void test_template(Functor f)
{
    f();
}

// Calls the given callable exactly once through a std::function<void()>
// wrapper, which is taken by value. This is the type-erased counterpart of
// test_template.
void test_function(std::function<void()> f)
{
    f();
}

int main()
{
    test_template(func);
    test_function(func);

    test_template(s_func());
    test_function(s_func());
}

test_template(func):

    test_template(func);
00301095  mov         eax,dword ptr [__imp_std::endl (302044h)]  
0030109A  mov         ecx,dword ptr [__imp_std::cout (302068h)]  
003010A0  push        eax  
003010A1  push        offset string "func" (30214Ch)  
003010A6  push        ecx  
003010A7  call        std::operator<<<std::char_traits<char> > (301240h)  
003010AC  add         esp,8  
003010AF  mov         ecx,eax  
003010B1  call        dword ptr [__imp_std::basic_ostream<char,std::char_traits<char> >::operator<< (30204Ch)]  

test_function(func): (called functions not included)

    test_function(func);
003010B7  mov         edx,offset func (301040h)  
003010BC  test        edx,edx  
003010BE  jne         main+54h (3010C4h)  
003010C0  xor         ecx,ecx  
003010C2  jmp         main+65h (3010D5h)  
003010C4  mov         dword ptr [ebp-24h],offset std::tr1::_Impl_no_alloc0<std::tr1::_Callable_fun<void (__cdecl*const)(void),0>,void>::`vftable' (302188h)  
003010CB  mov         dword ptr [ebp-20h],offset func (301040h)  
003010D2  lea         ecx,[ebp-24h]  
003010D5  mov         dword ptr [ebp-14h],ecx  
003010D8  mov         dword ptr [ebp-4],0  
003010DF  test        ecx,ecx  
003010E1  jne         $LN163 (3010E9h)  
003010E3  call        dword ptr [__imp_std::tr1::_Xfunc (302064h)]  
$LN163:
003010E9  mov         eax,dword ptr [ecx]  
003010EB  mov         edx,dword ptr [eax+4]  
003010EE  call        edx  
003010F0  mov         dword ptr [ebp-4],0FFFFFFFFh  
003010F7  mov         ecx,dword ptr [ebp-14h]  
003010FA  test        ecx,ecx  
003010FC  je          $LN163+28h (301111h)  
003010FE  mov         eax,dword ptr [ecx]  
00301100  mov         eax,dword ptr [eax+0Ch]  
00301103  lea         edx,[ebp-24h]  
00301106  cmp         ecx,edx  
00301108  setne       dl  
0030110B  movzx       edx,dl  
0030110E  push        edx  
0030110F  call        eax  

The function 'test_template' is the clear winner. Now the sad part is that you can't do this in a class without overhead, because you have to store the function object, which isn't really supported, so you have to resort to tricks that have overhead.

// class version equivalent to test_template
// This should have been supported but nope
// Illustrative sketch only: a scope guard that would store an arbitrary
// callable and invoke it from its destructor. As written it does not compile,
// and the snippet is left without the closing `};`.
class scope_guard {
    auto f; // ill-formed: a non-static data member cannot be declared `auto`
public:
    // Accepts any callable by value and attempts to store a copy in `f`.
    template <typename Functor>
    scope_guard(Functor f_obj) : f(f_obj) { // <- ???
    }
    // Invokes the stored callable when the guard is destroyed.
    ~scope_guard() {
        f();
    }

The function "test_template" works because it doesn't have to store the function object.