Joshua Burkholder

Back to Profile: Burkholder

Comments

  • C9 Lectures: Stephan T Lavavej - Advanced STL, 1 of n

    To make this potential VC++ 2010 bug (explicit template argument type disagreement) a little more obvious, here is some much shorter code that pertains just to the bug:

    #include <iostream>
    #include <type_traits>
    
    using namespace std;
    
    // Reports whether the template parameter F and the declared type of the
    // function parameter ff name the same type.
    template < typename F >
    bool is_F_same_as_decltype_ff ( F ff ) {
        // Give the declared type of the parameter a name of its own.
        typedef decltype( ff ) declared_parameter_type;
        // Reference ff once so that no unreferenced-formal-parameter warning fires.
        static_cast< void >( ff );
        return std::is_same< F, declared_parameter_type >::value;
    }
    
    // Reports whether F is exactly the function type void (); the argument
    // itself is never inspected, only its (deduced or explicit) type.
    template < typename F >
    bool is_F_same_as_void_void_func_type ( F ) {
        typedef void plain_function_type ();
        return std::is_same< F, plain_function_type >::value;
    }
    
    // Reports whether F is exactly the pointer-to-function type void (*) ();
    // the argument itself is never inspected, only its type.
    template < typename F >
    bool is_F_same_as_ptr_to_void_void_func_type ( F ) {
        typedef void ( * function_pointer_type ) ();
        return std::is_same< F, function_pointer_type >::value;
    }
    
    // Deliberately empty: only the type of f, void (), matters to the checks.
    void f () {}
    
    int main () {
        // One divider literal shared by every section of the report.
        char const * const divider = "=================================================";

        std::cout << divider << std::endl;
        std::cout << std::endl;

        // Section 1 -- implicit template arguments: F is deduced from f.
        std::cout << "is_F_same_as_decltype_ff( f ): " // vc++: 1, g++: 1
                  << is_F_same_as_decltype_ff( f ) << std::endl;
        std::cout << std::endl;
        std::cout << "Is F void ()?     " // vc++: 0, g++: 0
                  << is_F_same_as_void_void_func_type( f ) << std::endl;
        std::cout << "Is F void (*) ()? " // vc++: 1, g++: 1
                  << is_F_same_as_ptr_to_void_void_func_type( f ) << std::endl;
        std::cout << std::endl;

        std::cout << divider << std::endl;
        std::cout << std::endl;

        // Section 2 -- explicit template arguments: F is given as decltype( f ).
        std::cout << "is_F_same_as_decltype_ff< decltype( f ) >( f ): " // vc++: 0, g++: 1
                  << is_F_same_as_decltype_ff< decltype( f ) >( f ) << std::endl;
        std::cout << std::endl;
        std::cout << "Is F void ()?     " // vc++: 1, g++: 1
                  << is_F_same_as_void_void_func_type< decltype( f ) >( f ) << std::endl;
        std::cout << "Is F void (*) ()? " // vc++: 0, g++: 0
                  << is_F_same_as_ptr_to_void_void_func_type< decltype( f ) >( f ) << std::endl;
        std::cout << std::endl;

        std::cout << divider << std::endl;
        std::cout << std::endl;

        // Section 3 -- explicit template arguments again, this time with the
        // function type written out literally as void ().
        std::cout << "is_F_same_as_decltype_ff< void () >( f ): " // vc++: 0, g++: 1
                  << is_F_same_as_decltype_ff< void () >( f ) << std::endl;
        std::cout << std::endl;
        std::cout << "Is F void ()?     " // vc++: 1, g++: 1
                  << is_F_same_as_void_void_func_type< void () >( f ) << std::endl;
        std::cout << "Is F void (*) ()? " // vc++: 0, g++: 0
                  << is_F_same_as_ptr_to_void_void_func_type< void () >( f ) << std::endl;
        std::cout << std::endl;

        std::cout << divider << std::endl;

        return 0;
    }
    On VC++ 2010, this produces the following output:
    =================================================
    
    is_F_same_as_decltype_ff( f ): 1
    
    Is F void ()?     0
    Is F void (*) ()? 1
    
    =================================================
    
    is_F_same_as_decltype_ff< decltype( f ) >( f ): 0
    
    Is F void ()?     1
    Is F void (*) ()? 0
    
    =================================================
    
    is_F_same_as_decltype_ff< void () >( f ): 0
    
    Is F void ()?     1
    Is F void (*) ()? 0
    
    =================================================
    While on g++ 4.5.2, this produces the following output:
    =================================================
    
    is_F_same_as_decltype_ff( f ): 1
    
    Is F void ()?     0
    Is F void (*) ()? 1
    
    =================================================
    
    is_F_same_as_decltype_ff< decltype( f ) >( f ): 1
    
    Is F void ()?     1
    Is F void (*) ()? 0
    
    =================================================
    
    is_F_same_as_decltype_ff< void () >( f ): 1
    
    Is F void ()?     1
    Is F void (*) ()? 0
    
    =================================================
    Since we have F ff as the parameter of the is_F_same_as_decltype_ff() function, it seems like F should __always__ agree with decltype( ff ).  In other words, the bug is that there is type disagreement when explicit template arguments are used (esp. when there is no type disagreement when implicit template arguments are deduced).

    Hope This Clarifies,
    Joshua Burkholder

  • C9 Lectures: Stephan T Lavavej - Advanced STL, 1 of n

    @STL:I am a bonehead! ... I finally understand your post ... with a little help from page 168 of "C++ Templates: The Complete Guide".

    Note To Self:  Implicitly deduced template arguments can decay.  Explicit template arguments cannot.

    In my search for clarity, I may have found a **** potential bug **** in VC++ 2010.  Here's the code that helped me out ... I'll explain the potential bug at the end:

    #include <iostream>
    #include <type_traits>
    
    using namespace std;
    
    // Deliberately empty function; decltype( f ) is the function type void ().
    void f () {}
    
    // Prints how decltype( f_ ) and F each compare with void (), void (*) ()
    // and void (&) () when the parameter is taken by value.
    template< typename F >
    void check_similarity_1 ( F f_ ) {
        static_cast< void >( f_ ); // silences unused-parameter warnings

        // With __implicit__ deduction a function argument decays from void ()
        // to void (*) (), so both decltype( f_ ) and F are void (*) ().
        typedef decltype( f_ ) parameter_type;
        typedef void function_type ();
        typedef void ( * function_pointer_type ) ();
        typedef void ( & function_reference_type ) ();

        std::cout << "is_same< decltype( f_ ), void () >::value:     "
                  << std::is_same< parameter_type, function_type >::value << std::endl;
        std::cout << "is_same< decltype( f_ ), void (*) () >::value: "
                  << std::is_same< parameter_type, function_pointer_type >::value << std::endl;
        std::cout << "is_same< decltype( f_ ), void (&) () >::value: "
                  << std::is_same< parameter_type, function_reference_type >::value << std::endl;

        std::cout << "is_same< F, void () >::value:     "
                  << std::is_same< F, function_type >::value << std::endl;
        std::cout << "is_same< F, void (*) () >::value: "
                  << std::is_same< F, function_pointer_type >::value << std::endl;
        std::cout << "is_same< F, void (&) () >::value: "
                  << std::is_same< F, function_reference_type >::value << std::endl;
    }
    
    // Prints how decltype( f_ptr ) and F each compare with void (), void (*) ()
    // and void (&) () when the parameter is declared as a pointer to F.
    template < typename F >
    void check_similarity_2 ( F * f_ptr ) {
        static_cast< void >( f_ptr ); // silences unused-parameter warnings

        // With __implicit__ deduction a function argument decays from void ()
        // to void (*) (); matching that against F * leaves decltype( f_ptr ) as
        // void (*) () and F as void ().
        typedef decltype( f_ptr ) parameter_type;
        typedef void function_type ();
        typedef void ( * function_pointer_type ) ();
        typedef void ( & function_reference_type ) ();

        std::cout << "is_same< decltype( f_ptr ), void () >::value:     "
                  << std::is_same< parameter_type, function_type >::value << std::endl;
        std::cout << "is_same< decltype( f_ptr ), void (*) () >::value: "
                  << std::is_same< parameter_type, function_pointer_type >::value << std::endl;
        std::cout << "is_same< decltype( f_ptr ), void (&) () >::value: "
                  << std::is_same< parameter_type, function_reference_type >::value << std::endl;

        std::cout << "is_same< F, void () >::value:     "
                  << std::is_same< F, function_type >::value << std::endl;
        std::cout << "is_same< F, void (*) () >::value: "
                  << std::is_same< F, function_pointer_type >::value << std::endl;
        std::cout << "is_same< F, void (&) () >::value: "
                  << std::is_same< F, function_reference_type >::value << std::endl;
    }
    
    // Prints how decltype( f_ref ) and F each compare with void (), void (*) ()
    // and void (&) () when the parameter is declared as a reference to F.
    template < typename F >
    void check_similarity_3 ( F & f_ref ) {
        static_cast< void >( f_ref ); // silences unused-parameter warnings

        // With __implicit__ deduction a function argument binds as void (&) ();
        // decltype( f_ref ) is therefore void (&) () and F is void ().
        typedef decltype( f_ref ) parameter_type;
        typedef void function_type ();
        typedef void ( * function_pointer_type ) ();
        typedef void ( & function_reference_type ) ();

        std::cout << "is_same< decltype( f_ref ), void () >::value:     "
                  << std::is_same< parameter_type, function_type >::value << std::endl;
        std::cout << "is_same< decltype( f_ref ), void (*) () >::value: "
                  << std::is_same< parameter_type, function_pointer_type >::value << std::endl;
        std::cout << "is_same< decltype( f_ref ), void (&) () >::value: "
                  << std::is_same< parameter_type, function_reference_type >::value << std::endl;

        std::cout << "is_same< F, void () >::value:     "
                  << std::is_same< F, function_type >::value << std::endl;
        std::cout << "is_same< F, void (*) () >::value: "
                  << std::is_same< F, function_pointer_type >::value << std::endl;
        std::cout << "is_same< F, void (&) () >::value: "
                  << std::is_same< F, function_reference_type >::value << std::endl;
    }
    
    int main () {
        // std::is_same is handed its template arguments __explicitly__, so no
        // __implicit__ deduction takes place and the function type does not
        // decay to a pointer type: decltype( f ) is still void () here.
        typedef decltype( f ) function_type;

        std::cout << "main():" << std::endl;
        std::cout << "-------" << std::endl;
        std::cout << "is_same< decltype( f ), void () >::value:     "
                  << std::is_same< function_type, void () >::value << std::endl;
        std::cout << "is_same< decltype( f ), void (*) () >::value: "
                  << std::is_same< function_type, void (*) () >::value << std::endl;
        std::cout << "is_same< decltype( f ), void (&) () >::value: "
                  << std::is_same< function_type, void (&) () >::value << std::endl;
        std::cout << std::endl;

        // Parameter taken by value, F deduced.
        std::cout << "check_similarity_1( F f_ ):" << std::endl;
        std::cout << "---------------------------" << std::endl;
        check_similarity_1( f );
        std::cout << std::endl;

        // Parameter taken as pointer to F, F deduced.
        std::cout << "check_similarity_2( F * f_ptr ):" << std::endl;
        std::cout << "--------------------------------" << std::endl;
        check_similarity_2( f );
        std::cout << std::endl;

        // Parameter taken as reference to F, F deduced.
        std::cout << "check_similarity_3( F & f_ref ):" << std::endl;
        std::cout << "--------------------------------" << std::endl;
        check_similarity_3( f );
        std::cout << std::endl;

        // Parameter taken by value, F supplied explicitly.
        // Does the following produce a bug in VC++ 2010?
        std::cout << "check_similarity_1< decltype( f ) >( F f_ ):" << std::endl;
        std::cout << "--------------------------------------------" << std::endl;
        check_similarity_1< decltype( f ) >( f );
        std::cout << std::endl;

        return 0;
    }
    
    In VC++ 2010, this produces the following output:
    main():
    -------
    is_same< decltype( f ), void () >::value:     1
    is_same< decltype( f ), void (*) () >::value: 0
    is_same< decltype( f ), void (&) () >::value: 0
    
    check_similarity_1( F f_ ):
    ---------------------------
    is_same< decltype( f_ ), void () >::value:     0
    is_same< decltype( f_ ), void (*) () >::value: 1
    is_same< decltype( f_ ), void (&) () >::value: 0
    is_same< F, void () >::value:     0
    is_same< F, void (*) () >::value: 1
    is_same< F, void (&) () >::value: 0
    
    check_similarity_2( F * f_ptr ):
    --------------------------------
    is_same< decltype( f_ptr ), void () >::value:     0
    is_same< decltype( f_ptr ), void (*) () >::value: 1
    is_same< decltype( f_ptr ), void (&) () >::value: 0
    is_same< F, void () >::value:     1
    is_same< F, void (*) () >::value: 0
    is_same< F, void (&) () >::value: 0
    
    check_similarity_3( F & f_ref ):
    --------------------------------
    is_same< decltype( f_ref ), void () >::value:     0
    is_same< decltype( f_ref ), void (*) () >::value: 0
    is_same< decltype( f_ref ), void (&) () >::value: 1
    is_same< F, void () >::value:     1
    is_same< F, void (*) () >::value: 0
    is_same< F, void (&) () >::value: 0
    
    check_similarity_1< decltype( f ) >( F f_ ):
    --------------------------------------------
    is_same< decltype( f_ ), void () >::value:     0
    is_same< decltype( f_ ), void (*) () >::value: 1
    is_same< decltype( f_ ), void (&) () >::value: 0
    is_same< F, void () >::value:     1
    is_same< F, void (*) () >::value: 0
    is_same< F, void (&) () >::value: 0
    In g++ 4.5.2, this code produces the following output:
    main():
    -------
    is_same< decltype( f ), void () >::value:     1
    is_same< decltype( f ), void (*) () >::value: 0
    is_same< decltype( f ), void (&) () >::value: 0
    
    check_similarity_1( F f_ ):
    ---------------------------
    is_same< decltype( f_ ), void () >::value:     0
    is_same< decltype( f_ ), void (*) () >::value: 1
    is_same< decltype( f_ ), void (&) () >::value: 0
    is_same< F, void () >::value:     0
    is_same< F, void (*) () >::value: 1
    is_same< F, void (&) () >::value: 0
    
    check_similarity_2( F * f_ptr ):
    --------------------------------
    is_same< decltype( f_ptr ), void () >::value:     0
    is_same< decltype( f_ptr ), void (*) () >::value: 1
    is_same< decltype( f_ptr ), void (&) () >::value: 0
    is_same< F, void () >::value:     1
    is_same< F, void (*) () >::value: 0
    is_same< F, void (&) () >::value: 0
    
    check_similarity_3( F & f_ref ):
    --------------------------------
    is_same< decltype( f_ref ), void () >::value:     0
    is_same< decltype( f_ref ), void (*) () >::value: 0
    is_same< decltype( f_ref ), void (&) () >::value: 1
    is_same< F, void () >::value:     1
    is_same< F, void (*) () >::value: 0
    is_same< F, void (&) () >::value: 0
    
    check_similarity_1< decltype( f ) >( F f_ ):
    --------------------------------------------
    is_same< decltype( f_ ), void () >::value:     1
    is_same< decltype( f_ ), void (*) () >::value: 0
    is_same< decltype( f_ ), void (&) () >::value: 0
    is_same< F, void () >::value:     1
    is_same< F, void (*) () >::value: 0
    is_same< F, void (&) () >::value: 0
    Everything agrees between the output from VC++ 2010 and g++ 4.5.2, except the "check_similarity_1< decltype( f ) >( F f_ )" section ... and there is the potential bug.

    **** potential bug ****:  In VC++ 2010 when I make the call to check_similarity_1< decltype( f ) >( f ) inside of main(), is_same< decltype( f_ ), void (*) () >::value is 1 within that check_similarity_1 function ... even though I explicitly declared the template parameter F to be "void ()" (i.e. decltype( f ) ).  Shouldn't is_same< decltype( f_ ), void () >::value be 1 instead (esp. since is_same< F, void () >::value is 1 within that same function)?  In g++ 4.5.2, is_same< decltype( f_ ), void () >::value is 1.  It seems like VC++ 2010 is assuming that a function argument will always become a pointer ( i.e. "void ()" f will always go to "void (*) ()" f_ ) whether or not it agrees with its actual type ( F: void () ).

    Hope This Helps,
    Joshua Burkholder

  • C9 Lectures: Stephan T Lavavej - Advanced STL, 1 of n

    @STL:Looks like we were typing at the same time again!  Wink

    I'm still not sure why the rewriting in the std::is_same<>'s in main isn't taking the function type down to a pointer to function type, but I figured out how to go from "void(*)()" back to "void()" ... which was so simple I'm a little embarrassed I asked the question ... just a simple typename std::remove_pointer<...>::type.  Here's the code:

    #include <iostream>
    #include <type_traits>
    
    using namespace std;
    
    // Deliberately empty: only the type of f, void (), is of interest.
    void f () {}
    
    // Prints whether F is the function type void () or the pointer type
    // void(*)(). The trailing numbers are the values printed for the call
    // check_similarity_1( f ).
    template < typename F >
    void check_similarity_1 ( F ) {
        bool const is_function_type = std::is_same< F, void () >::value;      // 0
        bool const is_function_pointer = std::is_same< F, void(*)() >::value; // 1
        std::cout << "is_same< F, void () >::value:   " << is_function_type << std::endl;
        std::cout << "is_same< F, void(*)() >::value: " << is_function_pointer << std::endl;
    }
    
    // Prints whether decltype( f_ ) is the function type void () or the
    // pointer type void(*)(). The trailing numbers are the values printed for
    // the call check_similarity_2( f ).
    template < typename F >
    void check_similarity_2 ( F f_ ) {
        static_cast< void >( f_ ); // silences unused-parameter warnings
        typedef decltype( f_ ) parameter_type;
        bool const is_function_type = std::is_same< parameter_type, void () >::value;      // 0
        bool const is_function_pointer = std::is_same< parameter_type, void(*)() >::value; // 1
        std::cout << "is_same< decltype( f_ ), void () >::value:   " << is_function_type << std::endl;
        std::cout << "is_same< decltype( f_ ), void(*)() >::value: " << is_function_pointer << std::endl;
    }
    // Same comparison as check_similarity_1, but one level of pointer is
    // stripped from F first. The trailing numbers are the values printed for
    // the call check_similarity_3( f ).
    template < typename F >
    void check_similarity_3 ( F ) {
        typedef typename std::remove_pointer< F >::type pointee_type;
        std::cout << "is_same< typename remove_pointer< F >::type, void () >::value:   "
                  << std::is_same< pointee_type, void () >::value << std::endl; // 1
        std::cout << "is_same< typename remove_pointer< F >::type, void(*)() >::value: "
                  << std::is_same< pointee_type, void(*)() >::value << std::endl; // 0
    }
    
    // Same comparison as check_similarity_2, but one level of pointer is
    // stripped from decltype( f_ ) first. The trailing numbers are the values
    // printed for the call check_similarity_4( f ).
    template < typename F >
    void check_similarity_4 ( F f_ ) {
        static_cast< void >( f_ ); // silences unused-parameter warnings
        typedef typename std::remove_pointer< decltype( f_ ) >::type pointee_type;
        std::cout << "is_same< typename remove_pointer< decltype( f_ ) >::type, void () >::value:   "
                  << std::is_same< pointee_type, void () >::value << std::endl; // 1
        std::cout << "is_same< typename remove_pointer< decltype( f_ ) >::type, void(*)() >::value: "
                  << std::is_same< pointee_type, void(*)() >::value << std::endl; // 0
    }
    
    int main () {
        // Name the four types under inspection once, up front.
        typedef decltype( f ) function_type;
        typedef decltype( &f ) address_type;
        typedef std::remove_pointer< function_type >::type function_pointee_type;
        typedef std::remove_pointer< address_type >::type address_pointee_type;

        // decltype( f ) and decltype( &f ), compared directly.
        std::cout << "is_same< decltype( f ), void () >::value:   "
                  << std::is_same< function_type, void () >::value << std::endl; // 1
        std::cout << "is_same< decltype( f ), void(*)() >::value: "
                  << std::is_same< function_type, void(*)() >::value << std::endl; // 0
        std::cout << "is_same< decltype( &f ), void () >::value:   "
                  << std::is_same< address_type, void () >::value << std::endl; // 0
        std::cout << "is_same< decltype( &f ), void(*)() >::value: "
                  << std::is_same< address_type, void(*)() >::value << std::endl; // 1

        // The same two types after remove_pointer has been applied.
        std::cout << "is_same< typename remove_pointer< decltype( f ) >::type, void () >::value:   "
                  << std::is_same< function_pointee_type, void () >::value << std::endl; // 1
        std::cout << "is_same< typename remove_pointer< decltype( f ) >::type, void(*)() >::value: "
                  << std::is_same< function_pointee_type, void(*)() >::value << std::endl; // 0
        std::cout << "is_same< typename remove_pointer< decltype( &f ) >::type, void () >::value:   "
                  << std::is_same< address_pointee_type, void () >::value << std::endl; // 1
        std::cout << "is_same< typename remove_pointer< decltype( &f ) >::type, void(*)() >::value: "
                  << std::is_same< address_pointee_type, void(*)() >::value << std::endl; // 0

        // The deduced-argument checks, with and without remove_pointer.
        check_similarity_1( f );
        check_similarity_2( f );
        check_similarity_3( f );
        check_similarity_4( f );
        return 0;
    }
    
    Here's the output:
    is_same< decltype( f ), void () >::value:   1
    is_same< decltype( f ), void(*)() >::value: 0
    is_same< decltype( &f ), void () >::value:   0
    is_same< decltype( &f ), void(*)() >::value: 1
    is_same< typename remove_pointer< decltype( f ) >::type, void () >::value:   1
    is_same< typename remove_pointer< decltype( f ) >::type, void(*)() >::value: 0
    is_same< typename remove_pointer< decltype( &f ) >::type, void () >::value:   1
    is_same< typename remove_pointer< decltype( &f ) >::type, void(*)() >::value: 0
    is_same< F, void () >::value:   0
    is_same< F, void(*)() >::value: 1
    is_same< decltype( f_ ), void () >::value:   0
    is_same< decltype( f_ ), void(*)() >::value: 1
    is_same< typename remove_pointer< F >::type, void () >::value:   1
    is_same< typename remove_pointer< F >::type, void(*)() >::value: 0
    is_same< typename remove_pointer< decltype( f_ ) >::type, void () >::value:   1
    is_same< typename remove_pointer< decltype( f_ ) >::type, void(*)() >::value: 0

    Thanks For Any Clarification ... And Your Continued Patience Wink,
    Joshua Burkholder

  • C9 Lectures: Stephan T Lavavej - Advanced STL, 1 of n

    @STL:Looks like we were both typing at the same time.  Wink  Thank you very much for the explanation!  I'm getting closer to understanding.

    Is there any way to go from a "void(*)()" back to "void()"?  Or a way to avoid this rewriting?

    Lastly, why is the decltype( f ) in my first couple of std::is_same<...> lines in main() __not__ being rewritten to a pointer to function type once it is inside std::is_same<...>?  In other words, why is the following output being produced from main()?

    is_same< decltype( f ), void () >::value:   1
    is_same< decltype( f ), void(*)() >::value: 0
    is_same< decltype( &f ), void () >::value:   0
    is_same< decltype( &f ), void(*)() >::value: 1 

    Thanks In Advance For Clarification ... And Your Patience With My Dense-ness,
    Joshua Burkholder

  • C9 Lectures: Stephan T Lavavej - Advanced STL, 1 of n

    @new2STL:Since void f() isn't always void(*)(), let me clarify.  Why is the first std::is_same<...> in main() returning 1?

    is_same< decltype( f ), void () >::value:   1 
    Similarly, why is the second std::is_same<...> in main() returning 0?
    is_same< decltype( f ), void(*)() >::value: 0
    It seems like this should be 0 then 1 ... like the check_similarity_x() functions, vice 1 then 0.  In order to get a 0 then 1 in main(), I have to do the following:
     #include <iostream>
    #include <type_traits>
    
    using namespace std;
    
    // Deliberately empty: only the type of f, void (), is of interest.
    void f () {}
    
    // Prints whether F is the function type void () or the pointer type
    // void(*)(). The trailing numbers are the values printed for the call
    // check_similarity_1( f ).
    template < typename F >
    void check_similarity_1 ( F ) {
        typedef void function_type ();
        typedef void ( * function_pointer_type ) ();
        std::cout << "is_same< F, void () >::value:   "
                  << std::is_same< F, function_type >::value << std::endl; // 0
        std::cout << "is_same< F, void(*)() >::value: "
                  << std::is_same< F, function_pointer_type >::value << std::endl; // 1
    }
    
    // Prints whether decltype( f_ ) is the function type void () or the
    // pointer type void(*)(). The trailing numbers are the values printed for
    // the call check_similarity_2( f ).
    template < typename F >
    void check_similarity_2 ( F f_ ) {
        static_cast< void >( f_ ); // silences unused-parameter warnings
        typedef decltype( f_ ) parameter_type;
        typedef void function_type ();
        typedef void ( * function_pointer_type ) ();
        std::cout << "is_same< decltype( f_ ), void () >::value:   "
                  << std::is_same< parameter_type, function_type >::value << std::endl; // 0
        std::cout << "is_same< decltype( f_ ), void(*)() >::value: "
                  << std::is_same< parameter_type, function_pointer_type >::value << std::endl; // 1
    }
    
    int main () {
        // Name the two types under inspection once, up front.
        typedef decltype( f ) function_type;
        typedef decltype( &f ) address_type;

        // The function itself: a function type, not a pointer.
        std::cout << "is_same< decltype( f ), void () >::value:   "
                  << std::is_same< function_type, void () >::value << std::endl; // 1
        std::cout << "is_same< decltype( f ), void(*)() >::value: "
                  << std::is_same< function_type, void(*)() >::value << std::endl; // 0

        // Its address: a pointer to function.
        std::cout << "is_same< decltype( &f ), void () >::value:   "
                  << std::is_same< address_type, void () >::value << std::endl; // 0
        std::cout << "is_same< decltype( &f ), void(*)() >::value: "
                  << std::is_same< address_type, void(*)() >::value << std::endl; // 1

        // The same function passed through deduced template parameters.
        check_similarity_1( f );
        check_similarity_2( f );
        return 0;
    }
    
    Which produces the following output:
    is_same< decltype( f ), void () >::value:   1
    is_same< decltype( f ), void(*)() >::value: 0
    is_same< decltype( &f ), void () >::value:   0
    is_same< decltype( &f ), void(*)() >::value: 1
    is_same< F, void () >::value:   0
    is_same< F, void(*)() >::value: 1
    is_same< decltype( f_ ), void () >::value:   0
    is_same< decltype( f_ ), void(*)() >::value: 1
    Obviously, this is some rule that I didn't pay enough attention to when I was learning C++ ... or just didn't learn the right way Wink ... I'm just trying to figure out which rule this is.

    Joshua Burkholder

     

  • C9 Lectures: Stephan T Lavavej - Advanced STL, 1 of n

    Why does the decltype of a function change from "void()" to "void(*)()" in the following code?

    Code:

    #include <iostream>
    #include <type_traits>
    
    using namespace std;
    
    // Deliberately empty: only the type of f, void (), is of interest.
    void f () {}
    
    // Prints whether F is the function type void () or the pointer type
    // void(*)(). The trailing numbers are the values printed for the call
    // check_similarity_1( f ).
    template < typename F >
    void check_similarity_1 ( F ) {
        bool const matches_function = std::is_same< F, void () >::value; // 0
        bool const matches_pointer = std::is_same< F, void(*)() >::value; // 1
        std::cout << "is_same< F, void () >::value:   " << matches_function << std::endl;
        std::cout << "is_same< F, void(*)() >::value: " << matches_pointer << std::endl;
    }
    
    // Prints whether decltype( f_ ) is the function type void () or the
    // pointer type void(*)(). The trailing numbers are the values printed for
    // the call check_similarity_2( f ).
    template < typename F >
    void check_similarity_2 ( F f_ ) {
        static_cast< void >( f_ ); // silences unused-parameter warnings
        typedef decltype( f_ ) parameter_type;
        bool const matches_function = std::is_same< parameter_type, void () >::value; // 0
        bool const matches_pointer = std::is_same< parameter_type, void(*)() >::value; // 1
        std::cout << "is_same< decltype( f_ ), void () >::value:   " << matches_function << std::endl;
        std::cout << "is_same< decltype( f_ ), void(*)() >::value: " << matches_pointer << std::endl;
    }
    
    int main () {
        // Inspect the type of f directly, with no template deduction involved.
        typedef decltype( f ) function_type;
        std::cout << "is_same< decltype( f ), void () >::value:   "
                  << std::is_same< function_type, void () >::value << std::endl; // 1
        std::cout << "is_same< decltype( f ), void(*)() >::value: "
                  << std::is_same< function_type, void(*)() >::value << std::endl; // 0

        // Inspect it again after it has passed through a deduced parameter.
        check_similarity_1( f );
        check_similarity_2( f );
        return 0;
    }
    
    The output of this code on both VC++2010 and g++ 4.5.2 is the following:
    is_same< decltype( f ), void () >::value:   1
    is_same< decltype( f ), void(*)() >::value: 0
    is_same< F, void () >::value:   0
    is_same< F, void(*)() >::value: 1
    is_same< decltype( f_ ), void () >::value:   0
    is_same< decltype( f_ ), void(*)() >::value: 1

    Thanks In Advance For Any Clarification,
    Joshua Burkholder

  • C9 Lectures: Stephan T Lavavej - Advanced STL, 1 of n

    @Matt_PD:Thank you so much ... the optimizing_cpp.pdf is OUTSTANDING!

  • C9 Lectures: Stephan T Lavavej - Advanced STL, 1 of n

    @new2stl:Thanks for the information.  I will use the intrin.h header.

    Unfortunately, intrin.h does not seem to exist for GCC's g++ and MinGW's port of g++; however, emmintrin.h exists for both GCC's and MinGW's g++ ... and for Visual C++ 2010.  Here's the simple test that I ran:

    #include <iostream>
    #include <intrin.h>
    //#include <emmintrin.h>
    
    using namespace std;
    
    int main () {
        // Compiles only when the included intrinsics header declares __m128d;
        // prints the size of that type in bytes.
        auto const register_bytes = sizeof( __m128d );
        std::cout << register_bytes << std::endl;
        return 0;
    }
    
    ... and here's the command line:
    g++ -o main.exe main.cpp -std=c++0x -march=native -O3 -Wall -Wextra -Werror

    Joshua Burkholder

  • C9 Lectures: Stephan T Lavavej - Advanced STL, 1 of n

    @new2stl:Thanks for the information.  I will use the intrin.h header.

    Unfortunately, the machines that I'm writing for don't seem to have the AVX instruction set and its 256 bit registers.  Our machines are about three years old ... and it seems that AVX is relatively new.

    I am new to SSE.  The only info that I have read is the couple of MSDN Help webpages about MMX/SSE intrinsics and the one GCC webpage that I could find.  Where can I learn about how to program for the SSE instruction sets?  What are the good book titles?  What are the good websites/tutorials?

    FYI:  The reason that I am making copies of images is that I am at the start of a research project where I will get video streams from a couple of cameras.  I need to send each video stream to a couple of real-time algorithms that will execute concurrently in separate threads.  Since I don't know how destructive each algorithm will be to the image buffer, I just have another thread capture the images (i.e. fill the image buffer), make copies, and then let those algorithm threads loose on the copies ... where they can destructively edit those copies in complete isolation.  The real-time algorithms have changed a few times, so this way I have a system that works without the chance of one thread stomping on another.  I'm sure that I'll revisit this copying-images code at the end of the project ... but by then, all the other algorithms will have been decided upon (and hopefully, set in stone).

     

    Thanks Again,
    Joshua Burkholder

  • C9 Lectures: Stephan T Lavavej - Advanced STL, 1 of n

    @STL:


    Burkholder> 5) What improvements can be made to my loop unrolling code at the end of this post?

    Consider using SSE, etc. Video processing is a perfect scenario for vectorization.

    (Of course, for simple copying, just use memcpy()/memmove(). In fact, our implementation of std::copy() calls memmove() when it can get away with it - something I'm very likely to cover in future parts.)

    Thanks for the AWESOME suggestions!!!  SSE, memcpy(), and memmove() are amazing!

    I'm a complete newbie to SSE ... but WOW ... it seems like using one instruction to load multiple floating point values into a 128-bit register and then using another instruction to store those values is a quicker way to go.  I have a few questions on SSE:

    1) Since I'm a newbie to SSE, I used the following type of code to copy memory from one place to another:

    #include <emmintrin.h>
    ...
    // Element count of every buffer below (presumably width * height * channels
    // -- TODO confirm).
    size_t const image_size = 640 * 480 * 3;
    // One source buffer and two destination buffers, all of image_size doubles.
    // NOTE(review): no matching delete[] is visible in this excerpt.
    double * image_buffer = new double[ image_size ];
    double * image_1 = new double[ image_size ];
    double * image_2 = new double[ image_size ];
    ...
    // Presumably fills image_buffer with the captured frame; defined elsewhere.
    capture_image( image_buffer, image_size );
    ...
    // Holds the pair of doubles currently being copied.
    __m128d sse_register;
    
    // Since two 64-bit doubles fit into one 128-bit sse register,
    // then our delta is 2.
    size_t const delta = 2;
    // Load each pair once and store it to both destinations.  image_size
    // (921600) is a multiple of delta, so the last iteration stays in bounds.
    // NOTE(review): _mm_load_pd / _mm_store_pd are the aligned variants and
    // expect 16-byte aligned addresses; confirm that new double[] provides that
    // on every target, or use _mm_loadu_pd / _mm_storeu_pd instead.
    for ( size_t i = 0; i < image_size; i += delta ) {
        sse_register = _mm_load_pd( &image_buffer[ i ] );
        _mm_store_pd( &image_1[ i ], sse_register );
        _mm_store_pd( &image_2[ i ], sse_register );
    }
    In this case, is this the correct way to use SSE?  Or is there a better way?

    2) The following "normal" type of code (i.e. no _mm_xxxx_pd() stuff) also compiled and ran:

    #include <emmintrin.h>
    ...
    // Element count of every buffer below (presumably width * height * channels
    // -- TODO confirm).
    size_t const image_size = 640 * 480 * 3;
    // One source buffer and two destination buffers, all of image_size doubles.
    // NOTE(review): no matching delete[] is visible in this excerpt.
    double * image_buffer = new double[ image_size ];
    double * image_1 = new double[ image_size ];
    double * image_2 = new double[ image_size ];
    ...
    // Presumably fills image_buffer with the captured frame; defined elsewhere.
    capture_image( image_buffer, image_size );
    ...
    // Since 128-bits is 2 * 64-bits, then ...
    // ... the buffers hold half as many 128-bit elements as doubles.
    size_t const img_size = image_size / 2;
    // Reinterpret the double buffers as arrays of 128-bit vector values.
    // NOTE(review): the type used here is __m128, while the accompanying text
    // refers to __m128d; confirm which one was intended.
    // NOTE(review): dereferencing these pointers presumably requires 16-byte
    // aligned storage -- confirm that new double[] guarantees it.
    __m128 const * img_buffer = reinterpret_cast< __m128 const * >( &image_buffer[ 0 ] );
    __m128 * img_1 = reinterpret_cast< __m128 * >( &image_1[ 0 ] );
    __m128 * img_2 = reinterpret_cast< __m128 * >( &image_2[ 0 ] );
    // Copy every 128-bit element of the source into both destinations.
    for ( size_t i = 0; i < img_size; ++i ) {
        img_1[ i ] = img_buffer[ i ];
        img_2[ i ] = img_buffer[ i ];
    }
    
    Will this type of code (i.e. using __m128d * the same way I would double * or any other pointer) be valid in the future?  Or is this something that works in VS2010, but might not work in future versions?  ... If this will work in future versions, then what's up with all that _mm_xxxx_pd() stuff?

    3) I have no idea if I'm writing good or bad SSE code.  What are the suggested tutorials?  Are there any good books?

    Lastly, memcpy() and memmove() were faster at copying a single image than anything that I could write ... even with SSE and loop unrolling.  I could only beat memcpy() and memmove() when I took into account my specific situation ... copying a single image buffer into two images ... where I could use a single for-loop for both images (as above), vice a separate loop for each image.  So my question is:

    4) What is the "secret sauce" in memcpy() and memmove() that makes them so much faster?  Is the implementation of VS2010's memcpy() or memmove() available?  If so, where can I find that code?

    Definitely cover memmove() when you cover std::copy().

     

    Thanks In Advance,
    Joshua Burkholder

     

  • C9 Lectures: Stephan T Lavavej - Advanced STL, 1 of n

    Based on the previous STL videos and my interest in using templates to unroll loops, I have the following questions:

    1) What is the equivalent VS2010 C++ flag that corresponds to g++'s -ftemplate-depth=n ( http://gcc.gnu.org/onlinedocs/gcc/C_002b_002b-Dialect-Options.html#index-ftemplate_002ddepth-156 )?  Can this flag be set in the Visual Studio's Project/Properties/Configuration Properties dialog box (or some other dialog box) or just at the command line?

    2) Since C++0x's maximum template instantiation depth seems to be implementation dependent ( http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2010/n3225.pdf - Section 14.7.1 - Point 14 - Page 372), what is the maximum template instantiation depth of VS2010's templates?  Or is it template parameter dependent (i.e. compile-time stack dependent)?

    3) At compile-time, is there any way (even some weird compiler-dependent macro) to determine how deep into template instantiations we are without keeping track of that depth ourselves?

    4) for-loops seem to unroll themselves if the conditional expression can be determined at compile-time and if the number of iterations is small enough.  What is the maximum number of iterations that still allows for-loops to unroll?  In other words, when does for-loop unrolling end and when do assembly jumps begin?

    5) What improvements can be made to my loop unrolling code at the end of this post?

    Background:
    Previously, I asked if we could cover loop unrolling (esp. for assignment statements).  Normally, if I needed to repeatedly perform a few hundred thousand assignment statements (i.e. copying the buffer of images or video frames for processing) and I wanted to minimize the impact of the "i < size" and "++i" in that for-loop, then I would just write out a for-loop with a bunch of assignment statements in the for-loop body using a script and then copy & paste that code into the relevant cpp file by hand.  Of course, this manual for-loop unrolling assumed that the size of the loops (i.e. the size of the images or video frames) wasn't going to change from one compilation to another.  A few weeks back, I had to come up with something a little easier to work with since I was going to be dealing with a number of different buffer sizes (all still known at compile time ... no run-time querying).  With the help of pages 314-318 of C++ Templates: The Complete Guide, I ended up writing something like the following:

    #include <cstddef>
    #include <iostream>
    
    using namespace std;
    
    //=========================================================
    
    // INLINE expands to __forceinline when _MSC_VER is defined and to the
    // standard inline keyword otherwise.
    #if defined( _MSC_VER )
    #define INLINE __forceinline
    #else
    #define INLINE inline
    #endif

    // General case ( n >= 2 ): apply ACTION_TYPE to the current element pair,
    // then let the ( n - 1 ) instantiation handle the pairs that follow.
    template < typename ACTION_TYPE, typename TYPE, size_t n >
    struct unroll_loop_t {
        INLINE static void call ( TYPE * destination_ptr, TYPE const * source_ptr ) {
            typedef unroll_loop_t< ACTION_TYPE, TYPE, n - 1 > remaining_t;
            ACTION_TYPE::call( destination_ptr, source_ptr );
            ++destination_ptr;
            ++source_ptr;
            remaining_t::call( destination_ptr, source_ptr );
        }
    };

    // n == 1: final element pair; apply the action and end the recursion.
    template < typename ACTION_TYPE, typename TYPE >
    struct unroll_loop_t< ACTION_TYPE, TYPE, 1 > {
        INLINE static void call ( TYPE * destination_ptr, TYPE const * source_ptr ) {
            ACTION_TYPE::call( destination_ptr, source_ptr );
        }
    };

    // n == 0: empty range; the action is never invoked.
    template < typename ACTION_TYPE, typename TYPE >
    struct unroll_loop_t< ACTION_TYPE, TYPE, 0 > {
        INLINE static void call ( TYPE *, TYPE const * ) {
        }
    };
    
    // Applies ACTION_TYPE to n consecutive element pairs in chunks: a run-time
    // loop over fully unrolled blocks of block_size pairs, followed by one
    // unrolled remainder of ( n % block_size ) pairs.
    template < typename ACTION_TYPE, typename TYPE, size_t n >
    struct loop_t {
        INLINE static void call ( TYPE * destination, TYPE const * source ) {
            size_t const block_size = 512; // pairs handled by one unrolled block
            size_t const full_blocks = n / block_size; // truncating division
            size_t const tail_size = n % block_size;
            typedef unroll_loop_t< ACTION_TYPE, TYPE, block_size > full_block_t;
            typedef unroll_loop_t< ACTION_TYPE, TYPE, tail_size > tail_block_t;

            // offset always equals ( blocks already processed ) * block_size
            size_t offset = 0;
            for ( size_t remaining = full_blocks; remaining != 0; --remaining ) {
                full_block_t::call( destination + offset, source + offset );
                offset += block_size;
            }
            tail_block_t::call( destination + offset, source + offset );
        }
    };
    
    // Per-element action: copy-assigns the value at source_ptr to the object
    // at destination_ptr.
    template < typename TYPE >
    struct assignment_t {
        INLINE static void call ( TYPE * destination_ptr, TYPE const * source_ptr ) {
            TYPE const & value = *source_ptr;
            *destination_ptr = value;
        }
    };
    
    // Copies n elements from source to destination through loop_t with
    // assignment_t as the per-element action; n is a template argument, so it
    // must be known at compile time.
    template < size_t n, typename TYPE >
    INLINE void assign( TYPE * destination, TYPE const * source ) {
        typedef assignment_t< TYPE > copy_one_t;
        typedef loop_t< copy_one_t, TYPE, n > copy_all_t;
        copy_all_t::call( destination, source );
    }
    
    //=========================================================
    
    // Forward declarations of the helpers defined after main().
    // Each takes a buffer pointer (or two) plus the element count.
    void zeroize ( double *, size_t const ); // sets every element to 0
    void capture ( double *, size_t const ); // fills the buffer with 1, 2, ..., n
    void print ( char const *, double const *, size_t const ); // prints an abbreviated view of the buffer
    void print_is_equal ( char const *, double const *, double const *, size_t const ); // prints a label and 1/0 for element-wise equality
    void process ( double *, size_t const ); // placeholder; the definition has an empty body
    
    int main () {
        size_t const width = 640; // assume this comes in a .h file
        size_t const height = 480; // assume this comes in a .h file
        size_t const depth = 3; // assume this comes in a .h file
        size_t const size = width * height * depth; // assume this comes in a .h file
    
        double * image_buffer = new double[ size ];
        double * image_1 = new double[ size ];
        double * image_2 = new double[ size ];
    
        capture( image_buffer, size );
    
        print( "image_buffer", image_buffer, size );
        print( "image_1", image_1, size );
        print( "image_2", image_2, size );
    
        cout << endl;
        cout << "assign< size >( image_1, image_buffer );" << endl;
        cout << "assign< size >( image_2, image_buffer );" << endl;
        cout << endl;
    
        assign< size >( image_1, image_buffer );
        assign< size >( image_2, image_buffer );
        
        print( "image_buffer", image_buffer, size );
        print( "image_1", image_1, size );
        print( "image_2", image_2, size );
    
        cout << endl;
    
        print_is_equal( "image_1 == image_buffer", image_1, image_buffer, size );
        print_is_equal( "image_2 == image_buffer", image_2, image_buffer, size );
        print_is_equal( "image_1 == image_2", image_1, image_2, size );
    
        process( image_1, size );
        process( image_2, size );
    
        delete[] image_2;
        delete[] image_1;
        delete[] image_buffer;
        
        return 0;
    }
    
    // Sets the first n elements of img to zero.
    void zeroize ( double * img, size_t const n ) {
        double * const end = img + n;
        for ( double * cursor = img; cursor != end; ++cursor )
            *cursor = 0;
    }
    
    // Simulates an image capture: element k of img receives the value k + 1,
    // for k in [ 0, n ).
    void capture ( double * img, size_t const n ) {
        size_t index = 0;
        while ( index != n ) {
            img[ index ] = static_cast< double >( index + 1 );
            ++index;
        }
    }
    
    // Writes "<name>: [ ... ]" followed by a newline to cout.  At most the
    // first two elements and the last element are shown; when elements in
    // between are skipped, "..." stands in for them.
    void print ( char const * name, double const * img, size_t const n ) {
        size_t const max_length = 2;
        size_t const shown = ( n < max_length ? n : max_length );
        cout << name << ": [ ";
        for ( size_t i = 0; i != shown; ++i ) {
            if ( i != 0 )
                cout << ", ";
            cout << img[ i ];
        }
        if ( shown < n ) {
            cout << ", ";
            if ( shown + 1 != n )
                cout << "..., "; // at least one element is skipped
            cout << img[ n - 1 ];
        }
        cout << " ]" << endl;
    }
    
    // Compares the first n elements of img_1 and img_2 with operator== and
    // writes "<str>: 1" (all equal) or "<str>: 0" (a mismatch was found),
    // followed by a newline, to cout.
    void print_is_equal ( char const * str, double const * img_1, double const * img_2, size_t const n ) {
        size_t i = 0;
        // advance past the matching prefix; stops at the first mismatch
        while ( i < n && img_1[ i ] == img_2[ i ] )
            ++i;
        bool const is_equal = ( i == n );
        cout << str << ": " << is_equal << endl;
    }
    
    // Placeholder for the image-processing step.  Both parameters are unnamed
    // and unused; the body currently does nothing.
    void process ( double *, size_t const ) {
        // apply filters
    }
    
    This code can be compiled in g++ 4.5.2 using the following command line (assuming the code is in a file named main.cpp):
    g++ -o main.exe main.cpp -std=c++0x -O3 -Wall -Wextra -Werror
    or compiled in VS2010 using Warning Level 4.  For VS2010, it takes about two minutes to compile in Release mode if you are also producing the Assembly with Source Code ( /FAs ... or Properties / Configuration Properties / C/C++ / Output Files / Assembler Output ) as well.  This code produces the following output:
    image_buffer: [ 1, 2, ..., 921600 ]
    image_1: [ 0, 0, ..., 0 ]
    image_2: [ 0, 0, ..., 0 ]
    
    assign< size >( image_1, image_buffer );
    assign< size >( image_2, image_buffer );
    
    image_buffer: [ 1, 2, ..., 921600 ]
    image_1: [ 1, 2, ..., 921600 ]
    image_2: [ 1, 2, ..., 921600 ]
    
    image_1 == image_buffer: 1
    image_2 == image_buffer: 1
    image_1 == image_2: 1

    Thanks In Advance,
    Joshua Burkholder

  • C9 Lectures: Stephan T. Lavavej - Standard Template Library (STL), 10 of 10

    @devcodex:That's a whole lot of specific code for a very simple and general idea.  I'm not really sure that a destructive endian swap is the way to go; however, try this code out:

    typedef unsigned int SIZE;
    typedef unsigned char BYTE;
    
    // Reverses the byte order of t in place and returns a reference to t.
    // T must be an integral type; this is enforced at compile time.
    template < typename T >
    inline T & swap_endian ( T & t ) {
        static_assert( std::is_integral< T >::value, "swap_endian<T> requires T to be an integral type." );
        SIZE const number_of_bytes = sizeof( t );
        BYTE * low = reinterpret_cast< BYTE * >( &t );
        BYTE * high = low + ( number_of_bytes - 1 );
        // Walk inward from both ends, exchanging bytes with three XORs.
        // low and high never address the same byte inside the loop, so the
        // XOR exchange cannot zero a byte out.
        while ( low < high ) {
            *low ^= *high;
            *high ^= *low;
            *low ^= *high;
            ++low;
            --high;
        }
        return t;
    }
    Two of these swap_endian calls in a row get you back where you started ... without having to use temporaries; however, this code can easily be modified to be non-destructive and use the more traditional buffer.  Does this code meet your needs?

    The code above can be used in the following manner:

    #include <iostream>
    #include <iomanip>
    #include <type_traits>
    
    typedef unsigned int SIZE;
    typedef unsigned char BYTE;
    
    // Dumps the bytes of t to std::cout, one line per byte in address order:
    // the 1-based byte index, the byte's address, its value as an unsigned
    // int, and its eight bits from most to least significant.  A blank line
    // (with a flush) follows the last byte.
    template < typename T >
    void print_byte_info ( T const & t ) {
        SIZE const number_of_bytes = sizeof( t );
        SIZE const number_of_bits = 8;
        SIZE const width = 4;
        BYTE const * const first = reinterpret_cast< BYTE const * >( &t );
        std::ostream & os = std::cout;
        for ( SIZE i = 0; i != number_of_bytes; ++i ) {
            BYTE const * const current = first + i;
            os << "byte ( " << std::setw( width ) << ( i + 1 ) << " ): ( address: "
                << std::setw( width ) << static_cast< void const * >( current )
                << ", value: " << std::setw( width )
                << static_cast< unsigned int >( *current ) << ", binary value: ";
            // bit counts down 8..1; the tested bit position is bit - 1
            for ( SIZE bit = number_of_bits; bit != 0; --bit ) {
                bool const is_set = ( *current & ( 1 << ( bit - 1 ) ) ) != 0;
                os << ( is_set ? '1' : '0' );
            }
            os << " )\n";
        }
        os << std::endl;
    }
    
    template < typename T >
    inline T & swap_endian ( T & t ) {
        static_assert( std::is_integral< T >::value, "swap_endian<T> requires T to be an integral type." );
        SIZE const number_of_bytes = sizeof( t );
        SIZE const middle = number_of_bytes / 2; // integer division
        BYTE * bytes = reinterpret_cast< BYTE * >( &t );
        for ( SIZE i = 0, j = number_of_bytes - 1; i < middle; ++i, --j ) {
            bytes[ i ] ^= bytes[ j ];
            bytes[ j ] ^= bytes[ i ];
            bytes[ i ] ^= bytes[ j ];
        }
        return t;
    }
    
    // Demo driver: for one unsigned and one signed 64-bit value, prints the
    // value in decimal and hex plus its byte layout, swaps the byte order in
    // place, and prints everything again.  Returns 0.
    int main () {
        // --- unsigned case ---
        unsigned long long x = 0x0102030405060708ULL;
        std::cout << "x ( unsigned long long ): " << x << std::endl;
        // std::dec restores decimal output after each hex line
        std::cout << "x ( unsigned long long | hex ): " << std::hex << x << std::endl << std::dec;
        print_byte_info( x );

        swap_endian( x );
        std::cout << "swapped x ( unsigned long long ): " << x << std::endl;
        std::cout << "swapped x ( unsigned long long | hex ): " << std::hex << x << std::endl << std::dec;
        print_byte_info( x );

        // --- signed case ---
        signed long long y = 0xF7F8F9FAFBFCFDFELL;
        std::cout << "y ( signed long long ): " << y << std::endl;
        std::cout << "y ( signed long long | hex ): " << std::hex << y << std::endl << std::dec;
        print_byte_info( y );

        swap_endian( y );
        std::cout << "swapped y ( signed long long ): " << y << std::endl;
        std::cout << "swapped y ( signed long long | hex ): " << std::hex << y << std::endl << std::dec;
        print_byte_info( y );

        return 0;
    }
    
    This code will compile using Warning Level 4 in VS2010 or in g++ 4.5.2 using the following command ( assuming the code is in main.cpp ):
    g++ -o main.exe main.cpp -std=c++0x -O3 -Wall -Wextra -Werror
    Here's an example of the output from this code:
    x ( unsigned long long ): 72623859790382856
    x ( unsigned long long | hex ): 102030405060708
    byte (    1 ): ( address: 0044F89C, value:    8, binary value: 00001000 )
    byte (    2 ): ( address: 0044F89D, value:    7, binary value: 00000111 )
    byte (    3 ): ( address: 0044F89E, value:    6, binary value: 00000110 )
    byte (    4 ): ( address: 0044F89F, value:    5, binary value: 00000101 )
    byte (    5 ): ( address: 0044F8A0, value:    4, binary value: 00000100 )
    byte (    6 ): ( address: 0044F8A1, value:    3, binary value: 00000011 )
    byte (    7 ): ( address: 0044F8A2, value:    2, binary value: 00000010 )
    byte (    8 ): ( address: 0044F8A3, value:    1, binary value: 00000001 )
    
    swapped x ( unsigned long long ): 578437695752307201
    swapped x ( unsigned long long | hex ): 807060504030201
    byte (    1 ): ( address: 0044F89C, value:    1, binary value: 00000001 )
    byte (    2 ): ( address: 0044F89D, value:    2, binary value: 00000010 )
    byte (    3 ): ( address: 0044F89E, value:    3, binary value: 00000011 )
    byte (    4 ): ( address: 0044F89F, value:    4, binary value: 00000100 )
    byte (    5 ): ( address: 0044F8A0, value:    5, binary value: 00000101 )
    byte (    6 ): ( address: 0044F8A1, value:    6, binary value: 00000110 )
    byte (    7 ): ( address: 0044F8A2, value:    7, binary value: 00000111 )
    byte (    8 ): ( address: 0044F8A3, value:    8, binary value: 00001000 )
    
    y ( signed long long ): -578437695752307202
    y ( signed long long | hex ): f7f8f9fafbfcfdfe
    byte (    1 ): ( address: 0044F88C, value:  254, binary value: 11111110 )
    byte (    2 ): ( address: 0044F88D, value:  253, binary value: 11111101 )
    byte (    3 ): ( address: 0044F88E, value:  252, binary value: 11111100 )
    byte (    4 ): ( address: 0044F88F, value:  251, binary value: 11111011 )
    byte (    5 ): ( address: 0044F890, value:  250, binary value: 11111010 )
    byte (    6 ): ( address: 0044F891, value:  249, binary value: 11111001 )
    byte (    7 ): ( address: 0044F892, value:  248, binary value: 11111000 )
    byte (    8 ): ( address: 0044F893, value:  247, binary value: 11110111 )
    
    swapped y ( signed long long ): -72623859790382857
    swapped y ( signed long long | hex ): fefdfcfbfaf9f8f7
    byte (    1 ): ( address: 0044F88C, value:  247, binary value: 11110111 )
    byte (    2 ): ( address: 0044F88D, value:  248, binary value: 11111000 )
    byte (    3 ): ( address: 0044F88E, value:  249, binary value: 11111001 )
    byte (    4 ): ( address: 0044F88F, value:  250, binary value: 11111010 )
    byte (    5 ): ( address: 0044F890, value:  251, binary value: 11111011 )
    byte (    6 ): ( address: 0044F891, value:  252, binary value: 11111100 )
    byte (    7 ): ( address: 0044F892, value:  253, binary value: 11111101 )
    byte (    8 ): ( address: 0044F893, value:  254, binary value: 11111110 )

    Hope This Helps,
    Joshua Burkholder