Find the answer to your Linux question:
Page 2 of 3 FirstFirst 1 2 3 LastLast
Results 11 to 20 of 21
there was some the first time i ran it, but i didnt run it with 1 and then 2, so the second time i got some errors: /usr/bin/opreport error: Already ...
Enjoy an ad free experience by logging in. Not a member yet? Register.
  1. #11
    Just Joined!
    Join Date
    Dec 2006
    Posts
    85

    There was some the first time I ran it, but I didn't run it with 1 and then 2, so the second time I got some errors:
    /usr/bin/opreport error: Already displaying results for parameter tgid with values:
    tgid:18118,tgid:18538,tgid:18879,tgid:19284,tgid:19624, and 8 more,
    which conflicts with parameter tid.
    Suggestion: specify tgid: or --merge tgid
    opannotate error: Already displaying results for parameter tgid with values:
    tgid:18118,tgid:18538,tgid:18879,tgid:19284,tgid:19624, and 8 more,
    which conflicts with parameter tid.
    Suggestion: specify tgid: or --merge tgid

    --edit--
    fixed:
    1:
    Code:
    9.10user 0.00system 0:09.35elapsed 97%CPU (0avgtext+0avgdata 0maxresident)k
    0inputs+0outputs (0major+254minor)pagefaults 0swaps
    CPU: P4 / Xeon, speed 3000 MHz (estimated)
    Counted GLOBAL_POWER_EVENTS events (time during which processor is not stopped) with a unit mask of 0x01 (mandatory) count 750000
    Processes with a thread ID of 21771
    Processes with a thread ID of 21772
            tid:21771|        tid:21772|
      samples|      %|  samples|      %|
    ------------------------------------
            2 100.000     35877 100.000 /tmp/forum
    	        tid:21771|        tid:21772|
    	  samples|      %|  samples|      %|
    	------------------------------------
    	        2 100.000         0       0 /lib/ld-2.7.so
    	        0       0     35877 100.000 /tmp/forum
    /* 
     * Command line: opannotate image:/tmp/forum --source 
     * 
     * Interpretation of command line:
     * Output annotated source file with samples
     * Output all files
     * 
     * CPU: P4 / Xeon, speed 3000 MHz (estimated)
     * Counted GLOBAL_POWER_EVENTS events (time during which processor is not stopped) with a unit mask of 0x01 (mandatory) count 750000
     * Processes with a thread ID of 21771
     * Processes with a thread ID of 21772
     */
    /* 
     * Total samples for file : "/tmp/forum.c"
     * 
     *      0       0 35877 100.000
     */
    
    
                                   :#include <stdio.h>
                                   :#include <math.h>
                                   :#include <pthread.h>
                                   :using namespace std;
                                   :
                                   :
                                   :struct hello_world
                                   :{
                                   :double first;
                                   :double second;
                                   :double result;
                                   :};
                                   :
                                   :void *myfunc(void *value) /* myfunc(void*) total:      0       0 35877 100.000 */
                                   :{
                                   :hello_world *myhello = (hello_world*)value;
         0       0  5828 16.2444   :for (int counter = myhello->first; counter <= myhello->second; counter++)
                                   :{
         0       0 30049 83.7556   :myhello->result += sqrt((double)counter);
                                   :}
                                   :}
                                   :
                                   :int main()
                                   :{
                                   :pthread_attr_t attr;
                                   :pthread_attr_init(&attr);
                                   :pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM);
                                   :pthread_t thread1, thread2;
                                   :hello_world *myhello;
                                   :hello_world *nexthello;
                                   :myhello = new hello_world;
                                   :nexthello = new hello_world;
                                   :myhello->first = 1.0;
                                   :myhello->second = 500000000.0;
                                   :nexthello->first = 500000001.0;
                                   :nexthello->second = 1000000000.0;
                                   :pthread_create( &thread1, &attr, myfunc, (void*)myhello);
                                   :pthread_join(thread1,NULL);
                                   :}
    
    Linux  debian  2.6.26-2-686  #1 SMP Thu Mar 26 01:08:11 UTC 2009  i686  05/28/2009
    
    07:12:23  cpu %usr %nice   %sys %irq %softirq    %wait %idle             _cpu_
    07:12:23  all   58     0      4    0        0        0    38
                0   19     0      2    0        0        0    79
                1   95     0      5    0        0        0     0
    07:12:24  all   57     0      1    0        0        2    40
                0   15     0      1    0        0        3    81
                1   99     0      1    0        0        0     0
    07:12:25  all   52     0      0    0        0        0    47
                0    5     0      1    0        0        0    94
                1  100     0      1    0        0        0     0
    07:12:26  all   52     0      1    0        0        0    48
                0    5     0      0    0        0        0    95
                1   99     0      1    0        0        0     0
    07:12:27  all   57     0      2    0        0        0    41
                0   15     0      3    0        0        0    82
                1   99     0      1    0        0        0     0
    07:12:28  all   52     0      0    0        0        0    48
                0    6     0      0    0        0        0    94
                1   99     0      0    0        0        0     1
    07:12:29  all   52     0      0    0        0        0    48
                0    4     0      0    0        0        0    96
                1  100     0      0    0        0        0     0
    07:12:30  all   55     0      2    0        0        0    42
                0   13     0      2    0        0        0    85
                1   98     0      3    0        0        0     0
    07:12:31  all   60     0      0    0        0        0    40
                0   19     0      1    0        0        0    80
                1   99     0      1    0        0        0     0
    07:12:32  all   58     0      3    0        0        0    38
                0   23     0      2    0        0        0    75
                1   94     0      3    0        0        0     3
    2:
    Code:
    58.64user 0.14system 0:32.37elapsed 181%CPU (0avgtext+0avgdata 0maxresident)k
    0inputs+0outputs (0major+256minor)pagefaults 0swaps
    CPU: P4 / Xeon, speed 3000 MHz (estimated)
    Counted GLOBAL_POWER_EVENTS events (time during which processor is not stopped) with a unit mask of 0x01 (mandatory) count 750000
    Processes with a thread ID of 22086
    Processes with a thread ID of 22087
    Processes with a thread ID of 22088
            tid:22086|        tid:22087|        tid:22088|
      samples|      %|  samples|      %|  samples|      %|
    ------------------------------------------------------
            4 100.000    114380 100.000    114508 100.000 /tmp/forum
    	        tid:22086|        tid:22087|        tid:22088|
    	  samples|      %|  samples|      %|  samples|      %|
    	------------------------------------------------------
    	        4 100.000         0       0         0       0 /lib/ld-2.7.so
    	        0       0    114380 100.000    114508 100.000 /tmp/forum
    /* 
     * Command line: opannotate image:/tmp/forum --source 
     * 
     * Interpretation of command line:
     * Output annotated source file with samples
     * Output all files
     * 
     * CPU: P4 / Xeon, speed 3000 MHz (estimated)
     * Counted GLOBAL_POWER_EVENTS events (time during which processor is not stopped) with a unit mask of 0x01 (mandatory) count 750000
     * Processes with a thread ID of 22086
     * Processes with a thread ID of 22087
     * Processes with a thread ID of 22088
     */
    /* 
     * Total samples for file : "/tmp/forum.c"
     * 
     *      0       0114380 100.000114508 100.000
     */
    
    
                                                   :#include <stdio.h>
                                                   :#include <math.h>
                                                   :#include <pthread.h>
                                                   :using namespace std;
                                                   :
                                                   :
                                                   :struct hello_world
                                                   :{
                                                   :double first;
                                                   :double second;
                                                   :double result;
                                                   :};
                                                   :
                                                   :void *myfunc(void *value) /* myfunc(void*) total:      0       0114380 100.000114508 100.000 */
                                                   :{
                                                   :hello_world *myhello = (hello_world*)value;
         0       0 31552 27.5852 32481 28.3657     :for (int counter = myhello->first; counter <= myhello->second; counter++)
                                                   :{
         0       0 82828 72.4148 82027 71.6343     :myhello->result += sqrt((double)counter);
                                                   :}
                                                   :}
                                                   :
                                                   :int main()
                                                   :{
                                                   :pthread_attr_t attr;
                                                   :pthread_attr_init(&attr);
                                                   :pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM);
                                                   :pthread_t thread1, thread2;
                                                   :hello_world *myhello;
                                                   :hello_world *nexthello;
                                                   :myhello = new hello_world;
                                                   :nexthello = new hello_world;
                                                   :myhello->first = 1.0;
                                                   :myhello->second = 500000000.0;
                                                   :nexthello->first = 500000001.0;
                                                   :nexthello->second = 1000000000.0;
                                                   :pthread_create( &thread1, &attr, myfunc, (void*)myhello);
                                                   :pthread_create( &thread2, &attr, myfunc, (void*)nexthello);
                                                   :pthread_join(thread1,NULL);
                                                   :pthread_join(thread2,NULL);
                                                   :}
    
    Linux  debian  2.6.26-2-686  #1 SMP Thu Mar 26 01:08:11 UTC 2009  i686  05/28/2009
    
    07:12:36  cpu %usr %nice   %sys %irq %softirq    %wait %idle             _cpu_
    07:12:36  all   93     0      4    0        0        0     2
                0   91     0      9    0        0        0     0
                1  100     0      0    0        0        0     0
    07:12:37  all   98     0      2    0        0        0     0
                0   97     0      2    0        0        0     1
                1  100     0      1    0        0        0     0
    07:12:38  all  100     0      1    0        0        0     0
                0   99     0      2    0        0        0     0
                1  100     0      0    0        0        0     0
    07:12:39  all   98     0      2    0        0        0     0
                0   95     0      4    0        0        0     1
                1  100     0      0    0        0        0     0
    07:12:40  all   96     0      4    0        0        0     0
                0   94     0      6    0        0        0     0
                1   98     0      2    0        0        0     0
    07:12:41  all   98     0      2    0        0        0     0
                0   99     0      1    0        0        0     0
                1   97     0      2    0        0        0     1
    07:12:42  all  100     0      1    0        0        0     0
                0  100     0      0    0        0        0     0
                1  100     0      1    0        0        0     0
    07:12:43  all   97     0      2    0        0        0     0
                0   95     0      6    0        0        0     0
                1   99     0      0    0        0        0     1
    07:12:44  all   96     0      4    0        0        0     0
                0   92     0      7    0        0        0     1
                1  100     0      1    0        0        0     0
    07:12:45  all   98     0      2    0        0        0     0
                0   95     0      3    1        0        0     1
                1   99     0      0    0        0        0     1

  2. #12
    Just Joined!
    Join Date
    Jun 2008
    Posts
    34
    Could you run the following new script "profile2.sh" and post the results? Thanks.

    NUMOTHREAD=2
    [ $# -eq 1 ] && [ $1 -eq 1 ] && NUMOTHREAD=1
    TESTDIR=/tmp
    PROG=forum
    rm -f $TESTDIR/$PROG




    # create test program source file
    cat > $TESTDIR/$PROG.c <<EOF
    #include <stdio.h>
    #include <math.h>
    #include <pthread.h>
    using namespace std;


    struct hello_world
    {
    double first;
    double second;
    double result;
    };

    void *myfunc(void *value)
    {
    hello_world myhello;
    for (int counter = 1; counter <= 500000000.0; counter++)
    {
    myhello.result += sqrt((double)counter);
    }
    }

    int main()
    {
    pthread_attr_t attr;
    pthread_attr_init(&attr);
    pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM);
    pthread_t thread1, thread2;
    hello_world *myhello;
    hello_world *nexthello;
    myhello = new hello_world;
    nexthello = new hello_world;
    myhello->first = 1.0;
    myhello->second = 500000000.0;
    nexthello->first = 500000001.0;
    nexthello->second = 1000000000.0;
    EOF

    if [ $NUMOTHREAD -eq 1 ]
    then
    cat >> $TESTDIR/$PROG.c <<EOF1
    pthread_create( &thread1, &attr, myfunc, (void*)myhello);
    pthread_join(thread1,NULL);
    }
    EOF1

    else
    cat >> $TESTDIR/$PROG.c <<EOF2
    pthread_create( &thread1, &attr, myfunc, (void*)myhello);
    pthread_create( &thread2, &attr, myfunc, (void*)nexthello);
    pthread_join(thread1,NULL);
    pthread_join(thread2,NULL);
    }
    EOF2
    fi


    # Compile
    g++ -g -lpthread -o $TESTDIR/forum $TESTDIR/$PROG.c

    # Profile setup and start
    /usr/bin/opcontrol --reset
    #/usr/bin/opcontrol --setup --no-vmlinux --separate=library --event=GLOBAL_POWER_EVENTS:750000:0x1:1:1 --separate=thread
    /usr/bin/opcontrol --setup --separate=library,thread --event=GLOBAL_POWER_EVENTS:750000:0x1:1:1
    /usr/bin/opcontrol --start
    sar 1 10 > $TESTDIR/sar$NUMOTHREAD.out &

    # execute
    /usr/bin/time -o $TESTDIR/profile$NUMOTHREAD.out $TESTDIR/$PROG

    /usr/bin/opcontrol --shutdown
    /usr/bin/opreport --long-filenames image:$TESTDIR/$PROG --threshold 1 >> $TESTDIR/profile$NUMOTHREAD.out
    opannotate image:$TESTDIR/$PROG --source >> $TESTDIR/profile$NUMOTHREAD.out
    cat $TESTDIR/sar$NUMOTHREAD.out >> $TESTDIR/profile$NUMOTHREAD.out

  3. #13
    Just Joined!
    Join Date
    Dec 2006
    Posts
    85
    1:
    Code:
    9.08user 0.00system 0:09.30elapsed 97%CPU (0avgtext+0avgdata 0maxresident)k
    0inputs+0outputs (0major+254minor)pagefaults 0swaps
    CPU: P4 / Xeon, speed 2400 MHz (estimated)
    Counted GLOBAL_POWER_EVENTS events (time during which processor is not stopped) with a unit mask of 0x01 (mandatory) count 750000
    Processes with a thread ID of 25231
    Processes with a thread ID of 25232
            tid:25231|        tid:25232|
      samples|      %|  samples|      %|
    ------------------------------------
            5 100.000     35848 100.000 /tmp/forum
    	        tid:25231|        tid:25232|
    	  samples|      %|  samples|      %|
    	------------------------------------
    	        5 100.000         0       0 /lib/ld-2.7.so
    	        0       0     35848 100.000 /tmp/forum
    /* 
     * Command line: opannotate image:/tmp/forum --source 
     * 
     * Interpretation of command line:
     * Output annotated source file with samples
     * Output all files
     * 
     * CPU: P4 / Xeon, speed 2400 MHz (estimated)
     * Counted GLOBAL_POWER_EVENTS events (time during which processor is not stopped) with a unit mask of 0x01 (mandatory) count 750000
     * Processes with a thread ID of 25231
     * Processes with a thread ID of 25232
     */
    /* 
     * Total samples for file : "/tmp/forum.c"
     * 
     *      0       0 35848 100.000
     */
    
    
                                   :#include <stdio.h>
                                   :#include <math.h>
                                   :#include <pthread.h>
                                   :using namespace std;
                                   :
                                   :
                                   :struct hello_world
                                   :{
                                   :double first;
                                   :double second;
                                   :double result;
                                   :};
                                   :
                                   :void *myfunc(void *value) /* myfunc(void*) total:      0       0 35848 100.000 */
                                   :{
                                   :hello_world *myhello = (hello_world*)value;
         0       0  6293 17.5547   :for (int counter = myhello->first; counter <= myhello->second; counter++)
                                   :{
         0       0 29555 82.4453   :myhello->result += sqrt((double)counter);
                                   :}
                                   :}
                                   :
                                   :int main()
                                   :{
                                   :pthread_attr_t attr;
                                   :pthread_attr_init(&attr);
                                   :pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM);
                                   :pthread_t thread1, thread2;
                                   :hello_world *myhello;
                                   :hello_world *nexthello;
                                   :myhello = new hello_world;
                                   :nexthello = new hello_world;
                                   :myhello->first = 1.0;
                                   :myhello->second = 500000000.0;
                                   :nexthello->first = 500000001.0;
                                   :nexthello->second = 1000000000.0;
                                   :pthread_create( &thread1, &attr, myfunc, (void*)myhello);
                                   :pthread_join(thread1,NULL);
                                   :}
    
    Linux  debian  2.6.26-2-686  #1 SMP Thu Mar 26 01:08:11 UTC 2009  i686  05/29/2009
    
    02:54:50  cpu %usr %nice   %sys %irq %softirq    %wait %idle             _cpu_
    02:54:50  all   51     0      2    6        0        2    39
                0    3     0      2   13        0        2    80
                1   99     0      1    0        0        0     0
    02:54:51  all   52     0      0    0        0        0    48
                0    5     0      1    0        0        0    94
                1  100     0      1    0        0        0     0
    02:54:52  all   52     0      1    0        0        0    47
                0    5     0      0    0        0        0    95
                1   99     0      1    0        0        0     0
    02:54:53  all   52     0      2    0        0        0    46
                0    5     0      2    0        0        0    93
                1   98     0      2    0        0        0     0
    02:54:54  all   51     0      1    0        0        0    48
                0    3     0      1    0        0        0    96
                1   99     0      0    0        0        0     1
    02:54:55  all   52     0      1    0        0        0    47
                0    6     0      0    0        0        0    94
                1   99     0      2    0        0        0     0
    02:54:56  all   52     0      1    0        0        0    46
                0    6     0      2    0        0        0    92
                1   99     0      1    0        0        0     0
    02:54:57  all   54     0      2    0        0        0    44
                0    8     0      1    0        0        0    91
                1   98     0      2    0        0        0     0
    02:54:58  all   53     0      2    0        0        0    45
                0   10     0      2    0        0        0    88
                1   96     0      3    0        0        0     1
    02:54:59  all   24     0      4    0        0        1    71
                0   10     0      3    0        0        2    85
                1   38     0      5    0        0        0    57
    2:
    Code:
    60.33user 0.04system 0:32.87elapsed 183%CPU (0avgtext+0avgdata 0maxresident)k
    0inputs+0outputs (0major+255minor)pagefaults 0swaps
    CPU: P4 / Xeon, speed 2400 MHz (estimated)
    Counted GLOBAL_POWER_EVENTS events (time during which processor is not stopped) with a unit mask of 0x01 (mandatory) count 750000
    Processes with a thread ID of 25543
    Processes with a thread ID of 25544
    Processes with a thread ID of 25545
            tid:25543|        tid:25544|        tid:25545|
      samples|      %|  samples|      %|  samples|      %|
    ------------------------------------------------------
            3 100.000    119177 100.000    118939 100.000 /tmp/forum
    	        tid:25543|        tid:25544|        tid:25545|
    	  samples|      %|  samples|      %|  samples|      %|
    	------------------------------------------------------
    	        3 100.000         0       0         0       0 /lib/ld-2.7.so
    	        0       0    119177 100.000    118939 100.000 /tmp/forum
    /* 
     * Command line: opannotate image:/tmp/forum --source 
     * 
     * Interpretation of command line:
     * Output annotated source file with samples
     * Output all files
     * 
     * CPU: P4 / Xeon, speed 2400 MHz (estimated)
     * Counted GLOBAL_POWER_EVENTS events (time during which processor is not stopped) with a unit mask of 0x01 (mandatory) count 750000
     * Processes with a thread ID of 25543
     * Processes with a thread ID of 25544
     * Processes with a thread ID of 25545
     */
    /* 
     * Total samples for file : "/tmp/forum.c"
     * 
     *      0       0119177 100.000118939 100.000
     */
    
    
                                                   :#include <stdio.h>
                                                   :#include <math.h>
                                                   :#include <pthread.h>
                                                   :using namespace std;
                                                   :
                                                   :
                                                   :struct hello_world
                                                   :{
                                                   :double first;
                                                   :double second;
                                                   :double result;
                                                   :};
                                                   :
                                                   :void *myfunc(void *value) /* myfunc(void*) total:      0       0119177 100.000118939 100.000 */
                                                   :{
                                                   :hello_world *myhello = (hello_world*)value;
         0       0 34227 28.7195 34948 29.3831     :for (int counter = myhello->first; counter <= myhello->second; counter++)
                                                   :{
         0       0 84950 71.2805 83991 70.6169     :myhello->result += sqrt((double)counter);
                                                   :}
                                                   :}
                                                   :
                                                   :int main()
                                                   :{
                                                   :pthread_attr_t attr;
                                                   :pthread_attr_init(&attr);
                                                   :pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM);
                                                   :pthread_t thread1, thread2;
                                                   :hello_world *myhello;
                                                   :hello_world *nexthello;
                                                   :myhello = new hello_world;
                                                   :nexthello = new hello_world;
                                                   :myhello->first = 1.0;
                                                   :myhello->second = 500000000.0;
                                                   :nexthello->first = 500000001.0;
                                                   :nexthello->second = 1000000000.0;
                                                   :pthread_create( &thread1, &attr, myfunc, (void*)myhello);
                                                   :pthread_create( &thread2, &attr, myfunc, (void*)nexthello);
                                                   :pthread_join(thread1,NULL);
                                                   :pthread_join(thread2,NULL);
                                                   :}
    
    Linux  debian  2.6.26-2-686  #1 SMP Thu Mar 26 01:08:11 UTC 2009  i686  05/29/2009
    
    02:55:02  cpu %usr %nice   %sys %irq %softirq    %wait %idle             _cpu_
    02:55:02  all   97     0      3    0        0        0     0
                0   94     0      6    0        0        0     0
                1  100     0      0    0        0        0     0
    02:55:03  all   99     0      1    0        0        0     0
                0   99     0      2    0        0        0     0
                1   99     0      1    0        0        0     0
    02:55:04  all   98     0      2    0        0        0     0
                0   97     0      3    0        0        0     0
                1  100     0      0    0        0        0     0
    02:55:05  all   98     0      2    0        0        0     0
                0   97     0      3    0        0        0     0
                1   99     0      1    0        0        0     0
    02:55:06  all   98     0      2    0        0        0     0
                0   96     0      4    0        0        0     0
                1   99     0      1    0        0        0     0
    02:55:07  all   95     0      4    1        0        0     0
                0   92     0      6    2        0        0     0
                1   99     0      1    0        0        0     0
    02:55:08  all   99     0      1    0        0        0     0
                0   99     0      1    0        0        0     0
                1   98     0      2    0        0        0     0
    02:55:09  all   99     0      1    0        0        0     0
                0   98     0      2    0        0        0     0
                1  100     0      0    0        0        0     0
    02:55:10  all   98     0      2    0        0        0     0
                0  100     0      0    0        0        0     0
                1   98     0      2    0        0        0     0
    02:55:11  all   98     0      2    0        0        0     0
                0   98     0      2    0        0        0     0
                1   99     0      1    0        0        0     0

  4. #14
    Just Joined!
    Join Date
    Jun 2008
    Posts
    34
    It seems you are still using the old "profile.sh" script as can be seen in the profile output:
    :void *myfunc(void *value) /* myfunc(void*) total: 0 0119177 100.000118939 100.000 */
    :{
    :hello_world *myhello = (hello_world*)value;
    0 0 34227 28.7195 34948 29.3831 :for (int counter = myhello->first; counter <= myhello->second; counter++)
    :{
    0 0 84950 71.2805 83991 70.6169 :myhello->result += sqrt((double)counter);
    :}
    :}



    Please use the "profile2.sh" script in my previous reply.

  5. #15
    Just Joined!
    Join Date
    Dec 2006
    Posts
    85
    Oops, forgot I put it in a different file:
    1:
    Code:
    10.66user 0.00system 0:10.78elapsed 99%CPU (0avgtext+0avgdata 0maxresident)k
    0inputs+0outputs (0major+255minor)pagefaults 0swaps
    CPU: P4 / Xeon, speed 2400 MHz (estimated)
    Counted GLOBAL_POWER_EVENTS events (time during which processor is not stopped) with a unit mask of 0x01 (mandatory) count 750000
    Processes with a thread ID of 28391
    Processes with a thread ID of 28392
            tid:28391|        tid:28392|
      samples|      %|  samples|      %|
    ------------------------------------
            4 100.000     42081 100.000 /tmp/forum
    	        tid:28391|        tid:28392|
    	  samples|      %|  samples|      %|
    	------------------------------------
    	        4 100.000         0       0 /lib/ld-2.7.so
    	        0       0     42081 100.000 /tmp/forum
    /* 
     * Command line: opannotate image:/tmp/forum --source 
     * 
     * Interpretation of command line:
     * Output annotated source file with samples
     * Output all files
     * 
     * CPU: P4 / Xeon, speed 2400 MHz (estimated)
     * Counted GLOBAL_POWER_EVENTS events (time during which processor is not stopped) with a unit mask of 0x01 (mandatory) count 750000
     * Processes with a thread ID of 28391
     * Processes with a thread ID of 28392
     */
    /* 
     * Total samples for file : "/tmp/forum.c"
     * 
     *      0       0 42081 100.000
     */
    
    
                                   :#include <stdio.h>
                                   :#include <math.h>
                                   :#include <pthread.h>
                                   :using namespace std;
                                   :
                                   :
                                   :struct hello_world
                                   :{
                                   :double first;
                                   :double second;
                                   :double result;
                                   :};
                                   :
                                   :void *myfunc(void *value) /* myfunc(void*) total:      0       0 42081 100.000 */
                                   :{
                                   :hello_world myhello;
         0       0  3437  8.1676   :for (int counter = 1; counter <= 500000000.0; counter++)
                                   :{
         0       0 38644 91.8324   :myhello.result += sqrt((double)counter);
                                   :}
                                   :}
                                   :
                                   :int main()
                                   :{
                                   :pthread_attr_t attr;
                                   :pthread_attr_init(&attr);
                                   :pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM);
                                   :pthread_t thread1, thread2;
                                   :hello_world *myhello;
                                   :hello_world *nexthello;
                                   :myhello = new hello_world;
                                   :nexthello = new hello_world;
                                   :myhello->first = 1.0;
                                   :myhello->second = 500000000.0;
                                   :nexthello->first = 500000001.0;
                                   :nexthello->second = 1000000000.0;
                                   :pthread_create( &thread1, &attr, myfunc, (void*)myhello);
                                   :pthread_join(thread1,NULL);
                                   :}
    
    Linux  debian  2.6.26-2-686  #1 SMP Thu Mar 26 01:08:11 UTC 2009  i686  05/29/2009
    
    18:29:57  cpu %usr %nice   %sys %irq %softirq    %wait %idle             _cpu_
    18:29:58  all   54     0      0    0        0        0    46
                0  100     0      0    0        0        0     0
                1    7     0      1    0        0        0    92
    18:29:59  all   53     0      1    0        0        0    46
                0   99     0      0    0        0        0     1
                1    6     0      1    0        0        0    93
    18:30:00  all   53     0      0    0        0        0    47
                0  100     0      0    0        0        0     0
                1    6     0      1    0        0        0    93
    18:30:01  all   53     0      1    0        0        0    46
                0   99     0      0    0        0        0     1
                1    8     0      1    0        0        0    91
    18:30:02  all   53     0      0    0        0        0    46
                0  100     0      0    0        0        0     0
                1    5     0      1    0        0        0    94
    18:30:03  all   54     0      0    0        0        0    45
                0  100     0      0    0        0        0     0
                1    9     0      1    0        0        0    90
    18:30:04  all   54     0      0    0        0        0    46
                0  100     0      0    0        0        0     0
                1    8     0      0    0        0        0    92
    18:30:05  all   52     0      1    0        0        0    46
                0  100     0      0    0        0        0     0
                1    5     0      2    0        0        0    93
    18:30:06  all   54     0      2    0        0        0    44
                0  100     0      0    0        0        0     0
                1    8     0      4    0        0        0    88
    18:30:07  all   53     0      0    0        0        0    47
                0  100     0      0    0        0        0     0
                1    6     0      0    0        0        0    94
    2:
    Code:
    21.52user 0.04system 0:11.36elapsed 189%CPU (0avgtext+0avgdata 0maxresident)k
    0inputs+0outputs (0major+255minor)pagefaults 0swaps
    CPU: P4 / Xeon, speed 2400 MHz (estimated)
    Counted GLOBAL_POWER_EVENTS events (time during which processor is not stopped) with a unit mask of 0x01 (mandatory) count 750000
    Processes with a thread ID of 28694
    Processes with a thread ID of 28695
    Processes with a thread ID of 28696
            tid:28694|        tid:28695|        tid:28696|
      samples|      %|  samples|      %|  samples|      %|
    ------------------------------------------------------
            2 100.000     42094 100.000     42072 100.000 /tmp/forum
    	        tid:28694|        tid:28695|        tid:28696|
    	  samples|      %|  samples|      %|  samples|      %|
    	------------------------------------------------------
    	        2 100.000         0       0         0       0 /lib/ld-2.7.so
    	        0       0     42094 100.000     42072 100.000 /tmp/forum
    /* 
     * Command line: opannotate image:/tmp/forum --source 
     * 
     * Interpretation of command line:
     * Output annotated source file with samples
     * Output all files
     * 
     * CPU: P4 / Xeon, speed 2400 MHz (estimated)
     * Counted GLOBAL_POWER_EVENTS events (time during which processor is not stopped) with a unit mask of 0x01 (mandatory) count 750000
     * Processes with a thread ID of 28694
     * Processes with a thread ID of 28695
     * Processes with a thread ID of 28696
     */
    /* 
     * Total samples for file : "/tmp/forum.c"
     * 
     *      0       0 42094 100.000 42072 100.000
     */
    
    
                                                   :#include <stdio.h>
                                                   :#include <math.h>
                                                   :#include <pthread.h>
                                                   :using namespace std;
                                                   :
                                                   :
                                                   :struct hello_world
                                                   :{
                                                   :double first;
                                                   :double second;
                                                   :double result;
                                                   :};
                                                   :
                                                   :void *myfunc(void *value) /* myfunc(void*) total:      0       0 42094 100.000 42072 100.000 */
                                                   :{
                                                   :hello_world myhello;
         0       0  4574 10.8662  3372  8.0148     :for (int counter = 1; counter <= 500000000.0; counter++)
                                                   :{
         0       0 37520 89.1338 38700 91.9852     :myhello.result += sqrt((double)counter);
                                                   :}
                                                   :}
                                                   :
                                                   :int main()
                                                   :{
                                                   :pthread_attr_t attr;
                                                   :pthread_attr_init(&attr);
                                                   :pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM);
                                                   :pthread_t thread1, thread2;
                                                   :hello_world *myhello;
                                                   :hello_world *nexthello;
                                                   :myhello = new hello_world;
                                                   :nexthello = new hello_world;
                                                   :myhello->first = 1.0;
                                                   :myhello->second = 500000000.0;
                                                   :nexthello->first = 500000001.0;
                                                   :nexthello->second = 1000000000.0;
                                                   :pthread_create( &thread1, &attr, myfunc, (void*)myhello);
                                                   :pthread_create( &thread2, &attr, myfunc, (void*)nexthello);
                                                   :pthread_join(thread1,NULL);
                                                   :pthread_join(thread2,NULL);
                                                   :}
    
    Linux  debian  2.6.26-2-686  #1 SMP Thu Mar 26 01:08:11 UTC 2009  i686  05/29/2009
    
    18:30:11  cpu %usr %nice   %sys %irq %softirq    %wait %idle             _cpu_
    18:30:11  all  100     0      0    0        0        0     0
                0  100     0      0    0        0        0     0
                1  100     0      0    0        0        0     0
    18:30:12  all   99     0      1    0        0        0     0
                0   98     0      2    0        0        0     0
                1  100     0      0    0        0        0     0
    18:30:13  all  100     0      0    0        0        0     0
                0   99     0      1    0        0        0     0
                1  100     0      0    0        0        0     0
    18:30:14  all  100     0      0    0        0        0     0
                0   99     0      0    0        0        0     1
                1  100     0      0    0        0        0     0
    18:30:15  all  100     0      0    0        0        0     0
                0  100     0      1    0        0        0     0
                1  100     0      0    0        0        0     0
    18:30:16  all   98     0      2    0        0        0     0
                0   97     0      2    0        0        0     1
                1   98     0      1    0        1        0     0
    18:30:17  all  100     0      0    0        0        0     0
                0  100     0      0    0        0        0     0
                1   99     0      0    0        0        0     1
    18:30:18  all  100     0      0    0        0        0     0
                0  100     0      0    0        0        0     0
                1  100     0      1    0        0        0     0
    18:30:19  all  100     0      0    0        0        0     0
                0  100     0      0    0        0        0     0
                1  100     0      0    0        0        0     0
    18:30:20  all  100     0      0    0        0        0     0
                0   99     0      0    1        0        0     0
                1  100     0      0    0        0        0     0

  6. #16
    Just Joined!
    Join Date
    Jun 2008
    Posts
    34
    Good, the results of "profile2.sh" confirm my suspicion.
    I believe the performance problem is caused by expensive cache invalidates due to "false sharing".
    Although thread1 and thread2 access different variables myhello and nexthello respectively in the main thread, these two variables reside next to each other in memory during program execution and hence might be in the same cache line in each of the CPUs. This causes "false sharing" of memory by the two threads.

    If we look at "profile.sh" output (which uses your original thread function):
    void *myfunc(void *value) /* myfunc(void*) total: 0 0 119177 100.000 118939 100.000 */
    :{
    :hello_world *myhello = (hello_world*)value;
    0 0 34227 28.7195 34948 29.3831 :for (int counter = myhello->first; counter <= myhello->second; counter++)
    :{
    0 0 84950 71.2805 83991 70.6169 :myhello->result += sqrt((double)counter);
    :}
    :}

    Each thread takes around the following number of samples (which is directly proportional to CPU time):
    1. 34000 for the loop control
    2. 84000 for the sqrt computation

    Now if we look at "profile2.sh" output (in which I have changed all references to myhello/nexthello in the main thread to local variable "myhello" in myfunc())
    No more false sharing.
    :void *myfunc(void *value) /* myfunc(void*) total: 0 0 42094 100.000 42072 100.000 */
    :{
    :hello_world myhello;
    0 0 4574 10.8662 3372 8.0148 :for (int counter = 1; counter <= 500000000.0; counter++)
    :{
    0 0 37520 89.1338 38700 91.9852 :myhello.result += sqrt((double)counter);
    :}
    :}

    Now, each thread takes around the following number of samples
    1. 4000 for the loop control
    2. 38000 for the sqrt computation
    These are substantially less than those for profile.sh
    The important thing is they are around the same as a single thread (also from profile2.sh output):
    :void *myfunc(void *value) /* myfunc(void*) total: 0 0 42081 100.000 */
    :{
    :hello_world myhello;
    0 0 3437 8.1676 :for (int counter = 1; counter <= 500000000.0; counter++)
    :{
    0 0 38644 91.8324 :myhello.result += sqrt((double)counter);
    :}
    :}

    If you compare the "time" results for one and two threads in profile2.sh :
    one thread(profile1.out): 10.66user 0.00system 0:10.78elapsed 99%CPU
    two thread(profile2.out): 21.52user 0.04system 0:11.36elapsed 189%CPU
    You will see they are the same i.e. around 11s elapsed time as expected.

    Please try the following script "profile3.sh" to see whether we have successfully eliminated the "false sharing" by some modification to your original code:

    # profile3.sh: generate a small pthread test program, build it with g++,
    # and profile it with oprofile while sar records CPU usage.
    # Usage: ./profile3.sh [1]
    #   Two threads are used by default; a single argument equal to 1 selects
    #   the one-thread variant of the test program.
    NUMOTHREAD=2
    [ $# -eq 1 ] && [ $1 -eq 1 ] && NUMOTHREAD=1
    TESTDIR=/tmp
    PROG=forum
    # remove any binary left over from a previous run
    rm -f $TESTDIR/$PROG




    # create test program source file
    # (common part: struct definition, thread function, and the beginning of
    # main(); the end of main() is appended below depending on NUMOTHREAD)
    cat > $TESTDIR/$PROG.c <<EOF
    #include <stdio.h>
    #include <math.h>
    #include <pthread.h>
    using namespace std;


    struct hello_world
    {
    double first;
    double second;
    double result;
    };

    void *myfunc(void *value)
    {
    hello_world *myhello = (hello_world*)value;
    for (int counter = myhello->first; counter <= myhello->second; counter++)
    {
    myhello->result += sqrt((double)counter);
    }
    }

    int main()
    {
    pthread_attr_t attr;
    pthread_attr_init(&attr);
    pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM);
    pthread_t thread1, thread2;
    hello_world *myhello;
    hello_world *nexthello;
    myhello = new hello_world;

    /* attempt to eliminate false sharing of cache line */
    printf("myhello:%x\n",myhello);
    int iterate=128/sizeof(hello_world);
    printf("struct size=%i\n", sizeof(hello_world));
    hello_world *dummy[iterate];
    for (int i=0;i<iterate;i++) { printf("i=%i\n",i);
    dummy[i]=new hello_world;
    dummy[i]->result=0; }

    nexthello = new hello_world;

    printf("nexthello:%x\n",nexthello);

    myhello->first = 1.0;
    myhello->second = 500000000.0;
    nexthello->first = 500000001.0;
    nexthello->second = 1000000000.0;
    EOF

    # append the end of main(): create and join one thread, or two threads,
    # according to NUMOTHREAD
    if [ $NUMOTHREAD -eq 1 ]
    then
    cat >> $TESTDIR/$PROG.c <<EOF1
    pthread_create( &thread1, &attr, myfunc, (void*)myhello);
    pthread_join(thread1,NULL);
    }
    EOF1

    else
    cat >> $TESTDIR/$PROG.c <<EOF2
    pthread_create( &thread1, &attr, myfunc, (void*)myhello);
    pthread_create( &thread2, &attr, myfunc, (void*)nexthello);
    pthread_join(thread1,NULL);
    pthread_join(thread2,NULL);
    }
    EOF2
    fi


    # Compile
    # (-g adds debug information; the output name is hard-coded as "forum",
    # which matches PROG above)
    g++ -g -lpthread -o $TESTDIR/forum $TESTDIR/$PROG.c

    # Profile setup and start
    # (the commented-out line is an earlier variant of the setup command, kept
    # for reference; the active one separates samples by library and by thread)
    /usr/bin/opcontrol --reset
    #/usr/bin/opcontrol --setup --no-vmlinux --separate=library --event=GLOBAL_POWER_EVENTS:750000:0x1:1:1 --separate=thread
    /usr/bin/opcontrol --setup --separate=library,thread --event=GLOBAL_POWER_EVENTS:750000:0x1:1:1
    /usr/bin/opcontrol --start
    # sample CPU usage in the background (1-second interval, 10 reports)
    sar 1 10 > $TESTDIR/sar$NUMOTHREAD.out &

    # execute
    # (/usr/bin/time writes its timing report to profile$NUMOTHREAD.out via -o;
    # the program's own stdout is captured in $PROG.out)
    /usr/bin/time -o $TESTDIR/profile$NUMOTHREAD.out $TESTDIR/$PROG > $TESTDIR/$PROG.out

    # stop profiling, then append the opreport summary, the annotated source,
    # the sar output and the program output to the same profile file
    /usr/bin/opcontrol --shutdown
    /usr/bin/opreport --long-filenames image:$TESTDIR/$PROG --threshold 1 >> $TESTDIR/profile$NUMOTHREAD.out
    opannotate image:$TESTDIR/$PROG --source >> $TESTDIR/profile$NUMOTHREAD.out
    cat $TESTDIR/sar$NUMOTHREAD.out >> $TESTDIR/profile$NUMOTHREAD.out
    cat $TESTDIR/$PROG.out >> $TESTDIR/profile$NUMOTHREAD.out

    I found one good article on "false sharing" on the internet which you might want to read:
    .NET Matters: False Sharing

  7. #17
    Just Joined!
    Join Date
    Dec 2006
    Posts
    85
    1:
    Code:
    9.06user 0.02system 0:09.39elapsed 96%CPU (0avgtext+0avgdata 0maxresident)k
    0inputs+8outputs (0major+272minor)pagefaults 0swaps
    CPU: P4 / Xeon, speed 2400 MHz (estimated)
    Counted GLOBAL_POWER_EVENTS events (time during which processor is not stopped) with a unit mask of 0x01 (mandatory) count 750000
    Processes with a thread ID of 31653
    Processes with a thread ID of 31654
            tid:31653|        tid:31654|
      samples|      %|  samples|      %|
    ------------------------------------
            3 100.000     35944 100.000 /tmp/forum
    	        tid:31653|        tid:31654|
    	  samples|      %|  samples|      %|
    	------------------------------------
    	        2 66.6667         1  0.0028 /lib/ld-2.7.so
    	        1 33.3333         0       0 /lib/i686/cmov/libc-2.7.so
    	        0       0     35943 99.9972 /tmp/forum
    /* 
     * Command line: opannotate image:/tmp/forum --source 
     * 
     * Interpretation of command line:
     * Output annotated source file with samples
     * Output all files
     * 
     * CPU: P4 / Xeon, speed 2400 MHz (estimated)
     * Counted GLOBAL_POWER_EVENTS events (time during which processor is not stopped) with a unit mask of 0x01 (mandatory) count 750000
     * Processes with a thread ID of 31653
     * Processes with a thread ID of 31654
     */
    /* 
     * Total samples for file : "/tmp/forum.c"
     * 
     *      0       0 35943 99.9972
     */
    
    
                                   :#include <stdio.h>
                                   :#include <math.h>
                                   :#include <pthread.h>
                                   :using namespace std;
                                   :
                                   :
                                   :struct hello_world
                                   :{
                                   :double first;
                                   :double second;
                                   :double result;
                                   :};
                                   :
                                   :void *myfunc(void *value) /* myfunc(void*) total:      0       0 35943 99.9972 */
                                   :{
                                   :hello_world *myhello = (hello_world*)value;
         0       0  3768 10.4830   :for (int counter = myhello->first; counter <= myhello->second; counter++)
                                   :{
         0       0 32175 89.5142   :myhello->result += sqrt((double)counter);
                                   :}
                                   :}
                                   :
                                   :int main()
                                   :{
                                   :pthread_attr_t attr;
                                   :pthread_attr_init(&attr);
                                   :pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM);
                                   :pthread_t thread1, thread2;
                                   :hello_world *myhello;
                                   :hello_world *nexthello;
                                   :myhello = new hello_world;
                                   :
                                   :/* attempt to eliminate false sharing of cache line */
                                   :printf("myhello:%x\n",myhello);
                                   :int iterate=128/sizeof(hello_world);
                                   :printf("struct size=%i\n", sizeof(hello_world));
                                   :hello_world *dummy[iterate];
                                   :for (int i=0;i<iterate;i++) { printf("i=%i\n",i);
                                   :dummy[i]=new hello_world;
                                   :dummy[i]->result=0; }
                                   :
                                   :nexthello = new hello_world;
                                   :
                                   :printf("nexthello:%x\n",nexthello);
                                   :
                                   :myhello->first = 1.0;
                                   :myhello->second = 500000000.0;
                                   :nexthello->first = 500000001.0;
                                   :nexthello->second = 1000000000.0;
                                   :pthread_create( &thread1, &attr, myfunc, (void*)myhello);
                                   :pthread_join(thread1,NULL);
                                   :}
    
    Linux  debian  2.6.26-2-686  #1 SMP Thu Mar 26 01:08:11 UTC 2009  i686  05/30/2009
    
    02:29:35  cpu %usr %nice   %sys %irq %softirq    %wait %idle             _cpu_
    02:29:35  all   59     0      2    0        0        0    39
                0  100     0      0    0        0        0     0
                1   19     0      5    0        0        0    76
    02:29:36  all   58     0      2    0        0        0    40
                0   98     0      2    0        1        0     0
                1   17     0      3    0        0        0    80
    02:29:37  all   56     0      5    0        0        0    39
                0   96     0      4    0        0        0     0
                1   17     0      5    0        0        0    78
    02:29:38  all   56     0      3    1        0        0    40
                0   96     0      3    2        0        0     0
                1   15     0      4    0        0        0    81
    02:29:39  all   58     0      4    0        0        0    38
                0  100     0      0    0        0        0     0
                1   15     0      7    0        0        0    78
    02:29:40  all   58     0      3    0        0        0    38
                0   99     0      0    0        0        0     1
                1   17     0      7    0        0        0    76
    02:29:41  all   58     0      3    0        0        0    38
                0  100     0      0    0        0        0     0
                1   17     0      6    0        0        0    77
    02:29:42  all   58     0      4    0        0        0    38
                0  100     0      0    0        0        0     0
                1   17     0      7    0        0        0    76
    02:29:43  all   58     0      4    0        0        0    39
                0  100     0      0    0        0        0     0
                1   15     0      7    0        0        0    78
    02:29:44  all   41     0      8    0        0        9    42
                0   64     0      8    0        0       12    16
                1   19     0      8    0        0        5    68
    myhello:9b2e008
    struct size=24
    i=0
    i=1
    i=2
    i=3
    i=4
    nexthello:9b2e0c8
    2:
    Code:
    18.02user 0.02system 0:09.87elapsed 182%CPU (0avgtext+0avgdata 0maxresident)k
    0inputs+8outputs (0major+272minor)pagefaults 0swaps
    CPU: P4 / Xeon, speed 2400 MHz (estimated)
    Counted GLOBAL_POWER_EVENTS events (time during which processor is not stopped) with a unit mask of 0x01 (mandatory) count 750000
    Processes with a thread ID of 31950
    Processes with a thread ID of 31951
    Processes with a thread ID of 31952
            tid:31950|        tid:31951|        tid:31952|
      samples|      %|  samples|      %|  samples|      %|
    ------------------------------------------------------
            3 100.000     35951 100.000     35954 100.000 /tmp/forum
    	        tid:31950|        tid:31951|        tid:31952|
    	  samples|      %|  samples|      %|  samples|      %|
    	------------------------------------------------------
    	        3 100.000         0       0         0       0 /lib/ld-2.7.so
    	        0       0     35951 100.000     35954 100.000 /tmp/forum
    /* 
     * Command line: opannotate image:/tmp/forum --source 
     * 
     * Interpretation of command line:
     * Output annotated source file with samples
     * Output all files
     * 
     * CPU: P4 / Xeon, speed 2400 MHz (estimated)
     * Counted GLOBAL_POWER_EVENTS events (time during which processor is not stopped) with a unit mask of 0x01 (mandatory) count 750000
     * Processes with a thread ID of 31950
     * Processes with a thread ID of 31951
     * Processes with a thread ID of 31952
     */
    /* 
     * Total samples for file : "/tmp/forum.c"
     * 
     *      0       0 35951 100.000 35954 100.000
     */
    
    
                                                   :#include <stdio.h>
                                                   :#include <math.h>
                                                   :#include <pthread.h>
                                                   :using namespace std;
                                                   :
                                                   :
                                                   :struct hello_world
                                                   :{
                                                   :double first;
                                                   :double second;
                                                   :double result;
                                                   :};
                                                   :
                                                   :void *myfunc(void *value) /* myfunc(void*) total:      0       0 35951 100.000 35954 100.000 */
                                                   :{
                                                   :hello_world *myhello = (hello_world*)value;
         0       0  3991 11.1012  3753 10.4383     :for (int counter = myhello->first; counter <= myhello->second; counter++)
                                                   :{
         0       0 31960 88.8988 32201 89.5617     :myhello->result += sqrt((double)counter);
                                                   :}
                                                   :}
                                                   :
                                                   :int main()
                                                   :{
                                                   :pthread_attr_t attr;
                                                   :pthread_attr_init(&attr);
                                                   :pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM);
                                                   :pthread_t thread1, thread2;
                                                   :hello_world *myhello;
                                                   :hello_world *nexthello;
                                                   :myhello = new hello_world;
                                                   :
                                                   :/* attempt to eliminate false sharing of cache line */
                                                   :printf("myhello:%x\n",myhello);
                                                   :int iterate=128/sizeof(hello_world);
                                                   :printf("struct size=%i\n", sizeof(hello_world));
                                                   :hello_world *dummy[iterate];
                                                   :for (int i=0;i<iterate;i++) { printf("i=%i\n",i);
                                                   :dummy[i]=new hello_world;
                                                   :dummy[i]->result=0; }
                                                   :
                                                   :nexthello = new hello_world;
                                                   :
                                                   :printf("nexthello:%x\n",nexthello);
                                                   :
                                                   :myhello->first = 1.0;
                                                   :myhello->second = 500000000.0;
                                                   :nexthello->first = 500000001.0;
                                                   :nexthello->second = 1000000000.0;
                                                   :pthread_create( &thread1, &attr, myfunc, (void*)myhello);
                                                   :pthread_create( &thread2, &attr, myfunc, (void*)nexthello);
                                                   :pthread_join(thread1,NULL);
                                                   :pthread_join(thread2,NULL);
                                                   :}
    
    Linux  debian  2.6.26-2-686  #1 SMP Thu Mar 26 01:08:11 UTC 2009  i686  05/30/2009
    
    02:29:46  cpu %usr %nice   %sys %irq %softirq    %wait %idle             _cpu_
    02:29:46  all   95     0      2    0        0        0     3
                0   90     0      3    0        0        0     6
                1   97     0      0    0        0        0     3
    02:29:47  all   99     0      1    0        0        0     0
                0   99     0      2    0        0        0     0
                1  100     0      0    0        0        0     0
    02:29:48  all  100     0      0    0        0        0     0
                0   98     0      2    0        0        0     0
                1  100     0      0    0        0        0     0
    02:29:49  all   99     0      1    0        0        0     0
                0   99     0      1    0        0        0     0
                1  100     0      0    0        0        0     0
    02:29:50  all  100     0      0    0        0        0     0
                0   99     0      1    0        0        0     0
                1  100     0      1    0        0        0     0
    02:29:51  all   98     0      2    0        0        0     0
                0   97     0      3    0        0        0     0
                1  100     0      0    0        0        0     0
    02:29:52  all   99     0      1    0        0        0     0
                0   98     0      1    0        0        0     1
                1  100     0      0    0        0        0     0
    02:29:53  all   99     0      0    0        0        0     0
                0   98     0      2    0        1        0     0
                1  100     0      0    0        0        0     0
    02:29:54  all   99     0      1    0        0        0     0
                0   98     0      2    0        0        0     0
                1  100     0      0    0        0        0     0
    02:29:55  all   91     0      0    0        0        0     8
                0   99     0      1    0        0        0     0
                1   83     0      0    0        0        0    17
    myhello:a037008
    struct size=24
    i=0
    i=1
    i=2
    i=3
    i=4
    nexthello:a0370c8
    [/CODE]

  8. #18
    Just Joined!
    Join Date
    Jun 2008
    Posts
    34
    Ok. So the output of profile3.sh again confirms the performance problem was due to "false sharing"
    By adding the following code in between the myhello/nexthello memory allocation:
    :int iterate=128/sizeof(hello_world);
    :printf("struct size=%i\n", sizeof(hello_world));
    :hello_world *dummy[iterate];
    :for (int i=0;i<iterate;i++) { printf("i=%i\n",i);
    :dummy[i]=new hello_world;
    :dummy[i]->result=0; }
    we have successfully avoided the "false sharing", i.e. making myhello and nexthello reside in different cache lines.

    This is indicated in the time output:
    9.06user 0.02system 0:09.39elapsed 96%CPU for 1 thread
    18.02user 0.02system 0:09.87elapsed 182%CPU for 2 threads
    Both have elapsed time of around 10s, as expected.

    So this explains all the apparently "weird" results you have observed previously.
    Hope all this helps.

    -Steve

  9. #19
    Just Joined!
    Join Date
    Dec 2006
    Posts
    85
    hmm...however they both have the same time, shouldn't the 2 core solution run faster?

    well either way it has definitely helped, 10 seconds is a far cry better than 34...many thanks

  10. #20
    Just Joined!
    Join Date
    Jun 2008
    Posts
    34
    Sorry if I haven't made it absolutely clear.
    Please remember each thread in the one thread and two thread programs performs the same number of iterations (i.e. 500000000)
    See source code:
    myhello->first = 1.0;
    myhello->second = 500000000.0;
    nexthello->first = 500000001.0;
    nexthello->second = 1000000000.0;
    So the two thread program is actually performing twice the workload of the one thread program.
    If you run the two thread program on a single cpu system, it should take twice the amount of time that the one thread program takes.
    So the fact that the one-thread program takes the same amount of time as the two-thread program means the 2-core system runs the two-thread workload twice as fast as a single-CPU system would.

    To illustrate this in another way, and also to verify the default thread scope of your system, please execute the following script
    "profile4.sh" and post the result of "./profile4.sh 2":
    # profile4.sh [1|2]
    # Generates a small pthread test program in $TESTDIR, builds it, runs it
    # under OProfile and sar, and gathers all results in
    # $TESTDIR/profile<N>.out, where N is the number of worker threads.
    # An argument of 1 selects one worker thread; anything else selects two.
    NUMOTHREAD=2
    [ $# -eq 1 ] && [ $1 -eq 1 ] && NUMOTHREAD=1
    TESTDIR=/tmp
    PROG=forum
    rm -f $TESTDIR/$PROG

    # create test program source file
    # (first part: everything up to the thread start, shared by both variants)
    cat > $TESTDIR/$PROG.c <<EOF
    #include <stdio.h>
    #include <math.h>
    #include <pthread.h>
    using namespace std;


    /* Work item for one thread: sqrt(i) is summed into result for i in [first, second]. */
    struct hello_world
    {
    double first;
    double second;
    double result;
    };

    /* Thread entry point; value points to the hello_world this thread works on. */
    void *myfunc(void *value)
    {
    hello_world *myhello = (hello_world*)value;
    for (int counter = myhello->first; counter <= myhello->second; counter++)
    {
    myhello->result += sqrt((double)counter);
    }
    /* a function returning void* must return a value; the joiner ignores it */
    return NULL;
    }

    int main()
    {
    pthread_attr_t attr;
    pthread_attr_init(&attr);
    /* the requested scope may be refused (e.g. ENOTSUP); report that on stderr
       so the run is not mistaken for a process-scope measurement */
    int rc = pthread_attr_setscope(&attr, PTHREAD_SCOPE_PROCESS);
    if (rc != 0) fprintf(stderr, "pthread_attr_setscope failed: %d\n", rc);
    pthread_t thread1, thread2;
    hello_world *myhello;
    hello_world *nexthello;
    myhello = new hello_world;

    /* attempt to eliminate false sharing of cache line:
       allocate 128 bytes worth of dummy structs between myhello and nexthello */
    printf("myhello:%lx\n",(unsigned long)myhello);
    int iterate=128/sizeof(hello_world);
    printf("struct size=%i\n", (int)sizeof(hello_world));
    hello_world *dummy[iterate];
    for (int i=0;i<iterate;i++) { printf("i=%i\n",i);
    dummy[i]=new hello_world;
    dummy[i]->result=0; }

    nexthello = new hello_world;

    printf("nexthello:%lx\n",(unsigned long)nexthello);

    /* new leaves the members uninitialised, so result must be zeroed
       before myfunc starts accumulating into it */
    myhello->first = 1.0;
    myhello->second = 500000000.0;
    myhello->result = 0.0;
    nexthello->first = 500000001.0;
    nexthello->second = 1000000000.0;
    nexthello->result = 0.0;
    EOF

    # second part: start and join either one or two worker threads, close main()
    if [ $NUMOTHREAD -eq 1 ]
    then
    cat >> $TESTDIR/$PROG.c <<EOF1
    pthread_create( &thread1, &attr, myfunc, (void*)myhello);
    pthread_join(thread1,NULL);
    }
    EOF1

    else
    cat >> $TESTDIR/$PROG.c <<EOF2
    pthread_create( &thread1, &attr, myfunc, (void*)myhello);
    pthread_create( &thread2, &attr, myfunc, (void*)nexthello);
    pthread_join(thread1,NULL);
    pthread_join(thread2,NULL);
    }
    EOF2
    fi


    # Compile
    # -pthread sets both the compile and link flags and, unlike a leading
    # -lpthread, does not depend on its position relative to the source file
    g++ -g -pthread -o $TESTDIR/$PROG $TESTDIR/$PROG.c

    # Profile setup and start
    /usr/bin/opcontrol --reset
    #/usr/bin/opcontrol --setup --no-vmlinux --separate=library --event=GLOBAL_POWER_EVENTS:750000:0x1:1:1 --separate=thread
    /usr/bin/opcontrol --setup --separate=library,thread --event=GLOBAL_POWER_EVENTS:750000:0x1:1:1
    /usr/bin/opcontrol --start
    # sample CPU utilisation once per second for 10 seconds, in the background
    sar 1 10 > $TESTDIR/sar$NUMOTHREAD.out &

    # execute
    /usr/bin/time -o $TESTDIR/profile$NUMOTHREAD.out $TESTDIR/$PROG > $TESTDIR/$PROG.out

    /usr/bin/opcontrol --shutdown
    /usr/bin/opreport --long-filenames image:$TESTDIR/$PROG --threshold 1 >> $TESTDIR/profile$NUMOTHREAD.out
    opannotate image:$TESTDIR/$PROG --source >> $TESTDIR/profile$NUMOTHREAD.out
    # make sure the background sar has written all of its samples before reading them
    wait
    cat $TESTDIR/sar$NUMOTHREAD.out >> $TESTDIR/profile$NUMOTHREAD.out
    cat $TESTDIR/$PROG.out >> $TESTDIR/profile$NUMOTHREAD.out
    "profile4.sh" differs from "profile3.sh" only in the following statement:
    pthread_attr_setscope(&attr, PTHREAD_SCOPE_PROCESS);
    I expect the program will use one CPU at a time.

Page 2 of 3 FirstFirst 1 2 3 LastLast

Posting Permissions

  • You may not post new threads
  • You may not post replies
  • You may not post attachments
  • You may not edit your posts
  •