Hello there,

I'm writing a small program to test real-time pthread behavior.
One would expect real-time threads to run faster than normal threads.
That is true on average, but not in the worst case:
The code distributes jobs to N threads (N = number of cores), measures computation times over several runs, and then shows basic stats (min time, max time, mean time).

To run it with the normal scheduler, simply launch it.
To run it with the real-time scheduler, launch it as root (sudo) with one parameter.

The result: with real-time scheduling, the mean time is slightly lower (as expected), but the max time is often hugely larger!
I believe this is a difficult problem pertaining to how the kernel actually schedules tasks and threads, which is why I am posting it here in Linux Kernel.
Does anyone have a clue?

PS :
-Version-
Kernel : Linux 2.6.32-24-generic (x86_64)
Compiled : #42-Ubuntu SMP Fri Aug 20 14:21:58 UTC 2010
C Library : GNU C Library version 2.11.1 (stable)
Default C Compiler : GNU C Compiler version 4.4.3 (Ubuntu 4.4.3-4ubuntu5)
Distribution : Ubuntu 10.04.1 LTS


// ********************************************
// Distribution of jobs to an optimal number of threads for a simple linear algebra algorithm (matrix product).
// Threads can be normal or real-time.
// Elapsed (wall-clock) time is measured in microseconds and, surprise, real-time tasks can behave *worse* than non-real-time ones!
#include <iostream>
#include <iomanip>
#include <pthread.h>
#include <ctime>
#include <sys/time.h>
#include <cstdlib>
#include <limits>
#include <unistd.h>
#include <errno.h>
#include <cstdio>

using namespace std;

// Readable names for the bool argument of timer_us::gett0_us() / getti_us():
// whether the call should also move the interval reference point to "now".
enum {
    timer_norecord = false, // read the elapsed time only
    timer_record = true     // read it and restart the interval from this call
};

// ******* Class timer_us
// Measures wall-clock ("true", not CPU) time intervals in microseconds,
// using gettimeofday().
class timer_us
{
public:
    // Records the creation time; the interval reference starts at the same instant.
    timer_us() { gettimeofday(&t0_, NULL); ti_ = t0_; }

    // Microseconds elapsed since the object was created, or -1 if the value
    // would not fit in a long. A true argument also moves the interval
    // reference used by getti_us() to the current time.
    long gett0_us(bool);

    // Microseconds elapsed since the interval reference (creation, or the last
    // call of either getter with a true argument), or -1 on overflow.
    // A true argument moves the interval reference to the current time.
    long getti_us(bool);

protected:
    timeval t0_, ti_; // creation time, and current interval reference
};

// Largest whole-second difference that can still be expressed in microseconds in a long.
const long max_sec = std::numeric_limits<long>::max()/1000000;

// Microseconds from `from` to `to`; -1 when the seconds part alone would overflow a long.
static long span_us(const timeval& from, const timeval& to)
{
    const long whole_sec = to.tv_sec - from.tv_sec;
    if (whole_sec > max_sec)
        return -1;
    return to.tv_usec - from.tv_usec + whole_sec*1000000;
}

long timer_us::gett0_us(bool set_ti)
{
    timeval now;
    gettimeofday(&now, NULL);
    const long elapsed = span_us(t0_, now);
    if (set_ti)
        ti_ = now; // restart the interval on request
    return elapsed;
}

long timer_us::getti_us(bool set_ti)
{
    timeval now;
    gettimeofday(&now, NULL);
    const long elapsed = span_us(ti_, now);
    if (set_ti)
        ti_ = now; // restart the interval on request
    return elapsed;
}

// ******** Class Runnable
void* th_entry(void* args);

// Minimal wrapper around one POSIX thread. Derive from it and override run();
// the thread body is run(), invoked once from th_entry().
//
// A thread can be started at most once per object. stop() and th_wait() are
// no-ops when no thread is running or when it has already been joined.
//
// WARNING: when autostart is enabled the thread is created from this base-class
// constructor, i.e. before a derived class has finished constructing. The new
// thread may therefore reach run() while the object is still a plain Runnable
// and execute the empty base implementation. Calling start() explicitly after
// construction avoids that race.
class Runnable
{
public:
    Runnable();
    virtual ~Runnable() {} // polymorphic base: allow safe destruction through Runnable*
    // Creates the thread if it has not been created yet.
    void start();
    // Requests cancellation of a running, not yet joined thread.
    void stop() { if (started && !joined) pthread_cancel(th); }
    // Joins the thread once; later calls do nothing (a second pthread_join is undefined).
    void th_wait() { if (started && !joined) { pthread_join(th, NULL); joined = true; } }
    // Sequence number of this object (1 for the first Runnable constructed).
    int get_ID() { return th_ID; }
    // Raw thread handle; value-initialised until a thread has been created.
    pthread_t get_pth() { return th; }
    // Chooses whether objects constructed afterwards start their thread immediately.
    static void set_autostart(bool val) { autostart = val; }
protected:
    virtual void run() {}
private:
    static int th_CNT;     // number of Runnable objects constructed so far
    static bool autostart; // start the thread from the constructor?
    pthread_t th;
    bool started;          // pthread_create succeeded
    bool joined;           // pthread_join already performed
    int th_ID;
    friend void* th_entry(void* self);
};

int Runnable::th_CNT = 0;
bool Runnable::autostart = false;

Runnable::Runnable()
    : th(), started(false), joined(false), th_ID(++th_CNT)
{
    if (autostart) start();
}

void Runnable::start()
{
    if (started) return;
    // NULL attributes: per the original author's note, the new thread inherits the
    // creator's policy and priority at this point, i.e. SCHED_FIFO/2 when run as
    // root with one parameter, SCHED_OTHER/0 otherwise.
    const int rc = pthread_create(&th, NULL, th_entry, static_cast<void*>(this));
    if (rc != 0) {
        // pthread_create reports failure through its return value, not errno.
        errno = rc;
        perror("pthread_create");
        th = pthread_t(); // contents of th are undefined after a failed create
        return;
    }
    started = true;
}

// Thread entry point: enables deferred cancellation, then runs the object's run().
void* th_entry(void* self)
{
    pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
    pthread_setcanceltype(PTHREAD_CANCEL_DEFERRED, NULL);
    static_cast<Runnable*>(self)->run();
    return NULL;
}

// Base class for objects that need a critical section.
// All Synchronized objects share ONE static mutex. It is initialised when the
// first object is constructed and destroyed when the last one is destructed.
// The instance counter itself is not protected, so objects must not be created
// or destroyed concurrently.
class Synchronized
{
public:
    Synchronized() { attach(); }
    // Copies count as instances too; otherwise destroying a copy would release
    // the shared mutex while the original is still using it.
    Synchronized(const Synchronized&) { attach(); }
    ~Synchronized() { if (!(--cnt)) pthread_mutex_destroy(&mutex); }
    pthread_mutex_t* mutget() { return &mutex; }
    // Cleanup handler with the exact signature pthread_cleanup_push expects;
    // m must point to the mutex to unlock.
    static void unlock_handler(void* m) { pthread_mutex_unlock(static_cast<pthread_mutex_t*>(m)); }
protected:
    static pthread_mutex_t mutex;
    static int cnt;
private:
    static void attach() { if (!(cnt++)) pthread_mutex_init(&mutex, NULL); }
};
// enter_critical()/leave_critical() must be used as a pair, in the same lexical
// scope of a member function of a Synchronized-derived class (they call mutget()).
// enter_critical() expands to TWO statements, so never use it as the unbraced body
// of an if/for/while. The cleanup handler releases the mutex if the thread is
// cancelled inside the section; leave_critical() pops the handler and runs it.
#define enter_critical() pthread_mutex_lock(mutget());pthread_cleanup_push(Synchronized::unlock_handler, static_cast<void*>(mutget()))
#define leave_critical() pthread_cleanup_pop(1)

pthread_mutex_t Synchronized::mutex;
int Synchronized::cnt = 0;

// Side length of the square matrices A, B and C. Job::get() hands out the row
// indices 0 .. MatDim-1, one per call, and then returns -1.
const int MatDim = 200;

// Hands out matrix row numbers to worker threads, one row per get() call.
// Access to the row counter is serialised through the Synchronized mutex.
class Job : public Synchronized
{
public:
    // Start at row 0 so that an instance is usable even if init() is never called.
    Job() : line(0) {}
    // Rewinds the dispenser to row 0 (call before each run).
    void init() { line = 0; }
    // Returns the row the calling thread has to compute, or -1 when no row is left.
    int get();
protected:
    int line; // next row to hand out
};

// Returns the next unassigned row index and advances the counter, or -1 once
// all MatDim rows have been handed out. The counter is read and updated inside
// the critical section.
int Job::get()
{
    int next = -1;
    enter_critical();
    if (line != MatDim)
        next = line++;
    leave_critical();
    return next;
}

// Row dispenser shared by all worker threads.
Job job;
// Operands (A, B) and result (C) of the matrix product computed by the workers.
double A[MatDim][MatDim];
double B[MatDim][MatDim];
double C[MatDim][MatDim];

// Worker thread: its run() pulls rows from the global job dispenser and
// computes them until none is left.
class Thread : public Runnable
{
protected:
    virtual void run(); // overrides Runnable::run()
};

// Thread body: repeatedly fetches a row index from `job` and computes that row
// of C = A * B; returns when job.get() reports that no row is left (-1).
void Thread::run()
{
    for (int row = job.get(); row >= 0; row = job.get()) {
        double* const out = C[row];
        const double* const lhs = A[row];
        for (int col = 0; col < MatDim; ++col) {
            out[col] = 0.0;
            for (int k = 0; k < MatDim; ++k)
                out[col] += lhs[k]*B[k][col];
        }
    }
}

int get_runtime(int nb_th)
{
job.init();
timer_us T1; // timer starts
Thread T[nb_th]; // threads created
for (int i=0 ; i<nb_th ; i++)
T[i].th_wait();
return T1.gett0_us(timer_norecord); // return time in Ás
}

// Switches the caller to the SCHED_FIFO policy with priority 2.
// On failure prints the error and terminates the program with exit status 1;
// per the original note this only succeeds with the appropriate capability (root).
void set_hiprio()
{
    sched_param rt;
    rt.sched_priority = 2;
    const int rc = sched_setscheduler(0, SCHED_FIFO, &rt);
    if (rc >= 0)
        return;
    perror("sched_setscheduler");
    exit(1);
}

// Divisor that maps rand() onto roughly 0..10000.
const int scale = RAND_MAX/10000;

// Fills A and B with random values, runs the threaded product nb_run times and
// prints the minimum, mean and maximum elapsed time in microseconds.
// With exactly one command-line argument the process first switches to the
// real-time scheduler (requires root).
int main(int argc, char **argv) {
    srand(time(0));
    for (int i=0 ; i<MatDim ; i++)
        for (int j=0 ; j<MatDim ; j++) { // set up 2 random matrices, entries roughly in -5000..5000
            A[i][j] = rand()/scale-5000;
            B[i][j] = rand()/scale-5000;
        }
    const int nb_run = 80;

    // Query the core count once; sysconf returns -1 on error, so fall back to 1 thread.
    const long n_online = sysconf(_SC_NPROCESSORS_ONLN);
    const int nb_th = n_online < 1 ? 1 : static_cast<int>(n_online);

    int min_time = std::numeric_limits<int>::max(); // any real lap is smaller
    int max_time = 0;
    long total_time = 0; // long: the sum of all laps may exceed the range of int

    Runnable::set_autostart(true);
    if (argc==2) set_hiprio(); // switch to real time if parameter and root
    for (int j=0 ; j<nb_run ; j++) {
        const int laps = get_runtime(nb_th); // measures time of execution
        total_time += laps; // makes basic stats
        if (laps<min_time) min_time = laps;
        if (laps>max_time) max_time = laps;
    }
    const long mean_time = total_time/nb_run;
    cout << "min=" << setw(6) << min_time; // shows results
    cout << ", mean=" << setw(6) << mean_time;
    cout << ", max= " << setw(6) << max_time << endl;
    return 0;
}