c - Parallel slower than serial -

May 15, 2014

issue

hello everyone, have got program (from net) intend speed converting parallel version use of pthreads. surprisingly though, runs slower serial version. below program:

# include <stdio.h>  //fast square root algorithm double asmsqrt(double x)  {   __asm__ ("fsqrt" : "+t" (x));   return x; }  //test if number prime bool isprime(int n) {        if (n <= 1) return false;     if (n == 2) return true;     if (n%2 == 0) return false;      int sqrtn,i;     sqrtn = asmsqrt(n);      (i = 3; <= sqrtn; i+=2) if (n%i == 0) return false;     return true; }  //number generator iterated 0 n int main() {     n = 1000000; //maximum number     int k,j;      (j = 0; j<= n; j++)     {         if(isprime(j) == 1) k++;         if(j == n) printf("count: %d\n",k);     }     return 0; }

first attempt parallelization

i let pthread manage for loop

# include <stdio.h> . .  int main() {     .     .     //----->pthread code here<----     (j = 0; j<= n; j++)     {         if(isprime(j) == 1) k++;         if(j == n) printf("count: %d\n",k);     }     return 0; }

well, runs slower serial one

second attempt

i divided for loop two threads , run them in parallel using pthreads

however, still runs slower, intending may run twice fast or faster. not!

these parallel code way:

# include <stdio.h> # include <pthread.h> # include <cmath>  # define nthreads 2  pthread_mutex_t mutex1 = pthread_mutex_initializer; int k = 0;  double asmsqrt(double x)  {   __asm__ ("fsqrt" : "+t" (x));   return x; }  struct arg_struct {     int initialprime;     int nextprime; };  bool isprime(int n) {        if (n <= 1) return false;      if (n == 2) return true;      if (n%2 == 0) return false;      int sqrtn,i;     sqrtn = asmsqrt(n);      (i = 3; <= sqrtn; i+=2) if (n%i == 0) return false;      return true; }  void *parallel_launcher(void *arguments) {     struct arg_struct *args = (struct arg_struct *)arguments;      int j = args -> initialprime;     int n = args -> nextprime - 1;      (j = 0; j<= n; j++)     {         if(isprime(j) == 1)         {             printf("this prime: %d\n",j); pthread_mutex_lock( &mutex1 );             k++; pthread_mutex_unlock( &mutex1 );         }          if(j == n) printf("count: %d\n",k);     } pthread_exit(null); }  int main() {     int f = 100000000;     int m;      pthread_t thread_id[nthreads];     struct arg_struct args;      int rem = (f+1)%nthreads;     int n = floor((f+1)/nthreads);      for(int h = 0; h < nthreads; h++)     {         if(rem > 0)         {             m = n + 1;             rem-= 1;         }         else if(rem == 0)         {             m = n;         }          args.initialprime = args.nextprime;         args.nextprime = args.initialprime + m;          pthread_create(&thread_id[h], null, &parallel_launcher, (void *)&args);         pthread_join(thread_id[h], null);     }    // printf("count: %d\n",k);     return 0; }

note: os: fedora 21 x86_64, compiler: gcc-4.4, processor: intel core i5 (2 physical core, 4 logical), mem: 6 gb, hdd: 340 gb,

you need split range examining primes n parts, n number of threads.

the code each thread runs becomes:

typedef struct start_end {     int start;     int end; } start_end_t;  int find_primes_in_range(void *in) {     start_end_t *start_end = (start_end_t *) in;      int num_primes = 0;     (int j = start_end->start; j <= start_end->end; j++) {        if (isprime(j) == 1)            num_primes++;     }     pthread_exit((void *) num_primes; }

the main routine first starts threads call find_primes_in_range, calls pthread_join each thread. sums values returned find_primes_in_range. avoids locking , unlocking shared count variable.

this parallelize work, amount of work per thread not equal. can addressed more complicated.

Search This Blog

ANgular

c - Parallel slower than serial -

Comments

Post a Comment

Popular posts from this blog

c# - Validate object ID from GET to POST -

node.js - Custom Model Validator SailsJS -

php - Find a regex to take part of Email -