#include <iostream.h>
#include <timeit.h>

/* This program demonstrates the benefits of using a 'restrict'
 * keyword when programming.  The restrict keyword can be used
 * to describe to the compiler when two pointers do not overlap
 * in memory. The potential benefits of the restrict keyword
 * when used to eliminate pointer aliasing are most evident
 * on pipelined CPU architectures. Non-pipelined systems will
 * not show as much benefit.
 *
 * In this example, we'll modify the add_gain function in two
 * fashions to demonstrate the effects of aliasing and the
 * ability of the restrict keyword to eliminate those effects.
 * Four versions of add_gain exist:
 *  - add_gain: base, no restrict keyword, not-unrolled
 *  - restrict_add_gain: add_gain with restrict keyword, not-unrolled
 *  - unroll_add_gain: no restrict keyword, loop-unrolled
 *  - unroll_restrict_add_gain: restrict keyword, loop-unrolled
 * These four functions allow quick comparison of the relative
 * effects of loop-unrolling, the restrict keyword, and native
 * platform, aliased-memory performance.
 *
 * Note! This example only demonstrates the 'restrict' keyword
 * under IRIX.
 */

// Element count of each array processed by the add_gain variants.
// NUM must be a multiple of 8 since we unroll by 8 below: the unrolled
// loops step by 8 and touch indices i..i+7 with no remainder pass.
#ifdef LINUX
const int NUM = 800000; /* linux chokes with the larger allocation */
#else
const int NUM = 4000000;
#endif

/* Define RESTRICT on those platforms that can use it: it expands to the
 * 'restrict' keyword when IRIX is defined and to nothing otherwise, so
 * elsewhere the "restrict" variants compile with plain pointers. */
#ifdef IRIX 
#define RESTRICT restrict
#else
#define RESTRICT
#endif

// Restrict-qualified, non-unrolled variant: writes p2[k] * gain into
// p1[k] for each of the NUM elements, in ascending order.  Both pointers
// carry the RESTRICT qualifier.
void restrict_add_gain( float * RESTRICT p1, float * RESTRICT p2,
		        float gain) 
{
   int idx = 0;
   while (idx < NUM)
   {
      p1[idx] = p2[idx] * gain;
      ++idx;
   }
}

// Restrict-qualified variant combined with the classic unrolling
// technique: writes p2[k] * gain into p1[k] for NUM elements, eight per
// loop pass.  Both pointers carry the RESTRICT qualifier.
void unroll_restrict_add_gain(float * RESTRICT p1, float* RESTRICT p2, float gain) 
{
   // The unrolling factor of 8 is arbitrary; any other factor would do.
   // There is no remainder pass, so NUM has to be a multiple of 8.
   int k = 0;
   while (k < NUM)
   {
      p1[k  ] = p2[k  ] * gain;
      p1[k+1] = p2[k+1] * gain;
      p1[k+2] = p2[k+2] * gain;
      p1[k+3] = p2[k+3] * gain;
      p1[k+4] = p2[k+4] * gain;
      p1[k+5] = p2[k+5] * gain;
      p1[k+6] = p2[k+6] * gain;
      p1[k+7] = p2[k+7] * gain;
      k += 8;
   }
}

// Unrolled variant without the restrict qualifier: writes p2[k] * gain
// into p1[k] for NUM elements, eight per loop pass.
void unroll_add_gain(float *p1, float* p2, float gain) 
{
   // The unrolling factor of 8 is arbitrary; any other factor would do.
   // There is no remainder pass, so NUM has to be a multiple of 8.
   for (int base = 0; base < NUM; base += 8)
   {
      // Window onto the current group of eight elements.
      float *dst = p1 + base;
      float *src = p2 + base;

      dst[0] = src[0] * gain;
      dst[1] = src[1] * gain;
      dst[2] = src[2] * gain;
      dst[3] = src[3] * gain;
      dst[4] = src[4] * gain;
      dst[5] = src[5] * gain;
      dst[6] = src[6] * gain;
      dst[7] = src[7] * gain;
   }
}

// Baseline add_gain: no restrict qualifier and no unrolling.  Writes
// p2[k] * gain into p1[k] for each of the NUM elements, in ascending
// order.
void add_gain(float *p1, float* p2, float gain) 
{
   for (int k = 0; k < NUM; ++k)
   {
      p1[k] = p2[k] * gain;
   }
}

// Restores both arrays to their starting contents: every one of the NUM
// elements of p1 becomes 0 and every one of p2 becomes 1.
void reset_array(float *p1, float* p2) 
{
   int pos = 0;
   while (pos < NUM)
   {
      p1[pos] = 0;
      p2[pos] = 1;
      ++pos;
   }
}

// main program: runs each of the four add_gain variants once over the
// same pair of NUM-element float arrays, resetting the arrays before
// every run, and prints the elapsed time reported by the timeit object.
int main()
{
   // Static rather than automatic storage: two float[NUM] arrays come to
   // roughly 32 MB with NUM = 4000000 (4-byte floats), which overruns
   // common default stack limits when they are declared as plain locals.
   static float p1[NUM], p2[NUM];
   timeobj *tt = timeit_new();

   // Baseline: no restrict, no unrolling.
   reset_array(p1,p2);
   cout << "add_gain (aliased pointers, non-unrolled loops)" << endl;
   timeit_start( tt );
   add_gain(p1,p2,5.0);
   timeit_stop( tt );
   cout << "elapsed time (s): " << timeit_getf( tt, timeit_seconds) << endl;

   // Unrolled by 8, no restrict.
   reset_array(p1,p2);
   cout << "unroll_add_gain (aliased pointers, loop unrolled)" << endl;
   timeit_start( tt );
   unroll_add_gain(p1,p2,6.0);
   timeit_stop( tt );
   cout << "elapsed time (s): " << timeit_getf( tt, timeit_seconds) << endl;

   // Restrict-qualified pointers, no unrolling.
   reset_array(p1,p2);
   cout << "restrict_add_gain (restrict aliasing, non-unrolled loops)"
     << endl;
   timeit_start( tt );
   restrict_add_gain(p1,p2,7.0); 
   timeit_stop( tt );
   cout << "elapsed time (s): " << timeit_getf( tt, timeit_seconds) << endl;

   // Restrict-qualified pointers, unrolled by 8.
   reset_array(p1,p2);
   cout << "unroll_restrict_add_gain (restrict aliasing, unrolled loops)"
     << endl;
   timeit_start( tt );
   unroll_restrict_add_gain(p1,p2,7.0); 
   timeit_stop( tt );
   cout << "elapsed time (s): " << timeit_getf( tt, timeit_seconds) << endl;

   timeit_delete( tt );

   // Explicit exit status; pre-standard compilers do not guarantee an
   // implicit return value from main.
   return 0;
}
