//========================================================= file = lr.c ====
//=  Program to calculate linear regression equation for <x, y> data       =
//==========================================================================
//=  Notes:                                                                =
//=    1)  The method of least squares is used to perform linear           =
//=        regression on <x, y> data.  The equation of the line and        =
//=        the coefficient of determination are computed.                  =
//=    2)  The input file contains <x, y> pairs separated by commas        =
//=        and/or whitespace.  No comments are allowed in the input file.  =
//=    3)  See, R. Jain, "The Art of Computer Systems Performance          =
//=        Analysis," John Wiley & Sons, 1991 (pp. 221-228).               =
//=------------------------------------------------------------------------=
//= Example "in.dat" file (from Jain, page 224):                           =
//=                                                                        =
//=    14, 2                                                               =
//=    16, 5                                                               =
//=    27, 7                                                               =
//=    42, 9                                                               =
//=    39, 10                                                              =
//=    50, 13                                                              =
//=    83, 20                                                              =
//=------------------------------------------------------------------------=
//= Example output (for above "in.dat"):                                   =
//=                                                                        =
//=   ---------------------------------------------------- lr.c -----      =
//=     Number of <x, y> pairs = 7                                         =
//=     Y = 0.243756*x + -0.008282   (R^2 = 0.971471)                      =
//=   ---------------------------------------------------------------      =
//=------------------------------------------------------------------------=
//=  Build:  bcc32 lr.c, cl lr.c, gcc lr.c -lm                             =
//=------------------------------------------------------------------------=
//=  Execute: lr < in.dat                                                  =
//=------------------------------------------------------------------------=
//=  Contact: Kenneth J. Christensen                                       =
//=           University of South Florida                                  =
//=           WWW: http://www.csee.usf.edu/~christen                       =
//=           Email: christen@csee.usf.edu                                 =
//=------------------------------------------------------------------------=
//=  History:  VJE (02/13/99) - Genesis                                    =
//=            KJC (03/09/99) - Minor clean-up                             =
//=            KJC (05/25/00) - Added include stdlib to make atof() work   =
//==========================================================================

//----- Include files ------------------------------------------------------
#include <stdio.h>   // Needed for printf() and feof()
#include <stdlib.h>  // Needed for atof()
#include <math.h>    // Needed for sqrt() and pow()

//==========================================================================
//=  Function to test whether a character separates input values           =
//=------------------------------------------------------------------------=
//=  Inputs:  c - character value as returned by getchar()                 =
//=  Returns: Non-zero if c is a comma or whitespace, zero otherwise       =
//==========================================================================
static int is_separator(int c)
{
  return((c == ',') || (c == ' ') || (c == '\t') || (c == '\n') ||
         (c == '\r') || (c == '\f') || (c == '\v'));
}

//==========================================================================
//=  Function to read the next numeric token from stdin                    =
//=------------------------------------------------------------------------=
//=  Inputs:  value - receives the parsed number on success                =
//=  Returns: 1 if a number was read, 0 at end of input, or -1 if the      =
//=           next token is too long or is not a valid number              =
//==========================================================================
static int read_value(double *value)
{
  char  token[80];            // Text of the token being collected
  char *end;                  // First character not consumed by strtod()
  int   length;               // Number of characters stored in token
  int   c;                    // Current input character

  // Skip any commas and whitespace that precede the token
  do
  {
    c = getchar();
  } while (is_separator(c));
  if (c == EOF) return(0);

  // Collect characters up to the next separator or the end of input.  The
  // length check keeps an oversized token from overrunning the buffer.
  length = 0;
  while ((c != EOF) && !is_separator(c))
  {
    if (length >= (int) sizeof(token) - 1) return(-1);
    token[length++] = (char) c;
    c = getchar();
  }
  token[length] = '\0';

  // Convert the token, rejecting text that is not entirely a number
  *value = strtod(token, &end);
  if ((end == token) || (*end != '\0')) return(-1);

  return(1);
}

//==========================================================================
//=  Main program                                                          =
//=------------------------------------------------------------------------=
//=  Returns: 0 on success, EXIT_FAILURE if the input is malformed or the  =
//=           regression line cannot be determined from the data           =
//==========================================================================
int main(void)
{
  double   x;                 // Value of x read in from the file
  double   y;                 // Value of y read in from the file
  int      status;            // Result of the most recent read_value()
  long int count;             // Counter for number of <x, y> pairs
  double   accum_x;           // Accumulator for the sum of the x's
  double   accum_y;           // Accumulator for the sum of the y's
  double   accum_xy;          // Accumulator for the sum of (x*y)
  double   accum_x_squared;   // Accumulator for the sum of the x's squared
  double   accum_y_squared;   // Accumulator for the sum of the y's squared
  double   numerator;         // n*sum(x*y) - sum(x)*sum(y)
  double   x_spread;          // n*sum(x*x) - sum(x)*sum(x)
  double   y_spread;          // n*sum(y*y) - sum(y)*sum(y)
  double   linear_corr;       // Linear correlation coefficient
  double   slope;             // Slope of the linear regression
  double   intercept;         // Intercept of the linear regression

  // Output a banner
  printf("---------------------------------------------------- lr.c ----- \n");

  // Main loop to read <x, y> values and compute accumulated values
  count = 0;
  accum_x = accum_y = accum_xy = accum_x_squared = accum_y_squared = 0.0;
  while ((status = read_value(&x)) != 0)
  {
    // An x value was found (or was malformed), so a y value must follow it
    if (status > 0) status = read_value(&y);
    if (status <= 0)
    {
      fprintf(stderr, "lr: invalid or missing value in pair %ld\n",
        count + 1);
      return(EXIT_FAILURE);
    }
    count++;

    // Accumulate the applicable values from the input file
    accum_x         = accum_x + x;             // Sum of x
    accum_y         = accum_y + y;             // Sum of y
    accum_xy        = accum_xy + (x*y);        // Sum of x*y
    accum_x_squared = accum_x_squared + (x*x); // Sum of x squared
    accum_y_squared = accum_y_squared + (y*y); // Sum of y squared
  }

  printf("  Number of <x, y> pairs = %ld \n", count);

  // Terms shared by the slope and the correlation coefficient
  numerator = (count * accum_xy) - (accum_x * accum_y);
  x_spread  = (count * accum_x_squared) - (accum_x * accum_x);
  y_spread  = (count * accum_y_squared) - (accum_y * accum_y);

  // The slope divides by x_spread, which is zero when there are fewer than
  // two pairs or when every x is the same (rounding can make it negative)
  if ((count < 2) || (x_spread <= 0.0))
  {
    fprintf(stderr, "lr: at least two distinct x values are required\n");
    return(EXIT_FAILURE);
  }

  // Calculate the slope and intercept of the linear regression line
  slope     = numerator / x_spread;
  intercept = (accum_y / count) - (slope * (accum_x / count));

  // Output results (R^2 is the coefficient of determination).  The linear
  // correlation coefficient divides by sqrt(y_spread), so R^2 can only be
  // computed when the y values are not all equal.
  if (y_spread > 0.0)
  {
    linear_corr = numerator / (sqrt(x_spread) * sqrt(y_spread));
    printf("  Y = %f*x + %f   (R^2 = %f) \n",
      slope, intercept, pow(linear_corr, 2.0));
  }
  else
  {
    printf("  Y = %f*x + %f   (R^2 is undefined: all y values are equal) \n",
      slope, intercept);
  }
  printf("--------------------------------------------------------------- \n");

  return(0);
}

