/*
	**************************
	Patrick Schmid
	pds2
	15 March 2006
	Lab 14 Solution
	Patrick Schmid
	Section 10 & 11
	**************************
	Purpose: Read CO2 concentration for a given number of years from text file and do linear regression
	Algorithm: For loop to read year and concentration. Void function to set up regression and
		       calculate r-squared
*/

#include <fstream.h>
#include <iomanip.h>
#include <math.h>

//prototypes
//linreg: least-squares fit of the line  co2 = slope * year + intercept
//  co2[]     - y values; the first 10 elements are read
//  year[]    - x values; the first 10 elements are read
//  slope     - output: slope of the fitted line
//  intercept - output: intercept of the fitted line
//  rsqrd     - output: r-squared of the fit
void linreg(double co2[ ], double year[ ], double& slope, double& intercept, double& rsqrd);

void main() {
	double year[10], co2[10]; //declare arrays with 10 items each
	int i; //declare for loop counter
	char line[80]; //buffer to read line
	double slope; //slope for linear regression
	double intercept; //intercept for linear regression
	double rsqrd; //r-squared for linear regression


	//open file
	ifstream in ("co2.txt", ios::in);

	//skip over first line
	in.getline(line, 80);

	//echo print setup
	cout<<"Year     CO2 concentration\n";
	cout<<"--------------------------\n";

	//read data from file using for loop (text file has 10 rows)
	for (i=0; i<10; i++) {

		in>>year[i]>>co2[i]; //read text file row into appropriate array row

		cout<<year[i]<<"     "<<co2[i]<<endl; //echo print
	
	}
	cout<<endl;

	//call void function to get linear regression variables
	linreg(co2, year, slope, intercept, rsqrd);

	//ouput regression variables
	cout<<"Slope: "<<slope<<endl;
	cout<<"Intercept: "<<intercept<<endl;
	cout<<"R-squared: "<<rsqrd<<endl;
	
	//output CO2 concentration in year 2020
	//just plug in the data into the equation for a line:
	//CO2Concentration=slope * year + intercept
	cout<<"CO2 concentration in year 2020: "<<slope*2020+intercept<<endl;
	
}

//void function to do a least-squares linear regression of CO2 concentration
//(y-axis) against year (x-axis):  co2 = slope * year + intercept
//  co2[]     - y values; the first 10 elements are read
//  year[]    - x values; the first 10 elements are read
//  slope     - output: slope of the fitted line
//  intercept - output: intercept of the fitted line
//  rsqrd     - output: r-squared (square of the correlation coefficient)
//If all years are identical the slope is 0/0 (NaN); if all years or all
//concentrations are identical r-squared is 0/0 (NaN).
void linreg(double co2[ ], double year[ ], double& slope, double& intercept, double& rsqrd) {

	const int N = 10; //number of data points (fixed by the lab's data file)
	int i; //loop counter, declared once so both loops compile under old and new for-scope rules

	//formulas used (deviations from the averages):
	//Sxx = Sum ((Xi - XAverage)^2)
	//Syy = Sum ((Yi - YAverage)^2)
	//Sxy = Sum ((Xi - XAverage) * (Yi - YAverage))
	//slope     = Sxy / Sxx
	//intercept = YAverage - slope * XAverage
	//r-squared = Sxy^2 / (Sxx * Syy)
	//These are algebraically the same as the raw-sum formulas
	//(Sum(Xi*Yi) - YAverage*Sum(Xi)) / (Sum(Xi^2) - XAverage*Sum(Xi)), but the raw
	//sums subtract two huge, nearly equal numbers when x is a year (~2000),
	//which throws away precision. Subtracting the averages first avoids that.

	//first pass: sums for the averages
	double sumXi=0; //sum of all x values
	double sumYi=0; //sum of all y values
	for (i=0; i<N; i++) {
		sumXi+=year[i];
		sumYi+=co2[i];
	}

	//calculate averages
	double xAverage = sumXi / N;
	double yAverage = sumYi / N;

	//second pass: sums of squared / crossed deviations from the averages
	double sxx=0; //sum of (Xi - XAverage)^2
	double syy=0; //sum of (Yi - YAverage)^2
	double sxy=0; //sum of (Xi - XAverage) * (Yi - YAverage)
	for (i=0; i<N; i++) {
		double dx = year[i] - xAverage;
		double dy = co2[i] - yAverage;
		sxx+=dx * dx; //plain multiplication instead of pow(dx,2)
		syy+=dy * dy;
		sxy+=dx * dy;
	}

	//calculate slope
	slope = sxy / sxx;

	//calculate intercept
	intercept = yAverage - slope * xAverage;

	//calculate r-squared; r = Sxy / (sqrt(Sxx) * sqrt(Syy)), so squaring
	//it removes the square roots
	rsqrd = (sxy * sxy) / (sxx * syy);
}
