/*
	**************************
	Patrick Schmid
	pds2
	20 March 2006
	Lab 15 Solution
	Patrick Schmid
	Section 10 & 11
	**************************
	Purpose: Read CO2 concentration for a given number of years from text file and do linear regression.
		     Use a 2-D array to store the data table
	Algorithm: For loop to read year and concentration. Void function to set up regression and
		       calculate r-squared
*/

#include <fstream.h>
#include <iomanip.h>
#include <math.h>

//prototypes
void linreg(double data[][2], int n, double& slope, double& intercept, double& rsqrd);
void linreg(double data[][2], int n, double results[3]);

void main() {
	double data[10][2]; //declare array to store data
	int i; //declare for loop counter
	char line[80]; //buffer to read line
	double slope; //slope for linear regression
	double intercept; //intercept for linear regression
	double rsqrd; //r-squared for linear regression


	//open file
	ifstream in ("co2.txt", ios::in);

	//skip over first line
	in.getline(line, 80);

	//echo print setup
	cout<<"Year     CO2 concentration\n";
	cout<<"--------------------------\n";

	//read data from file using for loop (text file has 10 rows)
	for (i=0; i<10; i++) {

		in>>data[i][0]>>data[i][1]; //read text file row into appropriate array row

		cout<<data[i][0]<<"     "<<data[i][1]<<endl; //echo print
	
	}
	cout<<endl;

	//call void function to get linear regression variables
	linreg(data, 10, slope, intercept, rsqrd);

	//ouput regression variables
	cout<<"Slope: "<<slope<<endl;
	cout<<"Intercept: "<<intercept<<endl;
	cout<<"R-squared: "<<rsqrd<<endl;
	
	//output CO2 concentration in year 2020
	//just plug in the data into the equation for a line:
	//CO2Concentration=slope * year + intercept
	cout<<"CO2 concentration in year 2020: "<<slope*2020+intercept<<endl;
	
	//double the value of the CO2-concetration in 1957
	double targetCO2value=data[0][1]*2;

	//using the equation of a line, we can determine the year
	//y = mx + b
	//then y - b = mx
	//and finally, x = (y - b) / m
	//why am I using an int?
	int yearForTargetValue = (int) ((targetCO2value - intercept) / slope);

	cout<<"Year in which the CO2-concentration reaches double the value of 1957: "
		<<yearForTargetValue<<endl;

}

//if we want to use our exisiting linreg function, but store the
//results in an array, we can write another function
//that calls our original one
//this function is not used in this program, but can help you
//with your pa4
void linreg(double data[][2], int n, double results[3]) {
	linreg(data, n, results[0], results[1], results[2]);		
}

//void function to calculate the inverses of the CO2 concentrations
//and the average CO2 concentration
//generic linear regression function using a 2-D array with n number of rows
void linreg(double data[][2], int n, double& slope, double& intercept, double& rsqrd) {

	//pseudo-code for slope using the more complicated formulas:
	//slope = (Sum (Xi * Yi) - YAverage * Sum(Xi)) / (Sum (Xi^2) - XAverage * Sum(Xi))
	//pseudo-code for intercept:
	//intercept = YAverage - slope * XAverage

	//those two formulas imply that we should calculate the following things first
	double sumYi=0;    //sum of all y values. Not needed in the formulas, but
					 //we will need it to get the averages
	double sumXi=0;    //sum of all x values
	double sumXiSquared=0; //sum of all x^2 values
	double sumYiSquared=0; //sum of all y^2 values (needed for r-squared)
	double sumXiYi=0;  //sum of all Xi * Yi values
	double xAverage; //average of all x values
	double yAverage; //average of all y values

	//let's calculate all sums first in a for loop
	//remember that we had to initialize all sums to 0!
	//x-axis: Year
	//y-axis: CO2 concentration
	for (int i=0; i<n; i++) {
		sumXi+=data[i][0];
		sumYi+=data[i][1];
		sumXiSquared+=pow(data[i][0],2);
		sumYiSquared+=pow(data[i][1],2);
		sumXiYi+=data[i][0] * data[i][1];
	}

	//calculate averages
	xAverage = sumXi / n;
	yAverage = sumYi / n;

	//calculate slope
	slope = (sumXiYi - yAverage * sumXi) / (sumXiSquared - xAverage * sumXi);
	
	//calculate intercept
	intercept = yAverage - slope * xAverage;

	//let's figure out r-squared
	double rNumerator; //Numerator of r 
	double rDenominator;  //denominator of r
	
	//using formulas from lecture, we get
	rNumerator = n * sumXiYi - sumXi * sumYi;
	rDenominator = sqrt (n * sumXiSquared - sumXi * sumXi) *
				   sqrt (n * sumYiSquared - sumYi * sumYi);

	//calculate r-squared
	rsqrd = pow(rNumerator / rDenominator, 2);
}
