1  function polymodel = polyfitn(indepvar,depvar,modelterms)


2  % polyfitn: fits a general polynomial regression model in n dimensions


3  % usage: polymodel = polyfitn(indepvar,depvar,modelterms)


4  %


5  % Polyfitn fits a polynomial regression model of one or more


6  % independent variables, of the general form:


7  %


8  % z = f(x,y,...) + error


9  %


10  % arguments: (input)


11  % indepvar  (n x p) array of independent variables as columns


12  % n is the number of data points


13  % p is the dimension of the independent variable space


14  %


15  % IF n == 1, then I will assume there is only a


16  % single independent variable.


17  %


18  % depvar  (n x 1 or 1 x n) vector  dependent variable


19  % length(depvar) must be n.


20  %


21  % Only 1 dependent variable is allowed, since I also


22  % return statistics on the model.


23  %


24  % modelterms  defines the terms used in the model itself


25  %


26  % IF modelterms is a scalar integer, then it designates


27  % the overall order of the model. All possible terms


28  % up to that order will be employed. Thus, if order


29  % is 2 and p == 2 (i.e., there are two variables) then


30  % the terms selected will be:


31  %


32  % { x^2, x*y, x, y^2, y, constant}


33  %


34  % Beware the consequences of high order polynomial


35  % models.


36  %


37  % IF modelterms is a (k x p) numeric array, then each


38  % row of this array designates the exponents of one


39  % term in the model. Thus to designate a model with


40  % the above list of terms, we would define modelterms as


41  %


42  % modelterms = [0 0;1 0;2 0;0 1;1 1;0 2]


43  %


44  % If modelterms is a character string, then it will be


45  % parsed as a list of terms in the regression model.


46  % The terms will be assume to be separated by a comma


47  % or by blanks. The variable names used must be legal


48  % matlab variable names. Exponents in the model may


49  % may be any real number, positive or negative.


50  %


51  % For example, 'constant, x, y, x*y, x^2, x*y*y'


52  % will be parsed as a model specification as if you


53  % had supplied:


54  % modelterms = [0 0;1 0;0 1;1 1;2 0;1 2]


55  %


56  % The word 'constant' is a keyword, and will denote a


57  % constant terms in the model. Variable names will be


58  % sorted in alphabetical order as defined by sort.


59  % This order will assign them to columns of the


60  % independent array. Note that 'xy' will be parsed as


61  % a single variable name, not as the product of x and y.


62  %


63  % If modelterms is a cell array, then it will be taken


64  % to be a list of character terms. Similarly,


65  %


66  % {'constant', 'x', 'y', 'x*y', 'x^2', 'x*y^1'}


67  %


68  % will be parsed as a model specification as if you


69  % had supplied:


70  %


71  % modelterms = [0 0;1 0;0 1;1 1;2 0;1 1]


72  %


73  % Arguments: (output)


74  % polymodel  A structure containing the regression model


75  % polymodel.ModelTerms = list of terms in the model


76  % polymodel.Coefficients = regression coefficients


77  % polymodel.ParameterVar = variances of model coefficients


78  % polymodel.ParameterStd = standard deviation of model coefficients


79  % polymodel.R2 = R^2 for the regression model


80  % polymodel.AdjustedR2 = Adjusted R^2 for the regression model


81  % polymodel.RMSE = Root mean squared error


82  % polymodel.VarNames = Cell array of variable names


83  % as parsed from a char based model specification.


84  %


85  % Note 1: Because the terms in a general polynomial


86  % model can be arbitrarily chosen by the user, I must


87  % package the erms and coefficients together into a


88  % structure. This also forces use of a special evaluation


89  % tool: polyvaln.


90  %


91  % Note 2: A polymodel can be evaluated for any set


92  % of values with the function polyvaln. However, if


93  % you wish to manipulate the result symbolically using


94  % my own sympoly tools, this structure can be converted


95  % to a sympoly using the function polyn2sympoly. There


96  % is also a polyn2sym tool, for those who prefer the


97  % symbolic TB.


98  %


99  % Note 3: When no constant term is included in the model,


100  % the traditional R^2 can be negative. This case is


101  % identified, and then a more appropriate computation


102  % for R^2 is then used.


103  %


104  % Note 4: Adjusted R^2 accounts for changing degrees of


105  % freedom in the model. It CAN be negative, and will always


106  % be less than the traditional R^2 values.


107  %


108  % Find my sympoly toolbox here:


109  % http://www.mathworks.com/matlabcentral/fileexchange/loadFile.do?objectId=9577&objectType=FILE


110  %


111  % See also: polyvaln, polyfit, polyval, polyn2sympoly, sympoly


112  %


113  % Author: John D'Errico


114  % Release: 2.0


115  % Release date: 2/19/06


116 


117  if nargin<1


118  help polyfitn


119  return


120  end


121 


122  % get sizes, test for consistency


123  [n,p] = size(indepvar);


124  if n == 1


125  indepvar = indepvar';


126  [n,p] = size(indepvar);


127  end


128  [m,q] = size(depvar);


129  if m == 1


130  depvar = depvar';


131  [m,q] = size(depvar);


132  end


133  % only 1 dependent variable allowed at a time


134  if q~=1


135  error 'Only 1 dependent variable allowed at a time.'


136  end


137 


138  if n~=m


139  error 'indepvar and depvar are of inconsistent sizes.'


140  end


141 


142  % Automatically scale the independent variables to unit variance


143  stdind = sqrt(diag(cov(indepvar)));


144  if any(stdind==0)


145  warning 'Constant terms in the model must be entered using modelterms'


146  stdind(stdind==0) = 1;


147  end


148  % scaled variables


149  indepvar_s = indepvar*diag(1./stdind);


150 


151  % do we need to parse a supplied model?


152  if iscell(modelterms)  ischar(modelterms)


153  [modelterms,varlist] = parsemodel(modelterms,p);


154  if size(modelterms,2) < p


155  modelterms = [modelterms, zeros(size(modelterms,1),p  size(modelterms,2))];


156  end


157  elseif length(modelterms) == 1


158  % do we need to generate a set of modelterms?


159  [modelterms,varlist] = buildcompletemodel(modelterms,p);


160  elseif size(modelterms,2) ~= p


161  error 'ModelTerms must be a scalar or have the same # of columns as indepvar'


162  else


163  varlist = repmat({''},1,p);


164  end


165  nt = size(modelterms,1);


166 


167  % check for replicate terms


168  if nt>1


169  mtu = unique(modelterms,'rows');


170  if size(mtu,1)<nt


171  warning 'Replicate terms identified in the model.'


172  end


173  end


174 


175  % build the design matrix


176  M = ones(n,nt);


177  scalefact = ones(1,nt);


178  for i = 1:nt


179  for j = 1:p


180  M(:,i) = M(:,i).*indepvar_s(:,j).^modelterms(i,j);


181  scalefact(i) = scalefact(i)/(stdind(j)^modelterms(i,j));


182  end


183  end


184 


185  % estimate the model using QR. do it this way to provide a


186  % covariance matrix when all done. Use a pivoted QR for


187  % maximum stability.


188  [Q,R,E] = qr(M,0);


189 


190  polymodel.ModelTerms = modelterms;


191  polymodel.Coefficients(E) = R\(Q'*depvar);


192  yhat = M*polymodel.Coefficients(:);


193 


194  % recover the scaling


195  polymodel.Coefficients=polymodel.Coefficients.*scalefact;


196 


197  % variance of the regression parameters


198  s = norm(depvar  yhat);


199  if n > nt


200  Rinv = R\eye(nt);


201  Var(E) = s^2*sum(Rinv.^2,2)/(nnt);


202  polymodel.ParameterVar = Var.*(scalefact.^2);


203  polymodel.ParameterStd = sqrt(polymodel.ParameterVar);


204  else


205  % we cannot form variance or standard error estimates


206  % unless there are at least as many data points as


207  % parameters to estimate.


208  polymodel.ParameterVar = inf(1,nt);


209  polymodel.ParameterStd = inf(1,nt);


210  end


211 


212  % degrees of freedom


213  polymodel.DoF = n  nt;


214 


215  % coefficient/sd ratio for a pvalue


216  t = polymodel.Coefficients./polymodel.ParameterStd;


217 


218  % twice the upper tail probability from the t distribution,


219  % as a transformation from an incomplete beta. This provides


220  % a twosided test for the corresponding coefficient.


221  % I could have used tcdf, if I wanted to presume the


222  % stats toolbox was present. Of course, then regstats is


223  % an option. In that case, the comparable result would be


224  % found in: STATS.tstat.pval


225  polymodel.p = betainc(polymodel.DoF./(t.^2 + polymodel.DoF),polymodel.DoF/2,1/2);


226 


227  % R^2


228  % is there a constant term in the model? If not, then


229  % we cannot use the standard R^2 computation, as it


230  % frequently yields negative values for R^2.


231  if any((M(1,:) ~= 0) & all(diff(M,1,1) == 0,1))


232  % we have a constant term in the model, so the


233  % traditional R^2 form is acceptable.


234  polymodel.R2 = max(0,1  (s/norm(depvarmean(depvar)) )^2);


235  % compute adjusted R^2, taking into account the number of


236  % degrees of freedom


237  polymodel.AdjustedR2 = 1  (1  polymodel.R2).*((n  1)./(n  nt));


238  else


239  % no constant term was found in the model


240  polymodel.R2 = max(0,1  (s/norm(depvar))^2);


241  % compute adjusted R^2, taking into account the number of


242  % degrees of freedom


243  polymodel.AdjustedR2 = 1  (1  polymodel.R2).*(n./(n  nt));


244  end


245 


246  % RMSE


247  polymodel.RMSE = sqrt(mean((depvar  yhat).^2));


248 


249  % if a character 'model' was supplied, return the list


250  % of variables as parsed out


251  polymodel.VarNames = varlist;


252 


253  % ==================================================


254  % =============== begin subfunctions ===============


255  % ==================================================


256  function [modelterms,varlist] = buildcompletemodel(order,p)


257  %


258  % arguments: (input)


259  % order  scalar integer, defines the total (maximum) order


260  %


261  % p  scalar integer  defines the dimension of the


262  % independent variable space


263  %


264  % arguments: (output)


265  % modelterms  exponent array for the model


266  %


267  % varlist  cell array of character variable names


268 


269  % build the exponent array recursively


270  if p == 0


271  % terminal case


272  modelterms = [];


273  elseif (order == 0)


274  % terminal case


275  modelterms = zeros(1,p);


276  elseif (p==1)


277  % terminal case


278  modelterms = (order:1:0)';


279  else


280  % general recursive case


281  modelterms = zeros(0,p);


282  for k = order:1:0


283  t = buildcompletemodel(orderk,p1);


284  nt = size(t,1);


285  modelterms = [modelterms;[repmat(k,nt,1),t]];


286  end


287  end


288 


289  % create a list of variable names for the variables on the fly


290  varlist = cell(1,p);


291  for i = 1:p


292  varlist{i} = ['X',num2str(i)];


293  end


294 


295 


296  % ==================================================


297  function [modelterms,varlist] = parsemodel(model,p);


298  %


299  % arguments: (input)


300  % model  character string or cell array of strings


301  %


302  % p  number of independent variables in the model


303  %


304  % arguments: (output)


305  % modelterms  exponent array for the model


306 


307  modelterms = zeros(0,p);


308  if ischar(model)


309  model = deblank(model);


310  end


311 


312  varlist = {};


313  while ~isempty(model)


314  if iscellstr(model)


315  term = model{1};


316  model(1) = [];


317  else


318  [term,model] = strtok(model,' ,');


319  end


320 


321  % We've stripped off a model term. Now parse it.


322 


323  % Is it the reserved keyword 'constant'?


324  if strcmpi(term,'constant')


325  modelterms(end+1,:) = 0;


326  else


327  % pick this term apart


328  expon = zeros(1,p);


329  while ~isempty(term)


330  vn = strtok(term,'*/^. ,');


331  k = find(strncmp(vn,varlist,length(vn)));


332  if isempty(k)


333  % its a variable name we have not yet seen


334 


335  % is it a legal name?


336  nv = length(varlist);


337  if ismember(vn(1),'1234567890_')


338  error(['Variable is not a valid name: ''',vn,''''])


339  elseif nv>=p


340  error 'More variables in the model than columns of indepvar'


341  end


342 


343  varlist{nv+1} = vn;


344 


345  k = nv+1;


346  end


347  % variable must now be in the list of vars.


348 


349  % drop that variable from term


350  i = strfind(term,vn);


351  term = term((i+length(vn)):end);


352 


353  % is there an exponent?


354  eflag = false;


355  if strncmp('^',term,1)


356  term(1) = [];


357  eflag = true;


358  elseif strncmp('.^',term,2)


359  term(1:2) = [];


360  eflag = true;


361  end


362 


363  % If there was one, get it


364  ev = 1;


365  if eflag


366  ev = sscanf(term,'%f');


367  if isempty(ev)


368  error 'Problem with an exponent in parsing the model'


369  end


370  end


371  expon(k) = expon(k) + ev;


372 


373  % next monomial subterm?


374  k1 = strfind(term,'*');


375  if isempty(k1)


376  term = '';


377  else


378  term(k1(1)) = ' ';


379  end


380 


381  end


382 


383  modelterms(end+1,:) = expon;


384 


385  end


386 


387  end


388 


389  % Once we have compiled the list of variables and


390  % exponents, we need to sort them in alphabetical order


391  [varlist,tags] = sort(varlist);


392  modelterms = modelterms(:,tags); 
