[877] | 1 | function polymodel = polyfitn(indepvar,depvar,modelterms)
|
---|
| 2 | % polyfitn: fits a general polynomial regression model in n dimensions
|
---|
| 3 | % usage: polymodel = polyfitn(indepvar,depvar,modelterms)
|
---|
| 4 | %
|
---|
| 5 | % Polyfitn fits a polynomial regression model of one or more
|
---|
| 6 | % independent variables, of the general form:
|
---|
| 7 | %
|
---|
| 8 | % z = f(x,y,...) + error
|
---|
| 9 | %
|
---|
| 10 | % arguments: (input)
|
---|
| 11 | % indepvar - (n x p) array of independent variables as columns
|
---|
| 12 | % n is the number of data points
|
---|
| 13 | % p is the dimension of the independent variable space
|
---|
| 14 | %
|
---|
| 15 | % IF n == 1, then I will assume there is only a
|
---|
| 16 | % single independent variable.
|
---|
| 17 | %
|
---|
| 18 | % depvar - (n x 1 or 1 x n) vector - dependent variable
|
---|
| 19 | % length(depvar) must be n.
|
---|
| 20 | %
|
---|
| 21 | % Only 1 dependent variable is allowed, since I also
|
---|
| 22 | % return statistics on the model.
|
---|
| 23 | %
|
---|
| 24 | % modelterms - defines the terms used in the model itself
|
---|
| 25 | %
|
---|
| 26 | % IF modelterms is a scalar integer, then it designates
|
---|
| 27 | % the overall order of the model. All possible terms
|
---|
| 28 | % up to that order will be employed. Thus, if order
|
---|
| 29 | % is 2 and p == 2 (i.e., there are two variables) then
|
---|
| 30 | % the terms selected will be:
|
---|
| 31 | %
|
---|
| 32 | % { x^2, x*y, x, y^2, y, constant}
|
---|
| 33 | %
|
---|
| 34 | % Beware the consequences of high order polynomial
|
---|
| 35 | % models.
|
---|
| 36 | %
|
---|
| 37 | % IF modelterms is a (k x p) numeric array, then each
|
---|
| 38 | % row of this array designates the exponents of one
|
---|
| 39 | % term in the model. Thus to designate a model with
|
---|
| 40 | % the above list of terms, we would define modelterms as
|
---|
| 41 | %
|
---|
| 42 | % modelterms = [0 0;1 0;2 0;0 1;1 1;0 2]
|
---|
| 43 | %
|
---|
| 44 | % If modelterms is a character string, then it will be
|
---|
| 45 | % parsed as a list of terms in the regression model.
|
---|
| 46 | % The terms will be assume to be separated by a comma
|
---|
| 47 | % or by blanks. The variable names used must be legal
|
---|
| 48 | % matlab variable names. Exponents in the model may
|
---|
| 49 | % may be any real number, positive or negative.
|
---|
| 50 | %
|
---|
| 51 | % For example, 'constant, x, y, x*y, x^2, x*y*y'
|
---|
| 52 | % will be parsed as a model specification as if you
|
---|
| 53 | % had supplied:
|
---|
| 54 | % modelterms = [0 0;1 0;0 1;1 1;2 0;1 2]
|
---|
| 55 | %
|
---|
| 56 | % The word 'constant' is a keyword, and will denote a
|
---|
| 57 | % constant terms in the model. Variable names will be
|
---|
| 58 | % sorted in alphabetical order as defined by sort.
|
---|
| 59 | % This order will assign them to columns of the
|
---|
| 60 | % independent array. Note that 'xy' will be parsed as
|
---|
| 61 | % a single variable name, not as the product of x and y.
|
---|
| 62 | %
|
---|
| 63 | % If modelterms is a cell array, then it will be taken
|
---|
| 64 | % to be a list of character terms. Similarly,
|
---|
| 65 | %
|
---|
| 66 | % {'constant', 'x', 'y', 'x*y', 'x^2', 'x*y^-1'}
|
---|
| 67 | %
|
---|
| 68 | % will be parsed as a model specification as if you
|
---|
| 69 | % had supplied:
|
---|
| 70 | %
|
---|
| 71 | % modelterms = [0 0;1 0;0 1;1 1;2 0;1 -1]
|
---|
| 72 | %
|
---|
| 73 | % Arguments: (output)
|
---|
| 74 | % polymodel - A structure containing the regression model
|
---|
| 75 | % polymodel.ModelTerms = list of terms in the model
|
---|
| 76 | % polymodel.Coefficients = regression coefficients
|
---|
| 77 | % polymodel.ParameterVar = variances of model coefficients
|
---|
| 78 | % polymodel.ParameterStd = standard deviation of model coefficients
|
---|
| 79 | % polymodel.R2 = R^2 for the regression model
|
---|
| 80 | % polymodel.AdjustedR2 = Adjusted R^2 for the regression model
|
---|
| 81 | % polymodel.RMSE = Root mean squared error
|
---|
| 82 | % polymodel.VarNames = Cell array of variable names
|
---|
| 83 | % as parsed from a char based model specification.
|
---|
| 84 | %
|
---|
| 85 | % Note 1: Because the terms in a general polynomial
|
---|
| 86 | % model can be arbitrarily chosen by the user, I must
|
---|
| 87 | % package the erms and coefficients together into a
|
---|
| 88 | % structure. This also forces use of a special evaluation
|
---|
| 89 | % tool: polyvaln.
|
---|
| 90 | %
|
---|
| 91 | % Note 2: A polymodel can be evaluated for any set
|
---|
| 92 | % of values with the function polyvaln. However, if
|
---|
| 93 | % you wish to manipulate the result symbolically using
|
---|
| 94 | % my own sympoly tools, this structure can be converted
|
---|
| 95 | % to a sympoly using the function polyn2sympoly. There
|
---|
| 96 | % is also a polyn2sym tool, for those who prefer the
|
---|
| 97 | % symbolic TB.
|
---|
| 98 | %
|
---|
| 99 | % Note 3: When no constant term is included in the model,
|
---|
| 100 | % the traditional R^2 can be negative. This case is
|
---|
| 101 | % identified, and then a more appropriate computation
|
---|
| 102 | % for R^2 is then used.
|
---|
| 103 | %
|
---|
| 104 | % Note 4: Adjusted R^2 accounts for changing degrees of
|
---|
| 105 | % freedom in the model. It CAN be negative, and will always
|
---|
| 106 | % be less than the traditional R^2 values.
|
---|
| 107 | %
|
---|
| 108 | % Find my sympoly toolbox here:
|
---|
| 109 | % http://www.mathworks.com/matlabcentral/fileexchange/loadFile.do?objectId=9577&objectType=FILE
|
---|
| 110 | %
|
---|
| 111 | % See also: polyvaln, polyfit, polyval, polyn2sympoly, sympoly
|
---|
| 112 | %
|
---|
| 113 | % Author: John D'Errico
|
---|
| 114 | % Release: 2.0
|
---|
| 115 | % Release date: 2/19/06
|
---|
| 116 |
|
---|
| 117 | if nargin<1
|
---|
| 118 | help polyfitn
|
---|
| 119 | return
|
---|
| 120 | end
|
---|
| 121 |
|
---|
| 122 | % get sizes, test for consistency
|
---|
| 123 | [n,p] = size(indepvar);
|
---|
| 124 | if n == 1
|
---|
| 125 | indepvar = indepvar';
|
---|
| 126 | [n,p] = size(indepvar);
|
---|
| 127 | end
|
---|
| 128 | [m,q] = size(depvar);
|
---|
| 129 | if m == 1
|
---|
| 130 | depvar = depvar';
|
---|
| 131 | [m,q] = size(depvar);
|
---|
| 132 | end
|
---|
| 133 | % only 1 dependent variable allowed at a time
|
---|
| 134 | if q~=1
|
---|
| 135 | error 'Only 1 dependent variable allowed at a time.'
|
---|
| 136 | end
|
---|
| 137 |
|
---|
| 138 | if n~=m
|
---|
| 139 | error 'indepvar and depvar are of inconsistent sizes.'
|
---|
| 140 | end
|
---|
| 141 |
|
---|
| 142 | % Automatically scale the independent variables to unit variance
|
---|
| 143 | stdind = sqrt(diag(cov(indepvar)));
|
---|
| 144 | if any(stdind==0)
|
---|
| 145 | warning 'Constant terms in the model must be entered using modelterms'
|
---|
| 146 | stdind(stdind==0) = 1;
|
---|
| 147 | end
|
---|
| 148 | % scaled variables
|
---|
| 149 | indepvar_s = indepvar*diag(1./stdind);
|
---|
| 150 |
|
---|
| 151 | % do we need to parse a supplied model?
|
---|
| 152 | if iscell(modelterms) || ischar(modelterms)
|
---|
| 153 | [modelterms,varlist] = parsemodel(modelterms,p);
|
---|
| 154 | if size(modelterms,2) < p
|
---|
| 155 | modelterms = [modelterms, zeros(size(modelterms,1),p - size(modelterms,2))];
|
---|
| 156 | end
|
---|
| 157 | elseif length(modelterms) == 1
|
---|
| 158 | % do we need to generate a set of modelterms?
|
---|
| 159 | [modelterms,varlist] = buildcompletemodel(modelterms,p);
|
---|
| 160 | elseif size(modelterms,2) ~= p
|
---|
| 161 | error 'ModelTerms must be a scalar or have the same # of columns as indepvar'
|
---|
| 162 | else
|
---|
| 163 | varlist = repmat({''},1,p);
|
---|
| 164 | end
|
---|
| 165 | nt = size(modelterms,1);
|
---|
| 166 |
|
---|
| 167 | % check for replicate terms
|
---|
| 168 | if nt>1
|
---|
| 169 | mtu = unique(modelterms,'rows');
|
---|
| 170 | if size(mtu,1)<nt
|
---|
| 171 | warning 'Replicate terms identified in the model.'
|
---|
| 172 | end
|
---|
| 173 | end
|
---|
| 174 |
|
---|
| 175 | % build the design matrix
|
---|
| 176 | M = ones(n,nt);
|
---|
| 177 | scalefact = ones(1,nt);
|
---|
| 178 | for i = 1:nt
|
---|
| 179 | for j = 1:p
|
---|
| 180 | M(:,i) = M(:,i).*indepvar_s(:,j).^modelterms(i,j);
|
---|
| 181 | scalefact(i) = scalefact(i)/(stdind(j)^modelterms(i,j));
|
---|
| 182 | end
|
---|
| 183 | end
|
---|
| 184 |
|
---|
| 185 | % estimate the model using QR. do it this way to provide a
|
---|
| 186 | % covariance matrix when all done. Use a pivoted QR for
|
---|
| 187 | % maximum stability.
|
---|
| 188 | [Q,R,E] = qr(M,0);
|
---|
| 189 |
|
---|
| 190 | polymodel.ModelTerms = modelterms;
|
---|
| 191 | polymodel.Coefficients(E) = R\(Q'*depvar);
|
---|
| 192 | yhat = M*polymodel.Coefficients(:);
|
---|
| 193 |
|
---|
| 194 | % recover the scaling
|
---|
| 195 | polymodel.Coefficients=polymodel.Coefficients.*scalefact;
|
---|
| 196 |
|
---|
| 197 | % variance of the regression parameters
|
---|
| 198 | s = norm(depvar - yhat);
|
---|
| 199 | if n > nt
|
---|
| 200 | Rinv = R\eye(nt);
|
---|
| 201 | Var(E) = s^2*sum(Rinv.^2,2)/(n-nt);
|
---|
| 202 | polymodel.ParameterVar = Var.*(scalefact.^2);
|
---|
| 203 | polymodel.ParameterStd = sqrt(polymodel.ParameterVar);
|
---|
| 204 | else
|
---|
| 205 | % we cannot form variance or standard error estimates
|
---|
| 206 | % unless there are at least as many data points as
|
---|
| 207 | % parameters to estimate.
|
---|
| 208 | polymodel.ParameterVar = inf(1,nt);
|
---|
| 209 | polymodel.ParameterStd = inf(1,nt);
|
---|
| 210 | end
|
---|
| 211 |
|
---|
| 212 | % degrees of freedom
|
---|
| 213 | polymodel.DoF = n - nt;
|
---|
| 214 |
|
---|
| 215 | % coefficient/sd ratio for a p-value
|
---|
| 216 | t = polymodel.Coefficients./polymodel.ParameterStd;
|
---|
| 217 |
|
---|
| 218 | % twice the upper tail probability from the t distribution,
|
---|
| 219 | % as a transformation from an incomplete beta. This provides
|
---|
| 220 | % a two-sided test for the corresponding coefficient.
|
---|
| 221 | % I could have used tcdf, if I wanted to presume the
|
---|
| 222 | % stats toolbox was present. Of course, then regstats is
|
---|
| 223 | % an option. In that case, the comparable result would be
|
---|
| 224 | % found in: STATS.tstat.pval
|
---|
| 225 | polymodel.p = betainc(polymodel.DoF./(t.^2 + polymodel.DoF),polymodel.DoF/2,1/2);
|
---|
| 226 |
|
---|
| 227 | % R^2
|
---|
| 228 | % is there a constant term in the model? If not, then
|
---|
| 229 | % we cannot use the standard R^2 computation, as it
|
---|
| 230 | % frequently yields negative values for R^2.
|
---|
| 231 | if any((M(1,:) ~= 0) & all(diff(M,1,1) == 0,1))
|
---|
| 232 | % we have a constant term in the model, so the
|
---|
| 233 | % traditional R^2 form is acceptable.
|
---|
| 234 | polymodel.R2 = max(0,1 - (s/norm(depvar-mean(depvar)) )^2);
|
---|
| 235 | % compute adjusted R^2, taking into account the number of
|
---|
| 236 | % degrees of freedom
|
---|
| 237 | polymodel.AdjustedR2 = 1 - (1 - polymodel.R2).*((n - 1)./(n - nt));
|
---|
| 238 | else
|
---|
| 239 | % no constant term was found in the model
|
---|
| 240 | polymodel.R2 = max(0,1 - (s/norm(depvar))^2);
|
---|
| 241 | % compute adjusted R^2, taking into account the number of
|
---|
| 242 | % degrees of freedom
|
---|
| 243 | polymodel.AdjustedR2 = 1 - (1 - polymodel.R2).*(n./(n - nt));
|
---|
| 244 | end
|
---|
| 245 |
|
---|
| 246 | % RMSE
|
---|
| 247 | polymodel.RMSE = sqrt(mean((depvar - yhat).^2));
|
---|
| 248 |
|
---|
| 249 | % if a character 'model' was supplied, return the list
|
---|
| 250 | % of variables as parsed out
|
---|
| 251 | polymodel.VarNames = varlist;
|
---|
| 252 |
|
---|
| 253 | % ==================================================
|
---|
| 254 | % =============== begin subfunctions ===============
|
---|
| 255 | % ==================================================
|
---|
| 256 | function [modelterms,varlist] = buildcompletemodel(order,p)
|
---|
| 257 | %
|
---|
| 258 | % arguments: (input)
|
---|
| 259 | % order - scalar integer, defines the total (maximum) order
|
---|
| 260 | %
|
---|
| 261 | % p - scalar integer - defines the dimension of the
|
---|
| 262 | % independent variable space
|
---|
| 263 | %
|
---|
| 264 | % arguments: (output)
|
---|
| 265 | % modelterms - exponent array for the model
|
---|
| 266 | %
|
---|
| 267 | % varlist - cell array of character variable names
|
---|
| 268 |
|
---|
| 269 | % build the exponent array recursively
|
---|
| 270 | if p == 0
|
---|
| 271 | % terminal case
|
---|
| 272 | modelterms = [];
|
---|
| 273 | elseif (order == 0)
|
---|
| 274 | % terminal case
|
---|
| 275 | modelterms = zeros(1,p);
|
---|
| 276 | elseif (p==1)
|
---|
| 277 | % terminal case
|
---|
| 278 | modelterms = (order:-1:0)';
|
---|
| 279 | else
|
---|
| 280 | % general recursive case
|
---|
| 281 | modelterms = zeros(0,p);
|
---|
| 282 | for k = order:-1:0
|
---|
| 283 | t = buildcompletemodel(order-k,p-1);
|
---|
| 284 | nt = size(t,1);
|
---|
| 285 | modelterms = [modelterms;[repmat(k,nt,1),t]];
|
---|
| 286 | end
|
---|
| 287 | end
|
---|
| 288 |
|
---|
| 289 | % create a list of variable names for the variables on the fly
|
---|
| 290 | varlist = cell(1,p);
|
---|
| 291 | for i = 1:p
|
---|
| 292 | varlist{i} = ['X',num2str(i)];
|
---|
| 293 | end
|
---|
| 294 |
|
---|
| 295 |
|
---|
| 296 | % ==================================================
|
---|
| 297 | function [modelterms,varlist] = parsemodel(model,p);
|
---|
| 298 | %
|
---|
| 299 | % arguments: (input)
|
---|
| 300 | % model - character string or cell array of strings
|
---|
| 301 | %
|
---|
| 302 | % p - number of independent variables in the model
|
---|
| 303 | %
|
---|
| 304 | % arguments: (output)
|
---|
| 305 | % modelterms - exponent array for the model
|
---|
| 306 |
|
---|
| 307 | modelterms = zeros(0,p);
|
---|
| 308 | if ischar(model)
|
---|
| 309 | model = deblank(model);
|
---|
| 310 | end
|
---|
| 311 |
|
---|
| 312 | varlist = {};
|
---|
| 313 | while ~isempty(model)
|
---|
| 314 | if iscellstr(model)
|
---|
| 315 | term = model{1};
|
---|
| 316 | model(1) = [];
|
---|
| 317 | else
|
---|
| 318 | [term,model] = strtok(model,' ,');
|
---|
| 319 | end
|
---|
| 320 |
|
---|
| 321 | % We've stripped off a model term. Now parse it.
|
---|
| 322 |
|
---|
| 323 | % Is it the reserved keyword 'constant'?
|
---|
| 324 | if strcmpi(term,'constant')
|
---|
| 325 | modelterms(end+1,:) = 0;
|
---|
| 326 | else
|
---|
| 327 | % pick this term apart
|
---|
| 328 | expon = zeros(1,p);
|
---|
| 329 | while ~isempty(term)
|
---|
| 330 | vn = strtok(term,'*/^. ,');
|
---|
| 331 | k = find(strncmp(vn,varlist,length(vn)));
|
---|
| 332 | if isempty(k)
|
---|
| 333 | % its a variable name we have not yet seen
|
---|
| 334 |
|
---|
| 335 | % is it a legal name?
|
---|
| 336 | nv = length(varlist);
|
---|
| 337 | if ismember(vn(1),'1234567890_')
|
---|
| 338 | error(['Variable is not a valid name: ''',vn,''''])
|
---|
| 339 | elseif nv>=p
|
---|
| 340 | error 'More variables in the model than columns of indepvar'
|
---|
| 341 | end
|
---|
| 342 |
|
---|
| 343 | varlist{nv+1} = vn;
|
---|
| 344 |
|
---|
| 345 | k = nv+1;
|
---|
| 346 | end
|
---|
| 347 | % variable must now be in the list of vars.
|
---|
| 348 |
|
---|
| 349 | % drop that variable from term
|
---|
| 350 | i = strfind(term,vn);
|
---|
| 351 | term = term((i+length(vn)):end);
|
---|
| 352 |
|
---|
| 353 | % is there an exponent?
|
---|
| 354 | eflag = false;
|
---|
| 355 | if strncmp('^',term,1)
|
---|
| 356 | term(1) = [];
|
---|
| 357 | eflag = true;
|
---|
| 358 | elseif strncmp('.^',term,2)
|
---|
| 359 | term(1:2) = [];
|
---|
| 360 | eflag = true;
|
---|
| 361 | end
|
---|
| 362 |
|
---|
| 363 | % If there was one, get it
|
---|
| 364 | ev = 1;
|
---|
| 365 | if eflag
|
---|
| 366 | ev = sscanf(term,'%f');
|
---|
| 367 | if isempty(ev)
|
---|
| 368 | error 'Problem with an exponent in parsing the model'
|
---|
| 369 | end
|
---|
| 370 | end
|
---|
| 371 | expon(k) = expon(k) + ev;
|
---|
| 372 |
|
---|
| 373 | % next monomial subterm?
|
---|
| 374 | k1 = strfind(term,'*');
|
---|
| 375 | if isempty(k1)
|
---|
| 376 | term = '';
|
---|
| 377 | else
|
---|
| 378 | term(k1(1)) = ' ';
|
---|
| 379 | end
|
---|
| 380 |
|
---|
| 381 | end
|
---|
| 382 |
|
---|
| 383 | modelterms(end+1,:) = expon;
|
---|
| 384 |
|
---|
| 385 | end
|
---|
| 386 |
|
---|
| 387 | end
|
---|
| 388 |
|
---|
| 389 | % Once we have compiled the list of variables and
|
---|
| 390 | % exponents, we need to sort them in alphabetical order
|
---|
| 391 | [varlist,tags] = sort(varlist);
|
---|
| 392 | modelterms = modelterms(:,tags); |
---|