Changeset 925 for trunk/src/@xmltree/private/xml_parser.m
- Timestamp:
- Feb 17, 2016, 12:52:48 PM (9 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/src/@xmltree/private/xml_parser.m
r820 r925 5 5 % xmlstr - XML string to parse 6 6 % tree - tree structure corresponding to the XML file 7 %_______________________________________________________________________ 8 % 9 % xml_parser.m is an XML 1.0 (http://www.w3.org/TR/REC-xml) parser 10 % written in Matlab. It aims to be fully conforming. It is currently not11 % a validatingXML processor.7 %__________________________________________________________________________ 8 % 9 % xml_parser.m is an XML 1.0 (http://www.w3.org/TR/REC-xml) parser. 10 % It aims to be fully conforming. It is currently not a validating 11 % XML processor. 12 12 % 13 13 % A description of the tree structure provided in output is detailed in 14 14 % the header of this m-file. 15 %_______________________________________________________________________ 16 % @(#)xml_parser.m Guillaume Flandin 2002/04/04 17 18 % XML Processor for MATLAB (The Mathworks, Inc.). 19 % Copyright (C) 2002-2003 Guillaume Flandin <Guillaume@artefact.tk> 15 %__________________________________________________________________________ 16 % Copyright (C) 2002-2015 http://www.artefact.tk/ 17 18 % Guillaume Flandin 19 % $Id: xml_parser.m 6480 2015-06-13 01:08:30Z guillaume $ 20 21 % XML Processor for GNU Octave and MATLAB (The Mathworks, Inc.) 22 % Copyright (C) 2002-2015 Guillaume Flandin <Guillaume@artefact.tk> 20 23 % 21 24 % This program is free software; you can redistribute it and/or … … 32 35 % along with this program; if not, write to the Free Software 33 36 % Foundation Inc, 59 Temple Pl. - Suite 330, Boston, MA 02111-1307, USA. 
34 %----------------------------------------------------------------------- 37 %-------------------------------------------------------------------------- 35 38 36 39 % Suggestions for improvement and fixes are always welcome, although no … … 39 42 % Check also the latest developments on the following webpage: 40 43 % <http://www.artefact.tk/software/matlab/xml/> 41 %----------------------------------------------------------------------- 44 %-------------------------------------------------------------------------- 42 45 43 46 % The implementation of this XML parser is much inspired from a 44 % Javascript parser available at <http://www.jeremie.com/>45 46 % A mex-file xml_findstr.c is also required, to encompass some47 % limitations of the built-in findstr Matlabfunction.47 % Javascript parser that used to be available at <http://www.jeremie.com/> 48 49 % A C-MEX file xml_findstr.c is also required, to encompass some 50 % limitations of the built-in FINDSTR function. 48 51 % Compile it on your architecture using 'mex -O xml_findstr.c' command 49 52 % if the compiled version for your system is not provided. 50 % If this function behaves badly (crash or wrong results), comment the51 % line'#define __HACK_MXCHAR__' in xml_findstr.c and compile it again.52 %----------------------------------------------------------------------- 53 % If this function does not behave as expected, comment the line 54 % '#define __HACK_MXCHAR__' in xml_findstr.c and compile it again. 55 %-------------------------------------------------------------------------- 53 56 54 57 % Structure of the output tree: … … 91 94 % |_ uid: double 92 95 % 93 %----------------------------------------------------------------------- 96 %-------------------------------------------------------------------------- 94 97 95 98 % TODO/BUG/FEATURES: … … 103 106 % - xml_findstr is indeed xml_strfind according to Mathworks vocabulary 104 107 % - problem with entities: do we need to convert them here? 
(é) 105 %----------------------------------------------------------------------- 108 %-------------------------------------------------------------------------- 106 109 107 110 %- XML string to parse and number of tags read … … 109 112 110 113 %- Check input arguments 111 error(nargchk(1,1,nargin));114 %error(nargchk(1,1,nargin)); 112 115 if isempty(xmlstr) 113 114 elseif ~is str(xmlstr)| sum(size(xmlstr)>1)>1115 116 error('[XML] Not enough parameters.') 117 elseif ~ischar(xmlstr) || sum(size(xmlstr)>1)>1 118 error('[XML] Input must be a string.') 116 119 end 117 120 … … 137 140 clear global xmlstring Xparse_count xtree; 138 141 139 %======================================================================= 142 %========================================================================== 140 143 % SUBFUNCTIONS 141 144 142 %----------------------------------------------------------------------- 145 %-------------------------------------------------------------------------- 143 146 function frag = compile(frag) 144 global xmlstring xtree Xparse_count; 145 146 while 1, 147 if length(xmlstring)<=frag.str | ... 148 (frag.str == length(xmlstring)-1 & strcmp(xmlstring(frag.str:end),' ')) 149 return 150 end 151 TagStart = xml_findstr(xmlstring,'<',frag.str,1); 152 if isempty(TagStart) 153 %- Character data 154 error(sprintf(['[XML] Unknown data at the end of the XML file.\n' ... 
155 ' Please send me your XML file at Guillaume@artefact.tk'])); 156 xtree{Xparse_count} = chardata; 157 xtree{Xparse_count}.value = erode(entity(xmlstring(frag.str:end))); 158 xtree{Xparse_count}.parent = frag.parent; 159 xtree{frag.parent}.contents = [xtree{frag.parent}.contents Xparse_count]; 160 frag.str = ''; 161 elseif TagStart > frag.str 162 if strcmp(xmlstring(frag.str:TagStart-1),' ') 163 %- A single white space before a tag (ignore) 164 frag.str = TagStart; 165 else 166 %- Character data 167 xtree{Xparse_count} = chardata; 168 xtree{Xparse_count}.value = erode(entity(xmlstring(frag.str:TagStart-1))); 169 xtree{Xparse_count}.parent = frag.parent; 170 xtree{frag.parent}.contents = [xtree{frag.parent}.contents Xparse_count]; 171 frag.str = TagStart; 172 end 173 else 174 if strcmp(xmlstring(frag.str+1),'?') 175 %- Processing instruction 176 frag = tag_pi(frag); 177 else 178 if length(xmlstring)-frag.str>4 & strcmp(xmlstring(frag.str+1:frag.str+3),'!--') 179 %- Comment 180 frag = tag_comment(frag); 181 else 182 if length(xmlstring)-frag.str>9 & strcmp(xmlstring(frag.str+1:frag.str+8),'![CDATA[') 183 %- Litteral data 184 frag = tag_cdata(frag); 185 else 186 %- A tag element (empty (<.../>) or not) 187 if ~isempty(frag.end) 188 endmk = ['/' frag.end '>']; 189 else 190 endmk = '/>'; 191 end 192 if strcmp(xmlstring(frag.str+1:frag.str+length(frag.end)+2),endmk) | ... 193 strcmp(strip(xmlstring(frag.str+1:frag.str+length(frag.end)+2)),endmk) 194 frag.str = frag.str + length(frag.end)+3; 195 return 196 else 197 frag = tag_element(frag); 198 end 199 end 200 end 201 end 202 end 203 end 204 205 %----------------------------------------------------------------------- 147 global xmlstring xtree Xparse_count; 148 149 while 1, 150 if length(xmlstring)<=frag.str || ... 
151 (frag.str == length(xmlstring)-1 && strcmp(xmlstring(frag.str:end),' ')) 152 return 153 end 154 TagStart = xml_findstr(xmlstring,'<',frag.str,1); 155 if isempty(TagStart) 156 %- Character data 157 error('[XML] Unknown data at the end of the XML file.'); 158 Xparse_count = Xparse_count + 1; 159 xtree{Xparse_count} = chardata; 160 xtree{Xparse_count}.value = erode(entity(xmlstring(frag.str:end))); 161 xtree{Xparse_count}.parent = frag.parent; 162 xtree{frag.parent}.contents = [xtree{frag.parent}.contents Xparse_count]; 163 frag.str = ''; 164 elseif TagStart > frag.str 165 if strcmp(xmlstring(frag.str:TagStart-1),' ') 166 %- A single white space before a tag (ignore) 167 frag.str = TagStart; 168 else 169 %- Character data 170 Xparse_count = Xparse_count + 1; 171 xtree{Xparse_count} = chardata; 172 xtree{Xparse_count}.value = erode(entity(xmlstring(frag.str:TagStart-1))); 173 xtree{Xparse_count}.parent = frag.parent; 174 xtree{frag.parent}.contents = [xtree{frag.parent}.contents Xparse_count]; 175 frag.str = TagStart; 176 end 177 else 178 if strcmp(xmlstring(frag.str+1),'?') 179 %- Processing instruction 180 frag = tag_pi(frag); 181 else 182 if length(xmlstring)-frag.str>4 && strcmp(xmlstring(frag.str+1:frag.str+3),'!--') 183 %- Comment 184 frag = tag_comment(frag); 185 else 186 if length(xmlstring)-frag.str>9 && strcmp(xmlstring(frag.str+1:frag.str+8),'![CDATA[') 187 %- Litteral data 188 frag = tag_cdata(frag); 189 else 190 %- A tag element (empty (<.../>) or not) 191 if ~isempty(frag.end) 192 endmk = ['/' frag.end '>']; 193 else 194 endmk = '/>'; 195 end 196 if strcmp(xmlstring(frag.str+1:frag.str+length(frag.end)+2),endmk) || ... 
197 strcmp(strip(xmlstring(frag.str+1:frag.str+length(frag.end)+2)),endmk) 198 frag.str = frag.str + length(frag.end)+3; 199 return 200 else 201 frag = tag_element(frag); 202 end 203 end 204 end 205 end 206 end 207 end 208 209 %-------------------------------------------------------------------------- 206 210 function frag = tag_element(frag) 207 global xmlstring xtree Xparse_count; 208 close = xml_findstr(xmlstring,'>',frag.str,1); 209 if isempty(close) 210 error('[XML] Tag < opened but not closed.'); 211 else 212 empty = strcmp(xmlstring(close-1:close),'/>'); 213 if empty 214 close = close - 1; 215 end 216 starttag = normalize(xmlstring(frag.str+1:close-1)); 217 nextspace = xml_findstr(starttag,' ',1,1); 218 attribs = ''; 219 if isempty(nextspace) 220 name = starttag; 221 else 222 name = starttag(1:nextspace-1); 223 attribs = starttag(nextspace+1:end); 224 end 225 xtree{Xparse_count} = element; 226 xtree{Xparse_count}.name = strip(name); 227 if frag.parent 228 xtree{Xparse_count}.parent = frag.parent; 229 xtree{frag.parent}.contents = [xtree{frag.parent}.contents Xparse_count]; 230 end 231 if length(attribs) > 0 232 xtree{Xparse_count}.attributes = attribution(attribs); 233 end 234 if ~empty 235 contents = fragment; 236 contents.str = close+1; 237 contents.end = name; 238 contents.parent = Xparse_count; 239 contents = compile(contents); 240 frag.str = contents.str; 241 else 242 frag.str = close+2; 243 end 244 end 245 246 %----------------------------------------------------------------------- 211 global xmlstring xtree Xparse_count; 212 close = xml_findstr(xmlstring,'>',frag.str,1); 213 if isempty(close) 214 error('[XML] Tag < opened but not closed.'); 215 else 216 empty = strcmp(xmlstring(close-1:close),'/>'); 217 if empty 218 close = close - 1; 219 end 220 starttag = normalize(xmlstring(frag.str+1:close-1)); 221 nextspace = xml_findstr(starttag,' ',1,1); 222 attribs = ''; 223 if isempty(nextspace) 224 name = starttag; 225 else 226 name = 
starttag(1:nextspace-1); 227 attribs = starttag(nextspace+1:end); 228 end 229 Xparse_count = Xparse_count + 1; 230 xtree{Xparse_count} = element; 231 xtree{Xparse_count}.name = strip(name); 232 if frag.parent 233 xtree{Xparse_count}.parent = frag.parent; 234 xtree{frag.parent}.contents = [xtree{frag.parent}.contents Xparse_count]; 235 end 236 if ~isempty(attribs) 237 xtree{Xparse_count}.attributes = attribution(attribs); 238 end 239 if ~empty 240 contents = fragment; 241 contents.str = close+1; 242 contents.end = name; 243 contents.parent = Xparse_count; 244 contents = compile(contents); 245 frag.str = contents.str; 246 else 247 frag.str = close+2; 248 end 249 end 250 251 %-------------------------------------------------------------------------- 247 252 function frag = tag_pi(frag) 248 global xmlstring xtree Xparse_count; 249 close = xml_findstr(xmlstring,'?>',frag.str,1); 250 if isempty(close) 251 warning('[XML] Tag <? opened but not closed.') 252 else 253 nextspace = xml_findstr(xmlstring,' ',frag.str,1); 254 xtree{Xparse_count} = pri; 255 if nextspace > close | nextspace == frag.str+2 256 xtree{Xparse_count}.value = erode(xmlstring(frag.str+2:close-1)); 257 else 258 xtree{Xparse_count}.value = erode(xmlstring(nextspace+1:close-1)); 259 xtree{Xparse_count}.target = erode(xmlstring(frag.str+2:nextspace)); 260 end 261 if frag.parent 262 xtree{frag.parent}.contents = [xtree{frag.parent}.contents Xparse_count]; 263 xtree{Xparse_count}.parent = frag.parent; 264 end 265 frag.str = close+2; 266 end 267 268 %----------------------------------------------------------------------- 253 global xmlstring xtree Xparse_count; 254 close = xml_findstr(xmlstring,'?>',frag.str,1); 255 if isempty(close) 256 warning('[XML] Tag <? 
opened but not closed.') 257 else 258 nextspace = xml_findstr(xmlstring,' ',frag.str,1); 259 Xparse_count = Xparse_count + 1; 260 xtree{Xparse_count} = pri; 261 if nextspace > close || nextspace == frag.str+2 262 xtree{Xparse_count}.value = erode(xmlstring(frag.str+2:close-1)); 263 else 264 xtree{Xparse_count}.value = erode(xmlstring(nextspace+1:close-1)); 265 xtree{Xparse_count}.target = erode(xmlstring(frag.str+2:nextspace)); 266 end 267 if frag.parent 268 xtree{frag.parent}.contents = [xtree{frag.parent}.contents Xparse_count]; 269 xtree{Xparse_count}.parent = frag.parent; 270 end 271 frag.str = close+2; 272 end 273 274 %-------------------------------------------------------------------------- 269 275 function frag = tag_comment(frag) 270 global xmlstring xtree Xparse_count; 271 close = xml_findstr(xmlstring,'-->',frag.str,1); 272 if isempty(close) 273 warning('[XML] Tag <!-- opened but not closed.') 274 else 275 xtree{Xparse_count} = comment; 276 xtree{Xparse_count}.value = erode(xmlstring(frag.str+4:close-1)); 277 if frag.parent 278 xtree{frag.parent}.contents = [xtree{frag.parent}.contents Xparse_count]; 279 xtree{Xparse_count}.parent = frag.parent; 280 end 281 frag.str = close+3; 282 end 283 284 %----------------------------------------------------------------------- 276 global xmlstring xtree Xparse_count; 277 close = xml_findstr(xmlstring,'-->',frag.str,1); 278 if isempty(close) 279 warning('[XML] Tag <!-- opened but not closed.') 280 else 281 Xparse_count = Xparse_count + 1; 282 xtree{Xparse_count} = comment; 283 xtree{Xparse_count}.value = erode(xmlstring(frag.str+4:close-1)); 284 if frag.parent 285 xtree{frag.parent}.contents = [xtree{frag.parent}.contents Xparse_count]; 286 xtree{Xparse_count}.parent = frag.parent; 287 end 288 frag.str = close+3; 289 end 290 291 %-------------------------------------------------------------------------- 285 292 function frag = tag_cdata(frag) 286 global xmlstring xtree Xparse_count; 287 close = 
xml_findstr(xmlstring,']]>',frag.str,1); 288 if isempty(close) 289 warning('[XML] Tag <![CDATA[ opened but not closed.') 290 else 291 xtree{Xparse_count} = cdata; 292 xtree{Xparse_count}.value = xmlstring(frag.str+9:close-1); 293 if frag.parent 294 xtree{frag.parent}.contents = [xtree{frag.parent}.contents Xparse_count]; 295 xtree{Xparse_count}.parent = frag.parent; 296 end 297 frag.str = close+3; 298 end 299 300 %----------------------------------------------------------------------- 293 global xmlstring xtree Xparse_count; 294 close = xml_findstr(xmlstring,']]>',frag.str,1); 295 if isempty(close) 296 warning('[XML] Tag <![CDATA[ opened but not closed.') 297 else 298 Xparse_count = Xparse_count + 1; 299 xtree{Xparse_count} = cdata; 300 xtree{Xparse_count}.value = xmlstring(frag.str+9:close-1); 301 if frag.parent 302 xtree{frag.parent}.contents = [xtree{frag.parent}.contents Xparse_count]; 303 xtree{Xparse_count}.parent = frag.parent; 304 end 305 frag.str = close+3; 306 end 307 308 %-------------------------------------------------------------------------- 301 309 function all = attribution(str) 302 303 304 305 306 307 308 if isempty(str)| isempty(eq), return; end309 id = xml_findstr(str,'"',1,1); % should also look for '''' 310 nextid = xml_findstr(str,'"',id+1,1);% rather than only '"' 311 312 313 314 315 316 317 %----------------------------------------------------------------------- 310 %- Initialize attributs 311 nbattr = 0; 312 all = cell(nbattr); 313 %- Look for 'key="value"' substrings 314 while 1, 315 eq = xml_findstr(str,'=',1,1); 316 if isempty(str) || isempty(eq), return; end 317 id = sort([xml_findstr(str,'"',1,1),xml_findstr(str,'''',1,1)]); id=id(1); 318 nextid = sort([xml_findstr(str,'"',id+1,1),xml_findstr(str,'''',id+1,1)]);nextid=nextid(1); 319 nbattr = nbattr + 1; 320 all{nbattr}.key = strip(str(1:(eq-1))); 321 all{nbattr}.val = entity(str((id+1):(nextid-1))); 322 str = str((nextid+1):end); 323 end 324 325 
%-------------------------------------------------------------------------- 318 326 function elm = element 319 global Xparse_count; 320 Xparse_count = Xparse_count + 1; 321 elm = struct('type','element','name','','attributes',[],'contents',[],'parent',[],'uid',Xparse_count); 327 global Xparse_count; 328 elm = struct('type','element','name','','attributes',[],'contents',[],'parent',[],'uid',Xparse_count); 322 329 323 %----------------------------------------------------------------------- 330 %-------------------------------------------------------------------------- 324 331 function cdat = chardata 325 global Xparse_count; 326 Xparse_count = Xparse_count + 1; 327 cdat = struct('type','chardata','value','','parent',[],'uid',Xparse_count); 332 global Xparse_count; 333 cdat = struct('type','chardata','value','','parent',[],'uid',Xparse_count); 328 334 329 %----------------------------------------------------------------------- 335 %-------------------------------------------------------------------------- 330 336 function cdat = cdata 331 global Xparse_count; 332 Xparse_count = Xparse_count + 1; 333 cdat = struct('type','cdata','value','','parent',[],'uid',Xparse_count); 337 global Xparse_count; 338 cdat = struct('type','cdata','value','','parent',[],'uid',Xparse_count); 334 339 335 %----------------------------------------------------------------------- 340 %-------------------------------------------------------------------------- 336 341 function proce = pri 337 global Xparse_count; 338 Xparse_count = Xparse_count + 1; 339 proce = struct('type','pi','value','','target','','parent',[],'uid',Xparse_count); 340 341 %----------------------------------------------------------------------- 342 global Xparse_count; 343 proce = struct('type','pi','value','','target','','parent',[],'uid',Xparse_count); 344 345 %-------------------------------------------------------------------------- 342 346 function commt = comment 343 global Xparse_count; 344 Xparse_count = Xparse_count 
+ 1; 345 commt = struct('type','comment','value','','parent',[],'uid',Xparse_count); 346 347 %----------------------------------------------------------------------- 347 global Xparse_count; 348 commt = struct('type','comment','value','','parent',[],'uid',Xparse_count); 349 350 %-------------------------------------------------------------------------- 348 351 function frg = fragment 349 350 351 %----------------------------------------------------------------------- 352 frg = struct('str','','parent','','end',''); 353 354 %-------------------------------------------------------------------------- 352 355 function str = prolog(str) 353 354 355 356 357 358 359 360 361 if strcmp(lower(str(start:start+2)),'<?x')362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 if (~isempty(dp)& dp < b)377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 %----------------------------------------------------------------------- 356 %- Initialize beginning index of elements tree 357 b = 1; 358 %- Initial tag 359 start = xml_findstr(str,'<',1,1); 360 if isempty(start) 361 error('[XML] No tag found.') 362 end 363 %- Header (<?xml version="1.0" ... ?>) 364 if strcmpi(str(start:start+2),'<?x') 365 close = xml_findstr(str,'?>',1,1); 366 if ~isempty(close) 367 b = close + 2; 368 else 369 warning('[XML] Header tag incomplete.') 370 end 371 end 372 %- Doctype (<!DOCTYPE type ... 
[ declarations ]>) 373 start = xml_findstr(str,'<!DOCTYPE',b,1); % length('<!DOCTYPE') = 9 374 if ~isempty(start) 375 close = xml_findstr(str,'>',start+9,1); 376 if ~isempty(close) 377 b = close + 1; 378 dp = xml_findstr(str,'[',start+9,1); 379 if (~isempty(dp) && dp < b) 380 k = xml_findstr(str,']>',start+9,1); 381 if ~isempty(k) 382 b = k + 2; 383 else 384 warning('[XML] Tag [ in DOCTYPE opened but not closed.') 385 end 386 end 387 else 388 warning('[XML] Tag DOCTYPE opened but not closed.') 389 end 390 end 391 %- Skip prolog from the xml string 392 str = str(b:end); 393 394 %-------------------------------------------------------------------------- 392 395 function str = strip(str) 393 a = isspace(str); 394 a = find(a==1); 395 str(a) = ''; 396 397 %----------------------------------------------------------------------- 396 str(isspace(str)) = ''; 397 398 %-------------------------------------------------------------------------- 398 399 function str = normalize(str) 399 % Find white characters (space, newline, carriage return, tabs, ...) 400 i = isspace(str); 401 i = find(i == 1); 402 str(i) = ' '; 403 % replace several white characters by only one 404 if ~isempty(i) 405 j = i - [i(2:end) i(end)]; 406 k = find(j == -1); 407 str(i(k)) = []; 408 end 409 410 %----------------------------------------------------------------------- 400 % Find white characters (space, newline, carriage return, tabs, ...) 
401 i = isspace(str); 402 i = find(i == 1); 403 str(i) = ' '; 404 % replace several white characters by only one 405 if ~isempty(i) 406 j = i - [i(2:end) i(end)]; 407 str(i(j == -1)) = []; 408 end 409 410 %-------------------------------------------------------------------------- 411 411 function str = entity(str) 412 413 414 415 416 412 str = strrep(str,'&lt;','<'); 413 str = strrep(str,'&gt;','>'); 414 str = strrep(str,'&quot;','"'); 415 str = strrep(str,'&apos;',''''); 416 str = strrep(str,'&amp;','&'); 417 417 418 %----------------------------------------------------------------------- 418 %-------------------------------------------------------------------------- 419 419 function str = erode(str) 420 if ~isempty(str) & str(1)==' 'str(1)=''; end;421 if ~isempty(str) & str(end)==' 'str(end)=''; end;420 if ~isempty(str) && str(1)==' ', str(1)=''; end; 421 if ~isempty(str) && str(end)==' ', str(end)=''; end;
Note: See TracChangeset
for help on using the changeset viewer.