function tree = xml_parser(filename)
% XML (eXtensible Markup Language) Processor
% FORMAT tree = xml_parser(filename)
%
% filename - XML file to parse
% tree     - tree structure corresponding to the XML file
%_______________________________________________________________________
%
% xml_parser.m is an XML 1.0 (http://www.w3.org/TR/REC-xml) parser
% written in Matlab. It aims to be fully conforming. It is currently not
% a validating XML processor.
% (based on a Javascript parser available at http://www.jeremie.com)
%
% A description of the tree structure provided in output is detailed in
% the header of this m-file.
%_______________________________________________________________________
% @(#)xml_parser.m               Guillaume Flandin            2002/04/04

% XML Processor for MATLAB (The Mathworks, Inc.).
% Copyright (C) 2002  Guillaume Flandin
%
% This program is free software; you can redistribute it and/or
% modify it under the terms of the GNU General Public License
% as published by the Free Software Foundation; either version 2
% of the License, or any later version.
%
% This program is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
% GNU General Public License for more details.
%
% You should have received a copy of the GNU General Public License
% along with this program; if not, write to the Free Software
% Foundation Inc, 59 Temple Pl. - Suite 330, Boston, MA 02111-1307, USA.
%-----------------------------------------------------------------------

% Please feel free to email the author any comment/suggestion/bug report
% to improve this XML processor in Matlab.
% Email: Guillaume.Flandin@sophia.inria.fr
% Check also the latest developments on the following webpage:
% http://www-sop.inria.fr/epidaure/personnel/flandin/xml/
%-----------------------------------------------------------------------

% A mex-file xml_findstr.c is also required, to work around some
% limitations of the built-in findstr Matlab function.
% Compile it on your architecture using the 'mex -O xml_findstr.c' command
% if the compiled version for your system is not provided.
% If this function behaves badly (crash or wrong results), comment out the
% line '#define __HACK_MXCHAR__' in xml_findstr.c and compile it again.
%-----------------------------------------------------------------------

% Structure of the output tree:
% There are 5 types of nodes in an XML file: element, chardata, cdata,
% pi and comment.
% Each of them contains a UID (Unique Identifier): an integer between
% 1 and the number of nodes of the XML file.
%
%    element (a tag <name key="value"> [contents] </name>)
%       |_ type:       'element'
%       |_ name:       string
%       |_ attributes: cell array of struct with fields 'key' and 'val',
%       |              or [] if the element has no attribute
%       |_ contents:   double array of uid's or [] if empty
%       |_ parent:     uid of the parent ([] if root)
%       |_ uid:        double
%
%    chardata (a character array)
%       |_ type:   'chardata'
%       |_ value:  string
%       |_ parent: uid of the parent
%       |_ uid:    double
%
%    cdata (a literal string <![CDATA[value]]>)
%       |_ type:   'cdata'
%       |_ value:  string
%       |_ parent: uid of the parent
%       |_ uid:    double
%
%    pi (a processing instruction <?target value ?>)
%       |_ type:   'pi'
%       |_ target: string (may be empty)
%       |_ value:  string
%       |_ parent: uid of the parent
%       |_ uid:    double
%
%    comment (a comment <!-- value -->)
%       |_ type:   'comment'
%       |_ value:  string
%       |_ parent: uid of the parent
%       |_ uid:    double
%
%-----------------------------------------------------------------------

% TODO/BUG/FEATURES:
%  - [compile] only a warning if TagStart is empty
%  - [attribution] should look for " and ' rather than only "
%  - [main] with normalize as a preprocessing, CDATA are modified
%  - [prolog] look for a DOCTYPE in the whole string even if it occurs
%    only in a far CDATA tag (for example)...
%  - [tag_element] erode should replace normalize here
%  - remove globals? uppercase globals rather than persistent (clear mfile)?
%  - xml_findstr is in fact xml_strfind according to Mathworks vocabulary
%  - problem with entity (don't know if the bug is here or in save fct.)
%-----------------------------------------------------------------------

%- Shared parser state: text being parsed, number of nodes created so
%  far (also the uid of the latest node) and the cell array of nodes
global xmlstring Xparse_count xtree;

%- Validate the input argument
error(nargchk(1,1,nargin));
if isempty(filename)
    error('Not enough parameters.')
end
isVector = sum(size(filename) > 1) <= 1;
if ~(isstr(filename) & isVector)
    error('Input must be a string filename.')
end

%- Slurp the whole XML file as one character array
fid = fopen(filename,'rt');
if (fid == -1)
    msg = sprintf('Cannot open %s for reading.',filename);
    error(msg)
end
xmlstring = fscanf(fid,'%c');
fclose(fid);

%- No node has been created yet (the counter doubles as the uid)
Xparse_count = 0;

%- Drop the prolog, then collapse every run of white space
xmlstring = prolog(xmlstring);
xmlstring = normalize(xmlstring);

%- Start from an empty tree and a root fragment covering the whole text
xtree = {};
root = fragment;
root.str = 1;
root.parent = 0;

%- Parse the XML string; the nodes accumulate in the global xtree, so
%  the fragment returned by compile is not needed here
root = compile(root);

%- Hand the tree back to the caller
tree = xtree;

%- Remove the global variables from the workspace
clear global xmlstring Xparse_count xtree;
|---|
| 144 | |
|---|
| 145 | %======================================================================= |
|---|
| 146 | % SUBFUNCTIONS |
|---|
| 147 | |
|---|
| 148 | %----------------------------------------------------------------------- |
|---|
function frag = compile(frag)
%- Parse the nodes of one fragment of the global XML string.
%
% frag - structure (see fragment) with fields
%          str:    index in xmlstring where parsing starts
%          parent: uid of the enclosing element (0 at top level)
%          end:    name of the enclosing element ('' at top level)
%
% Nodes are appended to the global xtree by the tag_* subfunctions and
% by store_chardata. The function returns when the end of the string is
% reached or when the end tag matching frag.end has been consumed;
% frag.str then holds the index where the caller must resume.
global xmlstring;

while 1,
    %- Stop at the end of the string
    %  NOTE(review): the second test compares a two-character slice with
    %  a one-character string and so can never be true; kept unchanged.
    if length(xmlstring)<=frag.str | ...
       (frag.str == length(xmlstring)-1 & strcmp(xmlstring(frag.str:end),' '))
        return
    end
    TagStart = xml_findstr(xmlstring,'<',frag.str,1);
    if isempty(TagStart)
        %- Character data (should be an error)
        warning('[XML] Unknown data at the end of the XML file.');
        fprintf('Please send me your XML file at gflandin@sophia.inria.fr\n');
        store_chardata(xmlstring(frag.str:end),frag.parent);
        %- Everything has been consumed: move past the end and leave,
        %  otherwise the loop would process the same text forever
        frag.str = length(xmlstring) + 1;
        return
    elseif TagStart > frag.str
        if ~strcmp(xmlstring(frag.str:TagStart-1),' ')
            %- Character data (a single white space before a tag is ignored)
            store_chardata(xmlstring(frag.str:TagStart-1),frag.parent);
        end
        frag.str = TagStart;
    else
        if strcmp(xmlstring(frag.str+1),'?')
            %- Processing instruction
            frag = tag_pi(frag);
        elseif length(xmlstring)-frag.str>4 & strcmp(xmlstring(frag.str+1:frag.str+3),'!--')
            %- Comment
            frag = tag_comment(frag);
        elseif length(xmlstring)-frag.str>9 & strcmp(xmlstring(frag.str+1:frag.str+8),'![CDATA[')
            %- Literal data
            frag = tag_cdata(frag);
        else
            %- A tag element (empty (<.../>) or not)
            if ~isempty(frag.end)
                endmk = ['/' frag.end '>'];
            else
                endmk = '/>';
            end
            %- Clamp the slice so that a truncated document cannot index
            %  past the end of the string (a short slice never matches)
            last = min(frag.str+length(frag.end)+2,length(xmlstring));
            candidate = xmlstring(frag.str+1:last);
            if strcmp(candidate,endmk) | strcmp(strip(candidate),endmk)
                %- End tag of the enclosing element: skip it and return
                frag.str = frag.str + length(frag.end)+3;
                return
            else
                frag = tag_element(frag);
            end
        end
    end
end

%-----------------------------------------------------------------------
function store_chardata(text,parent)
%- Append a chardata node holding TEXT to the global tree.
%  The node is linked to PARENT only when PARENT is a valid (non-zero)
%  uid, so character data found outside any element no longer indexes
%  xtree{0}.
global xtree Xparse_count;
node = chardata;          % increments Xparse_count
uid  = Xparse_count;      % uid of the node just created
node.value = erode(entity(text));
if parent
    node.parent = parent;
    xtree{parent}.contents = [xtree{parent}.contents uid];
end
xtree{uid} = node;
|---|
| 211 | |
|---|
| 212 | %----------------------------------------------------------------------- |
|---|
function frag = tag_element(frag)
%- Parse the element whose '<' is at index frag.str of the global XML
%  string, store it in the global tree and, unless it is an empty
%  element (<.../>), parse its contents recursively with compile.
%  On return frag.str is the index where the caller must resume.
global xmlstring xtree Xparse_count;

gt = xml_findstr(xmlstring,'>',frag.str,1);
if isempty(gt)
    error('[XML] Tag < opened but not closed.');
end

%- An empty element ends with '/>': the start tag then stops one
%  character earlier
selfclosing = strcmp(xmlstring(gt-1:gt),'/>');
if selfclosing
    tagstop = gt - 2;
else
    tagstop = gt - 1;
end
starttag = normalize(xmlstring(frag.str+1:tagstop));

%- Split the start tag into the element name and the attribute string
sp = xml_findstr(starttag,' ',1,1);
if isempty(sp)
    name    = starttag;
    attribs = '';
else
    name    = starttag(1:sp-1);
    attribs = starttag(sp+1:end);
end

%- Build the node locally, then store it under its uid
node = element;           % increments Xparse_count
uid  = Xparse_count;
node.name = strip(name);
if frag.parent
    node.parent = frag.parent;
    xtree{frag.parent}.contents = [xtree{frag.parent}.contents uid];
end
if ~isempty(attribs)
    node.attributes = attribution(attribs);
end
xtree{uid} = node;

if selfclosing
    %- Nothing to recurse into: resume right after '/>'
    frag.str = gt + 1;
else
    %- Parse the contents up to the matching end tag
    inner = fragment;
    inner.str    = gt + 1;
    inner.end    = name;
    inner.parent = uid;
    inner = compile(inner);
    frag.str = inner.str;
end
|---|
| 252 | |
|---|
| 253 | %----------------------------------------------------------------------- |
|---|
function frag = tag_pi(frag)
%- Parse the processing instruction (<?target value ?>) whose '<' is at
%  index frag.str of the global XML string and store it in the global
%  tree. On return frag.str is the index where the caller must resume.
%  If the closing '?>' is missing, a warning is issued and frag.str is
%  moved past the end of the string so that the caller stops parsing
%  instead of meeting the same unclosed tag again and again.
global xmlstring xtree Xparse_count;

piend = xml_findstr(xmlstring,'?>',frag.str,1);
if isempty(piend)
    warning('[XML] Tag <? opened but not closed.')
    frag.str = length(xmlstring) + 1;
else
    nextspace = xml_findstr(xmlstring,' ',frag.str,1);
    node = pri;               % increments Xparse_count
    uid  = Xparse_count;
    %- No target when there is no space inside the instruction (none at
    %  all, or the first one lies after '?>') or when the space comes
    %  right after '<?'
    notarget = isempty(nextspace);
    if ~notarget
        notarget = nextspace > piend | nextspace == frag.str+2;
    end
    if notarget
        node.value = erode(xmlstring(frag.str+2:piend-1));
    else
        node.value = erode(xmlstring(nextspace+1:piend-1));
        node.target = erode(xmlstring(frag.str+2:nextspace));
    end
    if frag.parent
        xtree{frag.parent}.contents = [xtree{frag.parent}.contents uid];
        node.parent = frag.parent;
    end
    xtree{uid} = node;
    frag.str = piend+2;
end
|---|
| 274 | |
|---|
| 275 | %----------------------------------------------------------------------- |
|---|
function frag = tag_comment(frag)
%- Parse the comment (<!-- value -->) whose '<' is at index frag.str of
%  the global XML string and store it in the global tree.
%  On return frag.str is the index where the caller must resume.
%  If the closing '-->' is missing, a warning is issued and frag.str is
%  moved past the end of the string so that the caller stops parsing
%  instead of meeting the same unclosed tag again and again.
global xmlstring xtree Xparse_count;

cend = xml_findstr(xmlstring,'-->',frag.str,1);
if isempty(cend)
    warning('[XML] Tag <!-- opened but not closed.')
    frag.str = length(xmlstring) + 1;
else
    node = comment;           % increments Xparse_count
    uid  = Xparse_count;
    %- '<!--' is 4 characters long: the text starts at frag.str+4
    node.value = erode(xmlstring(frag.str+4:cend-1));
    if frag.parent
        xtree{frag.parent}.contents = [xtree{frag.parent}.contents uid];
        node.parent = frag.parent;
    end
    xtree{uid} = node;
    frag.str = cend+3;
end
|---|
| 290 | |
|---|
| 291 | %----------------------------------------------------------------------- |
|---|
function frag = tag_cdata(frag)
%- Parse the CDATA section (<![CDATA[value]]>) whose '<' is at index
%  frag.str of the global XML string and store it in the global tree.
%  The value is stored as is (no entity decoding, no trimming).
%  On return frag.str is the index where the caller must resume.
%  If the closing ']]>' is missing, a warning is issued and frag.str is
%  moved past the end of the string so that the caller stops parsing
%  instead of meeting the same unclosed tag again and again.
global xmlstring xtree Xparse_count;

cend = xml_findstr(xmlstring,']]>',frag.str,1);
if isempty(cend)
    warning('[XML] Tag <![CDATA[ opened but not closed.')
    frag.str = length(xmlstring) + 1;
else
    node = cdata;             % increments Xparse_count
    uid  = Xparse_count;
    %- '<![CDATA[' is 9 characters long: the text starts at frag.str+9
    node.value = xmlstring(frag.str+9:cend-1);
    if frag.parent
        xtree{frag.parent}.contents = [xtree{frag.parent}.contents uid];
        node.parent = frag.parent;
    end
    xtree{uid} = node;
    frag.str = cend+3;
end
|---|
| 306 | |
|---|
| 307 | %----------------------------------------------------------------------- |
|---|
function all = attribution(str)
%- Split an attribute string (key="value" key2='value2' ...) into a cell
%  array of structures with fields 'key' and 'val'.
%  Values may be delimited by double or single quotes; the delimiter
%  that opens a value must also close it. Entities in values are
%  decoded with entity. Parsing stops (with a warning) at the first
%  attribute whose value is not properly quoted; the attributes read so
%  far are returned.

%- Initialize attributes
nbattr = 0;
all = cell(nbattr);
%- Look for 'key="value"' (or key='value') substrings
while 1,
    if isempty(str), return; end
    eq = xml_findstr(str,'=',1,1);
    if isempty(eq), return; end
    %- The value starts at whichever quote character comes first
    dq = xml_findstr(str,'"',1,1);
    sq = xml_findstr(str,'''',1,1);
    if isempty(dq)
        id = sq; quote = '''';
    elseif isempty(sq)
        id = dq; quote = '"';
    elseif dq < sq
        id = dq; quote = '"';
    else
        id = sq; quote = '''';
    end
    if isempty(id)
        warning('[XML] Attribute value is not quoted.');
        return
    end
    %- Look for the matching closing quote (none if the opening quote
    %  is the last character of the string)
    nextid = [];
    if id < length(str)
        nextid = xml_findstr(str,quote,id+1,1);
    end
    if isempty(nextid)
        warning('[XML] Attribute value opened but not closed.');
        return
    end
    nbattr = nbattr + 1;
    all{nbattr}.key = strip(str(1:(eq-1)));
    all{nbattr}.val = entity(str((id+1):(nextid-1)));
    %- Continue with what follows the closing quote
    str = str((nextid+1):end);
end
|---|
| 323 | |
|---|
| 324 | %----------------------------------------------------------------------- |
|---|
function elm = element
%- Create an empty 'element' node. The global node counter is
%  incremented and its new value becomes the uid of the node.
global Xparse_count;
Xparse_count = Xparse_count + 1;
elm.type       = 'element';
elm.name       = '';
elm.attributes = [];
elm.contents   = [];
elm.parent     = [];
elm.uid        = Xparse_count;
|---|
| 329 | |
|---|
| 330 | %----------------------------------------------------------------------- |
|---|
function cdat = chardata
%- Create an empty 'chardata' node. The global node counter is
%  incremented and its new value becomes the uid of the node.
global Xparse_count;
Xparse_count = Xparse_count + 1;
cdat.type   = 'chardata';
cdat.value  = '';
cdat.parent = [];
cdat.uid    = Xparse_count;
|---|
| 335 | |
|---|
| 336 | %----------------------------------------------------------------------- |
|---|
function cdat = cdata
%- Create an empty 'cdata' node. The global node counter is
%  incremented and its new value becomes the uid of the node.
global Xparse_count;
Xparse_count = Xparse_count + 1;
cdat.type   = 'cdata';
cdat.value  = '';
cdat.parent = [];
cdat.uid    = Xparse_count;
|---|
| 341 | |
|---|
| 342 | %----------------------------------------------------------------------- |
|---|
function proce = pri
%- Create an empty 'pi' (processing instruction) node. The global node
%  counter is incremented and its new value becomes the uid of the node.
global Xparse_count;
Xparse_count = Xparse_count + 1;
proce.type   = 'pi';
proce.value  = '';
proce.target = '';
proce.parent = [];
proce.uid    = Xparse_count;
|---|
| 347 | |
|---|
| 348 | %----------------------------------------------------------------------- |
|---|
function commt = comment
%- Create an empty 'comment' node. The global node counter is
%  incremented and its new value becomes the uid of the node.
global Xparse_count;
Xparse_count = Xparse_count + 1;
commt.type   = 'comment';
commt.value  = '';
commt.parent = [];
commt.uid    = Xparse_count;
|---|
| 353 | |
|---|
| 354 | %----------------------------------------------------------------------- |
|---|
function frg = fragment
%- Create an empty fragment descriptor with fields 'str', 'parent' and
%  'end', all initialised to the empty string.
frg.str    = '';
frg.parent = '';
frg.end    = '';
|---|
| 357 | |
|---|
| 358 | %----------------------------------------------------------------------- |
|---|
function str = prolog(str)
%- Remove the prolog of an XML string: the header (<?xml ... ?>) and the
%  document type declaration (<!DOCTYPE ... [ declarations ]>), when
%  present. An error is raised if the string contains no '<' at all;
%  incomplete header or DOCTYPE tags only produce a warning.

%- Length of the '<!DOCTYPE' keyword
DOCTYPE_LEN = 9;

%- Index of the first character kept in the output
first = 1;

%- There must be at least one tag
lt = xml_findstr(str,'<',1,1);
if isempty(lt)
    error('[XML] No tag found.')
end

%- Header (<?xml version="1.0" ... ?>)
if strcmp(lower(str(lt:lt+2)),'<?x')
    hdrend = xml_findstr(str,'?>',1,1);
    if isempty(hdrend)
        warning('[XML] Header tag incomplete.')
    else
        first = hdrend + 2;
    end
end

%- Doctype (<!DOCTYPE type ... [ declarations ]>)
dtd = xml_findstr(str,'<!DOCTYPE',first,1);
if ~isempty(dtd)
    afterkw = dtd + DOCTYPE_LEN;
    dtdend = xml_findstr(str,'>',afterkw,1);
    if isempty(dtdend)
        warning('[XML] Tag DOCTYPE opened but not closed.')
    else
        first = dtdend + 1;
        %- An internal subset ([ ... ]) opened before that '>' moves the
        %  end of the declaration to the closing ']>'
        bracket = xml_findstr(str,'[',afterkw,1);
        if ~isempty(bracket)
            if bracket < first
                subsetend = xml_findstr(str,']>',afterkw,1);
                if isempty(subsetend)
                    warning('[XML] Tag [ in DOCTYPE opened but not closed.')
                else
                    first = subsetend + 2;
                end
            end
        end
    end
end

%- Skip prolog from the xml string
str = str(first:end);
|---|
| 397 | |
|---|
| 398 | %----------------------------------------------------------------------- |
|---|
function str = strip(str)
%- Remove every white space character (as reported by isspace) from str.
str(isspace(str)) = [];
|---|
| 403 | |
|---|
| 404 | %----------------------------------------------------------------------- |
|---|
function str = normalize(str)
%- Turn every white space character (space, newline, carriage return,
%  tab, ...) of the row string str into a plain space and shrink each
%  run of consecutive white spaces to a single space.
ws = find(isspace(str));
str(ws) = ' ';
if ~isempty(ws)
    %- A white space immediately followed by another one is redundant
    redundant = find(diff(ws) == 1);
    str(ws(redundant)) = [];
end
|---|
| 416 | |
|---|
| 417 | %----------------------------------------------------------------------- |
|---|
function str = entity(str)
%- Replace the five predefined XML entity references found in str by the
%  character they stand for:
%     &lt; -> <    &gt; -> >    &quot; -> "    &apos; -> '    &amp; -> &
%  '&amp;' is decoded last so that an escaped ampersand followed by an
%  entity name (e.g. '&amp;lt;') yields the literal text '&lt;' and is
%  not decoded a second time.
str = strrep(str,'&lt;','<');
str = strrep(str,'&gt;','>');
str = strrep(str,'&quot;','"');
str = strrep(str,'&apos;','''');
str = strrep(str,'&amp;','&');
|---|
| 424 | |
|---|
| 425 | %----------------------------------------------------------------------- |
|---|
function str = erode(str)
%- Remove at most one leading and at most one trailing space from str.

%- Leading space
if ~isempty(str)
    if str(1) == ' '
        str(1) = [];
    end
end
%- Trailing space (tested again: the string may have become empty)
if ~isempty(str)
    if str(end) == ' '
        str(end) = [];
    end
end
|---|