function tree = xml_parser(filename)
% XML (eXtensible Markup Language) Processor
% FORMAT tree = xml_parser(filename)
%
% filename - XML file to parse (character row vector)
% tree     - tree structure corresponding to the XML file: a cell array
%            of node structures in which tree{uid} is the node whose
%            unique identifier is uid (see the description below)
%_______________________________________________________________________
%
% xml_parser.m is an XML 1.0 (http://www.w3.org/TR/REC-xml) parser
% written in Matlab. It aims to be fully conforming. It is currently not
% a validating XML processor.
% (based on a Javascript parser available at http://www.jeremie.com)
%
% A description of the tree structure provided in output is detailed in
% the header of this m-file.
%_______________________________________________________________________
% @(#)xml_parser.m                 Guillaume Flandin          2002/04/04

% XML Processor for MATLAB (The Mathworks, Inc.).
% Copyright (C) 2002 Guillaume Flandin
%
% This program is free software; you can redistribute it and/or
% modify it under the terms of the GNU General Public License
% as published by the Free Software Foundation; either version 2
% of the License, or any later version.
%
% This program is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
% GNU General Public License for more details.
%
% You should have received a copy of the GNU General Public License
% along with this program; if not, write to the Free Software
% Foundation Inc, 59 Temple Pl. - Suite 330, Boston, MA 02111-1307, USA.
%-----------------------------------------------------------------------

% Please feel free to email the author any comment/suggestion/bug report
% to improve this XML processor in Matlab.
% Email: Guillaume.Flandin@sophia.inria.fr
% Check also the latest developments on the following webpage:
% http://www-sop.inria.fr/epidaure/personnel/flandin/xml/
%-----------------------------------------------------------------------

% A mex-file xml_findstr.c is also required, to overcome some
% limitations of the built-in findstr Matlab function.
% Compile it on your architecture using 'mex -O xml_findstr.c' command
% if the compiled version for your system is not provided.
% If this function behaves badly (crash or wrong results), comment the
% line '#define __HACK_MXCHAR__' in xml_findstr.c and compile it again.
%-----------------------------------------------------------------------

% Structure of the output tree:
% There are 5 types of nodes in an XML file: element, chardata, cdata,
% pi and comment.
% Each of them contains an UID (Unique Identifier): an integer between
% 1 and the number of nodes of the XML file.
%
%    element (a tag <name key="value"> [contents] </name>)
%       |_ type:       'element'
%       |_ name:       string
%       |_ attributes: cell array of struct 'key' and 'val' or [] if none
%       |_ contents:   double array of uid's or [] if empty
%       |_ parent:     uid of the parent ([] if root)
%       |_ uid:        double
%
%    chardata (a character array)
%       |_ type:   'chardata'
%       |_ value:  string
%       |_ parent: uid of the parent
%       |_ uid:    double
%
%    cdata (a literal string <![CDATA[value]]>)
%       |_ type:   'cdata'
%       |_ value:  string
%       |_ parent: uid of the parent
%       |_ uid:    double
%
%    pi (a processing instruction <?target value ?>)
%       |_ type:   'pi'
%       |_ target: string (may be empty)
%       |_ value:  string
%       |_ parent: uid of the parent
%       |_ uid:    double
%
%    comment (a comment <!-- value -->)
%       |_ type:   'comment'
%       |_ value:  string
%       |_ parent: uid of the parent
%       |_ uid:    double
%
%-----------------------------------------------------------------------

% TODO/BUG/FEATURES:
%  - [compile] only a warning if TagStart is empty
%  - [attribution] should look for " and ' rather than only "
%  - [main] with normalize as a preprocessing, CDATA are modified
%  - [prolog] look for a DOCTYPE in the whole string even if it occurs
%    only in a far CDATA tag (for example)...
%  - [tag_element] erode should replace normalize here
%  - remove globals? uppercase globals rather than persistent (clear mfile)?
%  - xml_findstr is in fact xml_strfind according to Mathworks vocabulary
%  - problem with entity (don't know if the bug is here or in save fct.)
%-----------------------------------------------------------------------

%- XML string to parse, number of nodes read so far and output tree
%  (shared with the subfunctions through global variables)
global xmlstring Xparse_count xtree;

%- Check input arguments
error(nargchk(1,1,nargin));
if isempty(filename)
    error('Not enough parameters.')
elseif ~ischar(filename) | sum(size(filename)>1)>1
    %- ischar replaces the obsolete isstr; the size test rejects
    %  character matrices (more than one non-singleton dimension)
    error('Input must be a string filename.')
end

%- Read the entire XML file
fid = fopen(filename,'rt');
if (fid==-1)
    error(sprintf('Cannot open %s for reading.',filename))
end
xmlstring = fscanf(fid,'%c');
fclose(fid);

%- Initialize number of tags (<=> uid)
Xparse_count = 0;

%- Remove prolog and white space characters from the XML string
xmlstring = normalize(prolog(xmlstring));

%- Initialize the XML tree and the top-level fragment:
%  parsing starts at the first character and has no enclosing element
xtree       = {};
root        = fragment;
root.str    = 1;
root.parent = 0;

%- Parse the XML string (nodes are accumulated in the global xtree)
root = compile(root);

%- Return the XML tree
tree = xtree;

%- Remove global variables from the workspace
clear global xmlstring Xparse_count xtree;
---|
144 | |
---|
145 | %======================================================================= |
---|
146 | % SUBFUNCTIONS |
---|
147 | |
---|
148 | %----------------------------------------------------------------------- |
---|
function frag = compile(frag)
% Parse the content of a fragment of the global XML string.
%
% frag - fragment structure with fields:
%          str    - index in xmlstring at which parsing (re)starts
%          parent - uid of the enclosing element (0 at top level)
%          end    - name of the enclosing element ('' at top level)
%
% Nodes found are appended to the global cell array xtree. The function
% returns when the end of the string is reached or when the end-tag of
% the enclosing element has been consumed; frag.str then holds the index
% at which the caller has to resume.
global xmlstring xtree Xparse_count;

while 1,
    %- Nothing (or nothing significant) left to parse
    if length(xmlstring)<=frag.str | ...
       (frag.str == length(xmlstring)-1 & strcmp(xmlstring(frag.str:end),' '))
        return
    end
    TagStart = xml_findstr(xmlstring,'<',frag.str,1);
    if isempty(TagStart)
        %- Character data after the last tag (should be an error)
        warning('[XML] Unknown data at the end of the XML file.');
        fprintf('Please send me your XML file at gflandin@sophia.inria.fr\n');
        %- chardata increments Xparse_count before the assignment takes
        %  place, so the new node has to be stored at Xparse_count itself
        xtree{Xparse_count} = chardata;
        xtree{Xparse_count}.value = erode(entity(xmlstring(frag.str:end)));
        if frag.parent
            xtree{Xparse_count}.parent = frag.parent;
            xtree{frag.parent}.contents = [xtree{frag.parent}.contents Xparse_count];
        end
        %- The whole string has been consumed: stop here instead of
        %  finding the same trailing data again at the next iteration
        frag.str = length(xmlstring) + 1;
        return
    elseif TagStart > frag.str
        if strcmp(xmlstring(frag.str:TagStart-1),' ')
            %- A single white space before a tag (ignore)
            frag.str = TagStart;
        else
            %- Character data
            xtree{Xparse_count} = chardata;
            xtree{Xparse_count}.value = erode(entity(xmlstring(frag.str:TagStart-1)));
            if frag.parent
                %- Only link to a parent when there is one (uid 0 is not
                %  a valid index in xtree)
                xtree{Xparse_count}.parent = frag.parent;
                xtree{frag.parent}.contents = [xtree{frag.parent}.contents Xparse_count];
            end
            frag.str = TagStart;
        end
    else
        %- frag.str points to a '<': find out which kind of markup it is
        if strcmp(xmlstring(frag.str+1),'?')
            %- Processing instruction
            frag = tag_pi(frag);
        else
            if length(xmlstring)-frag.str>4 & strcmp(xmlstring(frag.str+1:frag.str+3),'!--')
                %- Comment
                frag = tag_comment(frag);
            else
                if length(xmlstring)-frag.str>9 & strcmp(xmlstring(frag.str+1:frag.str+8),'![CDATA[')
                    %- Literal data
                    frag = tag_cdata(frag);
                else
                    %- A tag element (empty (<.../>) or not)
                    if ~isempty(frag.end)
                        endmk = ['/' frag.end '>'];
                    else
                        endmk = '/>';
                    end
                    %- Text following '<', as long as the expected end
                    %  marker (clamped so that a truncated document does
                    %  not raise an out-of-range indexing error)
                    last = min(frag.str+length(frag.end)+2,length(xmlstring));
                    candidate = xmlstring(frag.str+1:last);
                    if strcmp(candidate,endmk) | strcmp(strip(candidate),endmk)
                        %- End-tag of the enclosing element: skip it and
                        %  hand control back to the caller
                        frag.str = frag.str + length(frag.end)+3;
                        return
                    else
                        frag = tag_element(frag);
                    end
                end
            end
        end
    end
end
---|
211 | |
---|
212 | %----------------------------------------------------------------------- |
---|
function frag = tag_element(frag)
% Parse the element whose start-tag begins at index frag.str of the
% global XML string, store it (and, recursively, its contents) in the
% global tree and move frag.str past the element.
global xmlstring xtree Xparse_count;

%- Locate the '>' that terminates the start-tag
gt = xml_findstr(xmlstring,'>',frag.str,1);
if isempty(gt)
    error('[XML] Tag < opened but not closed.');
end

%- An empty-element tag ends with '/>': the slash is not part of the tag
selfclosing = strcmp(xmlstring(gt-1:gt),'/>');
if selfclosing
    lastchar = gt - 2;
else
    lastchar = gt - 1;
end
starttag = normalize(xmlstring(frag.str+1:lastchar));

%- The name runs up to the first blank, the attributes come after it
blank = xml_findstr(starttag,' ',1,1);
if isempty(blank)
    name    = starttag;
    attribs = '';
else
    name    = starttag(1:blank-1);
    attribs = starttag(blank+1:end);
end

%- Create the node (element increments Xparse_count) and fill it in
xtree{Xparse_count} = element;
uid = Xparse_count;
xtree{uid}.name = strip(name);
if frag.parent
    xtree{uid}.parent = frag.parent;
    xtree{frag.parent}.contents = [xtree{frag.parent}.contents uid];
end
if ~isempty(attribs)
    xtree{uid}.attributes = attribution(attribs);
end

%- In both cases parsing resumes right after the '>' of the start-tag
if selfclosing
    frag.str = gt + 1;
else
    %- Parse the children until the matching end-tag has been consumed
    inner        = fragment;
    inner.str    = gt + 1;
    inner.end    = name;
    inner.parent = uid;
    inner        = compile(inner);
    frag.str     = inner.str;
end
---|
252 | |
---|
253 | %----------------------------------------------------------------------- |
---|
function frag = tag_pi(frag)
% Parse the processing instruction (<?target value ?>) that begins at
% index frag.str of the global XML string, store it in the global tree
% and move frag.str past it.
%
% If the instruction is not terminated by '?>', a warning is issued and
% frag.str is moved past the end of the string so that parsing stops.
global xmlstring xtree Xparse_count;

stop = xml_findstr(xmlstring,'?>',frag.str,1);
if isempty(stop)
    warning('[XML] Tag <? opened but not closed.')
    %- Leaving frag.str untouched would make compile call this function
    %  again and again on the same '<?': skip the rest of the string
    frag.str = length(xmlstring) + 1;
    return
end

%- First blank after the beginning of the instruction
nextspace = xml_findstr(xmlstring,' ',frag.str,1);

%- No target when there is no blank inside the instruction (none at all,
%  or the first one lies after '?>') or when a blank directly follows '<?'
notarget = isempty(nextspace);
if ~notarget
    notarget = nextspace > stop | nextspace == frag.str+2;
end

xtree{Xparse_count} = pri;
if notarget
    xtree{Xparse_count}.value = erode(xmlstring(frag.str+2:stop-1));
else
    xtree{Xparse_count}.value = erode(xmlstring(nextspace+1:stop-1));
    xtree{Xparse_count}.target = erode(xmlstring(frag.str+2:nextspace));
end
if frag.parent
    xtree{frag.parent}.contents = [xtree{frag.parent}.contents Xparse_count];
    xtree{Xparse_count}.parent = frag.parent;
end

%- Resume after '?>'
frag.str = stop + 2;
---|
274 | |
---|
275 | %----------------------------------------------------------------------- |
---|
function frag = tag_comment(frag)
% Parse the comment (<!-- value -->) that begins at index frag.str of
% the global XML string, store it in the global tree and move frag.str
% past it.
%
% If the comment is not terminated by '-->', a warning is issued and
% frag.str is moved past the end of the string so that parsing stops.
global xmlstring xtree Xparse_count;

stop = xml_findstr(xmlstring,'-->',frag.str,1);
if isempty(stop)
    warning('[XML] Tag <!-- opened but not closed.')
    %- Leaving frag.str untouched would make compile call this function
    %  again and again on the same '<!--': skip the rest of the string
    frag.str = length(xmlstring) + 1;
    return
end

xtree{Xparse_count} = comment;
%- '<!--' is 4 characters long
xtree{Xparse_count}.value = erode(xmlstring(frag.str+4:stop-1));
if frag.parent
    xtree{frag.parent}.contents = [xtree{frag.parent}.contents Xparse_count];
    xtree{Xparse_count}.parent = frag.parent;
end

%- Resume after '-->'
frag.str = stop + 3;
---|
290 | |
---|
291 | %----------------------------------------------------------------------- |
---|
function frag = tag_cdata(frag)
% Parse the CDATA section (<![CDATA[value]]>) that begins at index
% frag.str of the global XML string, store it in the global tree and
% move frag.str past it. The value is stored as is (no entity decoding,
% no trimming).
%
% If the section is not terminated by ']]>', a warning is issued and
% frag.str is moved past the end of the string so that parsing stops.
global xmlstring xtree Xparse_count;

stop = xml_findstr(xmlstring,']]>',frag.str,1);
if isempty(stop)
    warning('[XML] Tag <![CDATA[ opened but not closed.')
    %- Leaving frag.str untouched would make compile call this function
    %  again and again on the same '<![CDATA[': skip the rest of the string
    frag.str = length(xmlstring) + 1;
    return
end

xtree{Xparse_count} = cdata;
%- '<![CDATA[' is 9 characters long
xtree{Xparse_count}.value = xmlstring(frag.str+9:stop-1);
if frag.parent
    xtree{frag.parent}.contents = [xtree{frag.parent}.contents Xparse_count];
    xtree{Xparse_count}.parent = frag.parent;
end

%- Resume after ']]>'
frag.str = stop + 3;
---|
306 | |
---|
307 | %----------------------------------------------------------------------- |
---|
function all = attribution(str)
% Split the attribute part of a start-tag into key/value pairs.
%
% str - attributes of a tag, e.g. 'a="1" b=''2'''
% all - cell array of structures with fields 'key' (attribute name,
%       white space removed) and 'val' (attribute value, entities
%       decoded); empty if no attribute is found
%
% Values may be delimited by double or by single quotes; the closing
% delimiter is the next occurrence of the opening one. Parsing stops at
% the first attribute whose delimiters cannot be found.

%- Initialize attributes
nbattr = 0;
all    = cell(nbattr);
%- Look for 'key="value"' (or key='value') substrings
while 1,
    if isempty(str), return; end
    eqpos = xml_findstr(str,'=',1,1);
    if isempty(eqpos), return; end
    %- Opening delimiter: first quote character after the '=' sign
    quotes = find(str == '"' | str == '''');
    quotes = quotes(quotes > eqpos);
    if isempty(quotes), return; end
    id = quotes(1);
    %- Closing delimiter: next occurrence of the same quote character
    closing = find(str == str(id));
    closing = closing(closing > id);
    if isempty(closing), return; end
    nextid = closing(1);
    nbattr = nbattr + 1;
    all{nbattr}.key = strip(str(1:(eqpos-1)));
    all{nbattr}.val = entity(str((id+1):(nextid-1)));
    %- Carry on with what follows the closing delimiter
    str = str((nextid+1):end);
end
---|
323 | |
---|
324 | %----------------------------------------------------------------------- |
---|
function elm = element
% Create an empty 'element' node and assign it the next free uid
% (the global node counter is incremented).
global Xparse_count;
Xparse_count   = Xparse_count + 1;
elm.type       = 'element';
elm.name       = '';
elm.attributes = [];
elm.contents   = [];
elm.parent     = [];
elm.uid        = Xparse_count;
---|
329 | |
---|
330 | %----------------------------------------------------------------------- |
---|
function cdat = chardata
% Create an empty 'chardata' node and assign it the next free uid
% (the global node counter is incremented).
global Xparse_count;
Xparse_count = Xparse_count + 1;
cdat.type    = 'chardata';
cdat.value   = '';
cdat.parent  = [];
cdat.uid     = Xparse_count;
---|
335 | |
---|
336 | %----------------------------------------------------------------------- |
---|
function cdat = cdata
% Create an empty 'cdata' node and assign it the next free uid
% (the global node counter is incremented).
global Xparse_count;
Xparse_count = Xparse_count + 1;
cdat.type    = 'cdata';
cdat.value   = '';
cdat.parent  = [];
cdat.uid     = Xparse_count;
---|
341 | |
---|
342 | %----------------------------------------------------------------------- |
---|
function proce = pri
% Create an empty 'pi' (processing instruction) node and assign it the
% next free uid (the global node counter is incremented).
global Xparse_count;
Xparse_count = Xparse_count + 1;
proce.type   = 'pi';
proce.value  = '';
proce.target = '';
proce.parent = [];
proce.uid    = Xparse_count;
---|
347 | |
---|
348 | %----------------------------------------------------------------------- |
---|
function commt = comment
% Create an empty 'comment' node and assign it the next free uid
% (the global node counter is incremented).
global Xparse_count;
Xparse_count = Xparse_count + 1;
commt.type   = 'comment';
commt.value  = '';
commt.parent = [];
commt.uid    = Xparse_count;
---|
353 | |
---|
354 | %----------------------------------------------------------------------- |
---|
function frg = fragment
% Create a blank fragment descriptor: 'str' (parsing position),
% 'parent' (uid of the enclosing element) and 'end' (name of the
% enclosing element) are all initialised to the empty string.
frg.str    = '';
frg.parent = '';
frg.end    = '';
---|
357 | |
---|
358 | %----------------------------------------------------------------------- |
---|
function str = prolog(str)
% Remove the prolog (XML declaration and document type declaration)
% from the beginning of an XML string.
%
% str - content of the XML file (input) / same string without its
%       prolog (output)
%
% An error is raised if the string does not contain any '<'.
% The document type declaration is only recognised where it is allowed,
% i.e. after the XML declaration and separated from it by nothing but
% white space, comments and processing instructions: a '<!DOCTYPE' that
% appears later in the document (in a CDATA section for instance) is
% left alone.

%- Initialize beginning index of elements tree
b = 1;
n = length(str);

%- Initial tag
start = xml_findstr(str,'<',1,1);
if isempty(start)
    error('[XML] No tag found.')
end

%- Header (<?xml version="1.0" ... ?>)
%  (the slice is clamped so that a very short string cannot be indexed
%  out of range; a shorter slice simply does not match)
if strcmp(lower(str(start:min(start+2,n))),'<?x')
    close = xml_findstr(str,'?>',start,1);
    if ~isempty(close)
        b = close + 2;
    else
        warning('[XML] Header tag incomplete.')
    end
end

%- Skip what may legally precede the document type declaration
pos = b;
while pos <= n
    if isspace(str(pos))
        pos = pos + 1;
    elseif strcmp(str(pos:min(pos+3,n)),'<!--')
        close = xml_findstr(str,'-->',pos,1);
        if isempty(close), break; end
        pos = close + 3;
    elseif strcmp(str(pos:min(pos+1,n)),'<?')
        close = xml_findstr(str,'?>',pos,1);
        if isempty(close), break; end
        pos = close + 2;
    else
        break
    end
end

%- Doctype (<!DOCTYPE type ... [ declarations ]>)
if pos <= n & strcmp(str(pos:min(pos+8,n)),'<!DOCTYPE')  % length('<!DOCTYPE') = 9
    close = xml_findstr(str,'>',pos,1);
    if ~isempty(close)
        b = close + 1;
        %- An internal subset ([ ... ]) may itself contain '>': in that
        %  case the declaration ends at the first ']>'
        dp = xml_findstr(str,'[',pos,1);
        if ~isempty(dp)
            if dp < b
                k = xml_findstr(str,']>',pos,1);
                if ~isempty(k)
                    b = k + 2;
                else
                    warning('[XML] Tag [ in DOCTYPE opened but not closed.')
                end
            end
        end
    else
        warning('[XML] Tag DOCTYPE opened but not closed.')
    end
end

%- Skip prolog from the xml string
str = str(b:end);
---|
397 | |
---|
398 | %----------------------------------------------------------------------- |
---|
function str = strip(str)
% Remove every white space character (blank, tab, newline, ...) from str.
str(isspace(str)) = [];
---|
403 | |
---|
404 | %----------------------------------------------------------------------- |
---|
function str = normalize(str)
% Turn every white space character (space, newline, carriage return,
% tab, ...) into a blank and squeeze each run of consecutive blanks
% into a single one.
blank = isspace(str);
if any(blank)
    str(blank) = ' ';
    %- A blank is redundant when the character that follows it is a
    %  blank as well: only the last blank of each run is kept
    redundant = find(blank(1:end-1) & blank(2:end));
    str(redundant) = [];
end
---|
416 | |
---|
417 | %----------------------------------------------------------------------- |
---|
function str = entity(str)
% Replace the five predefined XML entity references (&lt; &gt; &quot;
% &apos; &amp;) found in str by the characters they stand for.
str = strrep(str,'&lt;','<');
str = strrep(str,'&gt;','>');
str = strrep(str,'&quot;','"');
str = strrep(str,'&apos;','''');
%- '&amp;' is decoded last so that an escaped reference such as
%  '&amp;lt;' yields the text '&lt;' and not '<'
str = strrep(str,'&amp;','&');
---|
424 | |
---|
425 | %----------------------------------------------------------------------- |
---|
function str = erode(str)
% Remove at most one leading blank and at most one trailing blank
% from str.
if ~isempty(str)
    if str(1) == ' '
        str(1) = [];
    end
end
%- Test again: removing the leading blank may have emptied the string
if ~isempty(str)
    if str(end) == ' '
        str(end) = [];
    end
end
---|