Core  3.2
PHP API documentation
 All Data Structures Namespaces Files Functions Variables Pages
Class.XMLSplitter.php
Go to the documentation of this file.
1 <?php
2 /*
3  * @author Anakeen
4  * @package FDL
5 */
6 
8 {
/**
 * Last error message
 * @var string
 */
public $errmsg = '';
/**
 * Directory in which the per-node XML output files are created.
 *
 * Declared explicitly: it is assigned by the constructor and read when an
 * output file name is built, and relying on an undeclared (dynamic)
 * property is deprecated as of PHP 8.2. It is public because the
 * dynamically created property it replaces was publicly accessible.
 * @var string
 */
public $splitdir = '';
/**
 * The xml_parser (false once the parser has been freed by close())
 * @var resource|false
 */
private $xml_parser = false;
/**
 * The XML input file pathname (false when no input file is opened)
 * @var string|false
 */
private $in_file = false;
/**
 * The XML input file filedescriptor (false when no input file is opened)
 * @var resource|false
 */
private $in_fh = false;
/**
 * The current XML output file pathname (false when no output file is opened)
 * @var string|false
 */
private $out_file = false;
/**
 * The current XML output file filedescriptor (false when no output file is opened)
 * @var resource|false
 */
private $out_fh = false;
/**
 * The current XML node depth: incremented on each opening tag and
 * decremented on each closing tag.
 * @var integer
 */
private $depth = 0;
/**
 * Sequence number used as a zero-padded prefix of the output file names;
 * incremented each time an output file name is generated.
 * @var integer
 */
private $fileIndex = 0;
/**
 * When true (the default), character data is written with only the
 * '<', '>' and '&' chars escaped, for compatibility with the
 * extractFileFromXmlDocument() function. Attribute values are always
 * fully escaped, whatever the value of this flag.
 * @var boolean
 */
private $compatibilityEscape = true;
/**
 * Prepare a splitter that splits a XML file into smaller XML files: each child
 * node of the root node will be stored into a dedicated file named after the
 * node 'name' or 'id' attribute.
 *
 * The XML parser is created here (UTF-8, case folding and whitespace skipping
 * both set to 0) and this object's startElement(), endElement() and
 * characterData() methods are registered as its handlers.
 *
 * @param string $splitdir the directory in which the nodes XML files will be produced
 * @throws Dcp\Exception if $splitdir is not a writable directory
 */
public function __construct($splitdir)
{
    $usableDir = is_dir($splitdir) && is_writable($splitdir);
    if (!$usableDir) {
        $this->errmsg = sprintf(_("Invalid directory '%s'.") , $splitdir);
        throw new Dcp\Exception($this->errmsg);
    }

    $this->splitdir = $splitdir;

    // Keep a local alias of the parser to configure it step by step.
    $parser = $this->xml_parser = xml_parser_create('UTF-8');
    xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, 0);
    xml_parser_set_option($parser, XML_OPTION_SKIP_WHITE, 0);

    // Handlers are resolved as methods of this object.
    xml_set_object($parser, $this);
    xml_set_element_handler($parser, "startElement", "endElement");
    xml_set_character_data_handler($parser, "characterData");
}
/**
 * Parse the given XML input file and produce the XML output files.
 *
 * The input is read and fed to the XML parser in chunks of 8192 bytes.
 * All files (and the parser) are closed when parsing ends, whether it
 * succeeded or an exception was raised.
 *
 * @param string $file the XML input file
 * @throws Exception on read error or XML parse error (the exception is re-thrown after cleanup)
 */
public function split($file)
{
    $this->open($file);

    try {
        do {
            $chunk = fread($this->in_fh, 8192);
            if ($chunk === false) {
                $this->errmsg = sprintf(_("Error reading from file '%s'") , $this->in_file);
                throw new Dcp\Exception($this->errmsg);
            }
            // Tell the parser whether this chunk is the last one.
            $isLastChunk = feof($this->in_fh);
            $parsed = xml_parse($this->xml_parser, $chunk, $isLastChunk);
            if (!$parsed) {
                $xmlError = xml_error_string(xml_get_error_code($this->xml_parser));
                $xmlLine = xml_get_current_line_number($this->xml_parser);
                $this->errmsg = sprintf(_("XML error %s at line %d") , $xmlError, $xmlLine);
                throw new Dcp\Exception($this->errmsg);
            }
        } while (!$isLastChunk);
    }
    catch(Exception $e) {
        $this->close();
        throw $e;
    }
    $this->close();
}
/**
 * Release the XML parser and close all files opened by the XML Splitter.
 *
 * Safe to call more than once: the parser is only freed while it is set.
 *
 * @return void
 */
public function close()
{
    if ($this->xml_parser !== false) {
        xml_parser_free($this->xml_parser);
        $this->xml_parser = false;
    }
    $this->closeOutputFile();
    $this->closeInputFile();
}
/**
 * Escape the five XML reserved chars (&, ", ', <, >) into their
 * corresponding entities.
 *
 * @param string $str the string to escape
 * @return string the string with reserved characters escaped
 */
static public function escapeEntities($str)
{
    // '&' comes first so that the '&' of the entities produced by the
    // following replacements is not escaped a second time.
    $entityByChar = array(
        '&' => '&amp;',
        '"' => '&quot;',
        "'" => '&apos;',
        '<' => '&lt;',
        '>' => '&gt;'
    );
    return str_replace(array_keys($entityByChar) , array_values($entityByChar) , $str);
}
/**
 * Convert the five predefined XML entities (&quot;, &apos;, &lt;, &gt;, &amp;)
 * back into their corresponding char.
 *
 * @param string $str the string to unescape
 * @return string the string with the XML entities converted back to their characters
 */
static public function unescapeEntities($str)
{
    // '&amp;' comes last so that the '&' it produces cannot be combined
    // with the following text and be decoded a second time.
    $charByEntity = array(
        '&quot;' => '"',
        '&apos;' => "'",
        '&lt;' => '<',
        '&gt;' => '>',
        '&amp;' => '&'
    );
    return str_replace(array_keys($charByEntity) , array_values($charByEntity) , $str);
}
/**
 * Reduced escaping: only the '<', '>' and '&' chars are converted to
 * entities (quotes are left untouched), for compatibility with the
 * extractFileFromXmlDocument() function.
 *
 * @param string $str the string to escape
 * @return string the string with reserved characters escaped
 */
static private function compatibilityEscapeEntities($str)
{
    // '&' comes first so that the '&' of '&lt;' / '&gt;' is not escaped again.
    $entityByChar = array(
        '&' => '&amp;',
        '<' => '&lt;',
        '>' => '&gt;'
    );
    return str_replace(array_keys($entityByChar) , array_values($entityByChar) , $str);
}
/**
 * Open, for reading, the input XML file that will be split.
 *
 * The pathname is recorded before the file is opened, so it is available
 * for the error message when opening fails.
 *
 * @param string $file the XML input file pathname
 * @throws Dcp\Exception if the file cannot be opened
 * @return void
 */
private function open($file)
{
    $this->in_file = $file;
    $handle = fopen($file, "r");
    $this->in_fh = $handle;
    if ($handle === false) {
        $this->errmsg = sprintf(_("Could not open '%s' for reading.") , $this->in_file);
        throw new Dcp\Exception($this->errmsg);
    }
}
/**
 * Write a string to the current output file.
 *
 * Nothing is written (and no error is raised) when no output file is
 * currently opened.
 *
 * @param string $str the string to write
 * @throws Dcp\Exception if the write fails
 * @return void
 */
private function writeOutputFile($str)
{
    if ($this->out_fh === false) {
        // No output file opened: silently drop the string.
        return;
    }
    if (fwrite($this->out_fh, $str) === false) {
        $this->errmsg = sprintf(_("Error writing to ouput file '%s'.") , $this->out_file);
        throw new Dcp\Exception($this->errmsg);
    }
}
/**
 * Write an opening tag, with its attributes, to the current output file.
 *
 * Attribute values are always fully escaped with escapeEntities().
 *
 * @param array $node array structure containing the starting node element
 *                    ('name' => tag name, 'attrs' => attribute name/value pairs)
 * @return void
 */
private function printStartNode(array $node)
{
    $pairs = array();
    foreach ($node['attrs'] as $attrName => $attrValue) {
        $pairs[] = sprintf('%s="%s"', $attrName, $this::escapeEntities($attrValue));
    }
    // A separating space is only needed when there is at least one attribute.
    $separator = (count($pairs) > 0) ? ' ' : '';
    $this->writeOutputFile(sprintf("<%s%s%s>", $node['name'], $separator, join(" ", $pairs)));
}
/**
 * Write a closing tag, followed by a newline, to the current output file.
 *
 * @param array $node array structure containing the ending node element
 *                    ('name' => tag name)
 * @return void
 */
private function printEndNode(array $node)
{
    $this->writeOutputFile(sprintf("</%s>\n", $node['name']));
}
/**
 * Write character data to the current output file, after escaping it.
 *
 * The reduced (compatibility) escaping is used when the
 * $compatibilityEscape flag is set, the full escaping otherwise.
 *
 * @param string $data the node data
 * @return void
 */
private function printData($data)
{
    $escaped = $this->compatibilityEscape
        ? $this::compatibilityEscapeEntities($data)
        : $this::escapeEntities($data);
    $this->writeOutputFile($escaped);
}
/**
 * XML parser handler for character data: the data is forwarded to
 * printData() as long as the parser is inside the root node.
 *
 * @param resource $parser the XML parser resource
 * @param string $data the data extracted by the parser
 * @return void
 */
/** @noinspection PhpUnusedPrivateMethodInspection */
private function characterData(&$parser, $data)
{
    if ($this->depth < 1) {
        // Outside of the root node: nothing to output.
        return;
    }
    $this->printData($data);
}
/**
 * XML Parser handler for start element
 *
 * Increments the current depth, checks that the root node (depth 1) is a
 * 'documents' node, opens a new output file for each child of the root
 * node (depth 2), then writes the opening tag.
 *
 * @param resource $parser the XML parser resource
 * @param string $name the current opening tag name
 * @param array $attrs the current opening tag attributes
 * @throws Dcp\Exception if the root node is not a 'documents' node
 * @return void
 */
/** @noinspection PhpUnusedPrivateMethodInspection */
private function startElement(&$parser, $name, $attrs)
{
    $this->depth++;

    if ($this->depth == 1 && $name != 'documents') {
        $this->errmsg = sprintf(_("XML Root node is not a '%s' node (root node is '%s').") , 'documents', $name);
        throw new Dcp\Exception($this->errmsg);
    }

    $node = array(
        'name' => $name,
        'attrs' => $attrs
    );
    if ($this->depth == 2) {
        // Each direct child of the root node gets its own output file.
        $this->openOutputFile($node);
    }
    $this->printStartNode($node);
}
/**
 * XML Parser handler for end element
 *
 * Decrements the current depth, writes the closing tag, and closes the
 * current output file once the parser is back at the root node level.
 *
 * @param resource $parser the XML parser resource
 * @param string $name the current closing tag name
 * @return void
 */
/** @noinspection PhpUnusedPrivateMethodInspection */
private function endElement(&$parser, $name)
{
    $this->depth--;
    $this->printEndNode(array(
        'name' => $name
    ));
    if ($this->depth == 1) {
        // A direct child of the root node has just been closed.
        $this->closeOutputFile();
    }
}
/**
 * Create and open an output file for the given starting node, and write
 * the XML declaration into it.
 *
 * The output file name is composed of a zero-padded sequence number
 * followed by the node 'name' attribute (if made only of letters, digits,
 * '_' and '-'), else by its 'id' attribute (if made only of digits), else
 * by a string generated with uniqid().
 *
 * @param array $node the current starting node
 * @throws Dcp\Exception if an output file is already opened, if the target
 *                       file already exists, or if it cannot be created
 * @return void
 */
private function openOutputFile(array $node)
{
    if ($this->out_fh !== false) {
        $this->errmsg = sprintf(_("Output file '%s' is already opened.") , $this->out_file);
        error_log($this->errmsg);
        throw new Dcp\Exception($this->errmsg);
    }

    $attrs = isset($node['attrs']) ? $node['attrs'] : array();
    $hasUsableName = isset($attrs['name']) && preg_match('/^[a-zA-Z0-9_-]+$/', $attrs['name']);
    // The 'id' attribute is only considered when 'name' cannot be used.
    $hasUsableId = !$hasUsableName && isset($attrs['id']) && preg_match('/^[0-9]+$/', $attrs['id']);

    if ($hasUsableName) {
        $fname = $attrs['name'];
    } elseif ($hasUsableId) {
        $fname = $attrs['id'];
    } else {
        $fname = uniqid("new");
    }

    if ($fname == '') {
        $this->errmsg = sprintf(_("Could not generate output file name for node '%s'.") , $node['name']);
        throw new Dcp\Exception($this->errmsg);
    }

    $path = sprintf("%s%s%05d%s.xml", $this->splitdir, DIRECTORY_SEPARATOR, $this->fileIndex++, $fname);
    $this->out_file = $path;
    if (is_file($path)) {
        $this->errmsg = sprintf(_("Output file '%s' already exists.") , $this->out_file);
        throw new Dcp\Exception($this->errmsg);
    }

    // 'x' mode: the file is created, and the call fails if it already exists.
    $this->out_fh = fopen($path, 'wx');
    if ($this->out_fh === false) {
        $this->errmsg = sprintf(_("Xml import : Cannot create file %s") , $this->out_file);
        throw new Dcp\Exception($this->errmsg);
    }
    $this->writeOutputFile("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
}
/**
 * Close the XML input file (if one is opened) and forget its pathname.
 *
 * @return void
 */
private function closeInputFile()
{
    if ($this->in_fh !== false) {
        fclose($this->in_fh);
        $this->in_fh = false;
    }
    $this->in_file = false;
}
/**
 * Close the current node output file (if one is opened) and forget its
 * pathname.
 *
 * @return void
 */
private function closeOutputFile()
{
    if ($this->out_fh !== false) {
        fclose($this->out_fh);
        $this->out_fh = false;
    }
    $this->out_file = false;
}
393 }
static escapeEntities($str)
Exception class: uses exceptionCode to correctly identify the exception.
Definition: exceptions.php:19
$ret
$file
if(is_numeric($parms['famid'])) $attrList
static unescapeEntities($str)
$value
$data
__construct($splitdir)
← centre documentaire © anakeen