Core  3.2
PHP API documentation
 All Data Structures Namespaces Files Functions Variables Pages
Class.ImportXml.php
Go to the documentation of this file.
1 <?php
2 /*
3  * @author Anakeen
4  * @package FDL
5 */
6 /**
7  * Import directory with document descriptions
8  *
9  * @author Anakeen
10  * @version $Id: freedom_import_dir.php,v 1.5 2007/01/19 16:23:32 eric Exp $
11  * @package FDL
12  * @subpackage GED
13  */
14 /**
15  */
16 
17 namespace Dcp\Core;
18 include_once ("FDL/import_tar.php");
19 include_once ('FDL/Class.XMLSplitter.php');
20 
21 class importXml
22 {
23  protected $analyze = false;
24  protected $policy = "update";
25  protected $dirid = 0;
26  protected $verifyAttributeAccess = true;
/**
 * Store the "analyze only" flag of this importer.
 *
 * The value is only recorded on the instance here.
 * NOTE(review): presumably it selects a dry-run import - confirm against \importSingleDocument.
 *
 * @param bool $analyze value recorded in the $analyze property
 * @return void
 */
public function analyzeOnly($analyze)
{
    $this->analyze = $analyze;
}
31 
/**
 * Record the import policy.
 *
 * Any falsy value (empty string, null, 0, ...) falls back to "update",
 * which is also the property's initial value.
 *
 * @param string $policy policy name; falsy means "update"
 * @return void
 */
public function setPolicy($policy)
{
    $this->policy = $policy ? $policy : "update";
}
39 
/**
 * Record the identifier of the import directory.
 *
 * The value is stored unchanged in the $dirid property (initially 0).
 *
 * @param int|string $dirid directory identifier
 * @return void
 */
public function setImportDirectory($dirid)
{
    $this->dirid = $dirid;
}
/**
 * Record whether attribute access must be verified during import.
 *
 * The flag is stored on the instance (initially true).
 *
 * @param boolean $verifyAttributeAccess value recorded in the $verifyAttributeAccess property
 * @return void
 */
public function setVerifyAttributeAccess($verifyAttributeAccess)
{
    $this->verifyAttributeAccess = $verifyAttributeAccess;
}
/**
 * Import the documents described in one XML file.
 *
 * The file is split into a fresh temporary directory, embedded files are
 * extracted, every resulting XML file is imported, and the temporary
 * directory is removed afterwards - including when the import fails.
 *
 * @param string $xmlFile file path
 * @return array log information about the import (one entry per imported file)
 * @throws \Exception
 * @throws \Dcp\Exception IMPC0002 when the temporary directory cannot be created
 */
public function importSingleXmlFile($xmlFile)
{
    $splitdir = uniqid(getTmpDir() . "/xmlsplit");
    // mkdir warnings are suppressed on purpose: the is_dir() check below reports the failure
    @mkdir($splitdir);
    if (!is_dir($splitdir)) {
        throw new \Dcp\Exception("IMPC0002", $splitdir);
    }
    // escape the path once so the cleanup command cannot be altered by odd characters
    $cleanup = sprintf("/bin/rm -fr %s ", escapeshellarg($splitdir));
    try {
        self::splitXmlDocument($xmlFile, $splitdir);
        // NOTE(review): the source listing drops one line at this point; this call
        // (otherwise unused in the class) is restored here - confirm against upstream
        self::extractFilesFromXmlDirectory($splitdir);

        $log = $this->importXmlDirectory($splitdir);
    }
    catch(\Exception $e) {
        // do not leave the temporary directory behind when the import fails
        system($cleanup);
        throw $e;
    }
    system($cleanup);
    return $log;
}
73 
/**
 * Import the documents contained in a zip archive of XML files.
 *
 * The archive is unpacked into a fresh temporary directory, embedded files
 * are extracted, every XML file found is imported, and the temporary
 * directory is removed afterwards - including when the import fails.
 *
 * @param string $zipFile path of the zip archive
 * @return array log information about the import (one entry per imported file)
 * @throws \Exception
 * @throws \Dcp\Exception IMPC0003 when the temporary directory cannot be created
 */
public function importZipFile($zipFile)
{
    $splitdir = uniqid(getTmpDir() . "/xmlsplit");
    // mkdir warnings are suppressed on purpose: the is_dir() check below reports the failure
    @mkdir($splitdir);
    if (!is_dir($splitdir)) {
        throw new \Dcp\Exception("IMPC0003", $splitdir);
    }
    // escape the path once so the cleanup command cannot be altered by odd characters
    $cleanup = sprintf("/bin/rm -fr %s ", escapeshellarg($splitdir));
    try {
        self::unZipXmlDocument($zipFile, $splitdir);
        // NOTE(review): the source listing drops one line at this point; this call
        // (otherwise unused in the class) is restored here - confirm against upstream
        self::extractFilesFromXmlDirectory($splitdir);

        $log = $this->importXmlDirectory($splitdir);
    }
    catch(\Exception $e) {
        // do not leave the temporary directory behind when the import fails
        system($cleanup);
        throw $e;
    }
    system($cleanup);
    return $log;
}
90 
/**
 * Unpack a zip archive into a directory using the `unzip` command.
 *
 * @param string $zipfiles path of the zip archive (resolved with realpath())
 * @param string $splitdir directory the archive is unpacked into
 * @return string always an empty string
 * @throws \Dcp\Exception IMPC0004 when the command exits with a non-zero status
 */
public static function unZipXmlDocument($zipfiles, $splitdir)
{
    $zipfiles = realpath($zipfiles);
    // both paths are escaped: file names may contain spaces or shell metacharacters.
    // realpath() yields false for a missing file; it then becomes an empty argument
    // and unzip fails with a non-zero status, which is reported below.
    $command = sprintf("cd %s && unzip %s", escapeshellarg($splitdir), escapeshellarg((string)$zipfiles));
    $lastLine = exec($command, $out, $retval);
    if ($retval != 0) {
        throw new \Dcp\Exception("IMPC0004", $zipfiles, $lastLine);
    }
    return "";
}
/**
 * Import every XML file found directly inside a directory.
 *
 * Only regular files whose name does not start with "." and whose
 * extension is exactly "xml" are imported, in sorted file-name order.
 *
 * @param string $splitdir directory to read
 * @return array one log entry per imported file; empty when the directory cannot be opened
 */
public function importXmlDirectory($splitdir)
{
    $tlog = array();
    $handle = opendir($splitdir);
    if (!$handle) {
        return $tlog;
    }

    $files = array();
    while (false !== ($file = readdir($handle))) {
        if ($file[0] == "." || !is_file("$splitdir/$file")) {
            continue;
        }
        // pathinfo() yields '' for names without a dot, so "axml" is not mistaken for an xml file
        if (pathinfo($file, PATHINFO_EXTENSION) == "xml") {
            $files[] = $file;
        }
    }
    // the listing is complete: release the handle before importing so it cannot leak on error
    closedir($handle);

    sort($files);
    foreach ($files as $file) {
        $log = array();
        $this->importXmlFileDocument("$splitdir/$file", $log);
        $tlog[] = $log;
    }

    return $tlog;
}
129 
/**
 * Import the single document described by one XML file.
 *
 * The root tag name is used as the family name; root attributes "id",
 * "name", "key" and "folders" drive the import, and every normal
 * attribute of the family is read from the elements carrying its id.
 * The collected values are handed to \importSingleDocument.
 *
 * Family objects are cached in a static array for the life of the process.
 *
 * @param string $xmlfile path of the XML file
 * @param array $log (output) set to a default report, then replaced by the
 *                   result of \importSingleDocument::getImportResult();
 *                   unresolved relations are appended to $log["err"] and
 *                   turn $log["action"] into "ignored"
 * @return string the error message when the file is missing or cannot be
 *                loaded, an empty string otherwise
 */
public function importXmlFileDocument($xmlfile, &$log)
{
    static $families = array();
    // NOTE(review): $dbaccess had no visible assignment in the listing (a line is
    // dropped there); getDbAccess() is the project accessor listed for it - confirm
    $dbaccess = getDbAccess();
    $log = array(
        "err" => "",
        "msg" => "",
        "specmsg" => "",
        "folderid" => 0,
        "foldername" => "",
        "filename" => "",
        "title" => "",
        "id" => "",
        "values" => array() ,
        "familyid" => 0,
        "familyname" => "",
        "action" => "-"
    );

    if (!is_file($xmlfile)) {
        $err = sprintf(_("Xml import file %s not found") , $xmlfile);
        $log["err"] = $err;
        return $err;
    }

    $splitdir = dirname($xmlfile);
    $dom = new \Dcp\Utils\XDOMDocument();
    try {
        $dom->load($xmlfile, 0, $error);
    }
    catch(\Dcp\Utils\XDOMDocumentException $e) {
        $log["action"] = 'ignored';
        $log["err"] = $e->getMessage();
        return $e->getMessage();
    }

    $root = $dom->documentElement;
    $id = $root->getAttribute("id");
    $name = $root->getAttribute("name");
    $key = $root->getAttribute("key");
    $folders = $root->getAttribute("folders");

    // default key is the title; a "key" root attribute supplies a comma separated list.
    // array_map() is used instead of a by-reference foreach: the reference left behind
    // by such a loop let later assignments overwrite the last key.
    $tkey = array(
        "title"
    );
    if ($key) {
        $tkey = array_map('trim', explode(',', $key));
    }

    $family = $root->tagName;
    $famid = getFamIdFromName($dbaccess, $family);
    if (!isset($families[$famid])) {
        $families[$famid] = new_doc($dbaccess, $famid);
    }
    /**
     * @var \DocFam[] $families
     */
    $la = $families[$famid]->getNormalAttributes();

    // $tdoc is the import row, $tord names the column of each value after the fixed head
    $tord = array();
    $tdoc = array(
        "DOC",
        $famid,
        ($id) ? $id : $name,
        ''
    );

    // every non-empty root attribute is passed along as an "extra:" column
    foreach ($root->attributes as $rname => $ra) {
        $rootValue = $root->getAttribute($rname);
        if ($rootValue) {
            $tord[] = "extra:$rname";
            $tdoc[] = $rootValue;
        }
    }

    $msg = '';
    /**
     * @var \BasicAttribute $attr
     */
    foreach ($la as $attr) {
        $nodes = $dom->getElementsByTagName($attr->id);
        $val = array();
        /**
         * @var \DomElement $item
         */
        foreach ($nodes as $item) {
            switch ($attr->type) {
                case 'array':
                    // array containers carry no value of their own
                    break;

                case 'docid':
                case 'account':
                    // resolution order: explicit id, then logical name(s), then title
                    $relId = $item->getAttribute("id");
                    if (!$relId) {
                        $logicalName = $item->getAttribute("name");
                        if ($logicalName) {
                            if (strpos($logicalName, ',') !== false) {
                                $lids = array();
                                foreach (explode(',', $logicalName) as $lname) {
                                    $lids[] = getIdFromName($dbaccess, $lname);
                                }
                                $relId = implode(",", $lids);
                            } else {
                                $relId = getIdFromName($dbaccess, $logicalName);
                            }
                        }
                        if (!$relId && $item->nodeValue) {
                            // search from title
                            $relId = getIdFromTitle($dbaccess, $item->nodeValue, $attr->format);
                            if (!$relId) {
                                $msg.= sprintf(_("No identifier found for relation '%s' %s in %s file") . "\n", $logicalName ? $logicalName : $item->nodeValue, $attr->id, $xmlfile);
                            }
                        }
                    }
                    if ($attr->getOption("multiple") == "yes") {
                        // '\n' is single-quoted on purpose: a literal backslash followed by "n"
                        $relId = str_replace(',', '\n', $relId);
                        if ($attr->inArray()) {
                            $relId = str_replace(array(
                                '\\n',
                                "\n",
                            ) , "<BR>", $relId);
                        }
                    }
                    $val[] = $relId;
                    break;

                case 'image':
                case 'file':
                    $href = $item->getAttribute("href");
                    if ($href) {
                        $val[] = $href;
                    } else {
                        $vid = $item->getAttribute("vid");
                        $mime = $item->getAttribute("mime");
                        $title = $item->getAttribute("title");
                        $val[] = ($vid) ? "$mime|$vid|$title" : '';
                    }
                    break;

                case 'htmltext':
                    $val[] = str_replace("\n", " ", str_replace(">\n", ">", $item->nodeValue));
                    break;

                default:
                    $val[] = $item->nodeValue;
            }
        }
        $tord[] = $attr->id;
        $tdoc[] = implode("\n", $val);
    }

    $o = new \importSingleDocument();
    if ($tkey) $o->setKey($tkey);
    if ($tord) $o->setOrder($tord);
    // the settings recorded by this class' setters are forwarded to the single-document importer
    $o->analyzeOnly($this->analyze);
    $o->setPolicy($this->policy);
    $o->setFilePath($splitdir);
    $o->setVerifyAttributeAccess($this->verifyAttributeAccess);
    if ($folders) {
        // folders may be separated by commas and/or spaces; empty pieces are dropped
        $tfolders = array_filter(explode(' ', str_replace(',', ' ', $folders)));
        if ($tfolders) {
            $o->setTargetDirectories($tfolders);
        }
    } elseif (!empty($this->dirid)) {
        // fall back to the directory given to setImportDirectory()
        $o->setTargetDirectory($this->dirid);
    }

    $o->import($tdoc);
    $log = $o->getImportResult();

    if ($msg) {
        $log["err"].= "\n" . $msg;
        $log["action"] = "ignored";
    }
    return '';
}
/**
 * Split an XML file into a directory by delegating to \XMLSplitter.
 *
 * @param string $xmlfiles path handed to \XMLSplitter::split()
 * @param string $splitdir directory handed to the \XMLSplitter constructor
 * @return string always an empty string
 */
public static function splitXmlDocument($xmlfiles, $splitdir)
{
    $splitter = new \XMLSplitter($splitdir);
    $splitter->split($xmlfiles);

    return '';
}
/**
 * Run extractFileFromXmlDocument() on every entry of a directory that is
 * not a sub-directory and whose name does not start with ".".
 *
 * Nothing is done when the directory cannot be opened.
 *
 * @param string $splitdir directory to scan
 * @return void
 */
public static function extractFilesFromXmlDirectory($splitdir)
{
    $dirHandle = opendir($splitdir);
    if (!$dirHandle) {
        return;
    }
    while (($entry = readdir($dirHandle)) !== false) {
        // skip dot entries first, then anything that is a directory
        if ($entry[0] == "." || is_dir("$splitdir/$entry")) {
            continue;
        }
        self::extractFileFromXmlDocument("$splitdir/$entry");
    }
    closedir($dirHandle);
}
/**
 * Write a string to a stream and fail loudly on a short or failed write.
 *
 * On failure the stream is closed before the exception is thrown.
 *
 * @param resource $fd open stream
 * @param string $str data to write
 * @return int number of bytes written (equal to the length of $str)
 * @throws \Dcp\Exception IMPC0012 with the stream uri (or '*unknown*file*')
 */
protected static function fputsError($fd, $str)
{
    $written = fputs($fd, $str);
    if ($written !== false && $written == strlen($str)) {
        return $written;
    }

    // incomplete write: report which file was being written, when the stream knows it
    $meta = stream_get_meta_data($fd);
    $filename = '*unknown*file*';
    if (is_array($meta) && isset($meta['uri'])) {
        $filename = $meta['uri'];
    }
    fclose($fd);
    throw new \Dcp\Exception("IMPC0012", $filename);
}
/**
 * Extract base64 encoded files from an XML file into a local "media" directory.
 *
 * The XML file is rewritten without the encoded data: an element carrying
 * a mime attribute receives an href attribute pointing to the extracted
 * file, and an entity-encoded <img> with a base64 data: source gets a
 * file:// source instead. Each extracted file is base64-decoded in place.
 *
 * The file is read line by line (at most 4095 bytes per read) and
 * rewritten to "<file>.new", which replaces the original at the end.
 * The media index is a static counter incremented for every line read,
 * so it keeps growing across calls.
 *
 * @param string $file path of the XML file to process
 * @return void
 * @throws \Dcp\Exception IMPC0001 (file missing), IMPC0009 / IMPC0010 (cannot
 *         open the file / the ".new" file), IMPC0005 to IMPC0008 (media file
 *         creation), IMPC0012 (write failure), IMPC0011 (final rename)
 */
public static function extractFileFromXmlDocument($file)
{
    static $mediaindex = 0;
    $dir = dirname($file);
    if (!file_exists($file)) {
        throw new \Dcp\Exception("IMPC0001", $file);
    }
    $mediadir = "media";
    if (!is_dir("$dir/$mediadir")) mkdir("$dir/$mediadir");
    $f = fopen($file, "r");
    if ($f === false) {
        throw new \Dcp\Exception("IMPC0009", $file);
    }
    $nf = fopen($file . ".new", "w");
    if ($nf === false) {
        // do not leak the input handle when the output cannot be created
        fclose($f);
        throw new \Dcp\Exception("IMPC0010", $file . ".new");
    }
    $fi = null;
    try {
        while (!feof($f)) {
            $buffer = fgets($f, 4096);
            $mediaindex++;
            if (preg_match("/<([a-z_0-9-]+)[^>]*mime=\"[^\"]+\"(.*)>(.*)/", $buffer, $reg)) {
                $isEmptyTag = (substr($reg[2], -1) == "/") || (substr($reg[2], -strlen($reg[1]) - 3) == '></' . $reg[1]);
                if ($isEmptyTag) {
                    // nothing embedded: copy the line unchanged
                    self::fputsError($nf, $buffer);
                    continue;
                }
                $tag = $reg[1];
                if (preg_match("/<([a-z_0-9-]+)[^>]*title=\"([^\"]+)\"/", $buffer, $regtitle)) {
                    $title = \XMLSplitter::unescapeEntities($regtitle[2]);
                } else if (preg_match("/<([a-z_0-9-]+)[^>]*title='([^']+)'/", $buffer, $regtitle)) {
                    $title = \XMLSplitter::unescapeEntities($regtitle[2]);
                } else $title = "noname";
                list($rfin, $fin, $fi) = self::openMediaFile($dir, $mediadir, $mediaindex, $title);
                if (preg_match("/(.*)(<$tag [^>]*)>/", $buffer, $regend)) {
                    self::fputsError($nf, $regend[1] . $regend[2] . ' href="' . \XMLSplitter::escapeEntities($rfin) . '">');
                }
                if (preg_match("/>([^<]*)<\/$tag>(.*)/", $buffer, $regend)) {
                    // the encoded data ends on the same line
                    self::fputsError($fi, $regend[1]);
                    self::fputsError($nf, "</$tag>");
                    self::fputsError($nf, $regend[2]);
                } else {
                    // the encoded data continues: copy lines until the closing tag
                    self::fputsError($fi, $reg[3]);
                    $findtheend = false;
                    while (!feof($f) && (!$findtheend)) {
                        $buffer = fgets($f, 4096);
                        if (preg_match("/(.*)<\/$tag>(.*)/", $buffer, $regend)) {
                            self::fputsError($fi, $regend[1]);
                            self::fputsError($nf, "</$tag>");
                            self::fputsError($nf, $regend[2]);
                            $findtheend = true;
                        } else {
                            self::fputsError($fi, $buffer);
                        }
                    }
                }
                fclose($fi);
                self::base64Decodefile($fin);
            } else if (preg_match("/&lt;img.*?src=\"data:[^;]*;base64,(.*)/", $buffer, $reg)) {
                if (preg_match("/&lt;img.*?title=\"([^\"]+)\"/", $buffer, $regtitle)) {
                    $title = $regtitle[1];
                } else if (preg_match("/&lt;img.*?title='([^']+)'/", $buffer, $regtitle)) {
                    $title = $regtitle[1];
                } else $title = "noname";
                list($rfin, $fin, $fi) = self::openMediaFile($dir, $mediadir, $mediaindex, $title);
                if (preg_match("/(.*)(&lt;img.*?)src=\"data:[^;]*;base64,/", $buffer, $regend)) {
                    $chaintoput = $regend[1] . $regend[2] . ' src="file://' . $rfin . '"';
                    self::fputsError($nf, $chaintoput);
                }
                if (preg_match("/&lt;img.*?src=\"data:[^;]*;base64,([^\"]*)\"(.*)/", $buffer, $regend)) {
                    // the encoded data ends on the same line
                    self::fputsError($fi, $regend[1]);
                    self::fputsError($nf, $regend[2]);
                } else {
                    // the encoded data continues: copy lines until the closing quote
                    self::fputsError($fi, $reg[1]);
                    $findtheend = false;
                    while (!feof($f) && (!$findtheend)) {
                        $buffer = fgets($f, 4096);
                        if (preg_match("/([^\"]*)\"(.*)/", $buffer, $regend)) {
                            self::fputsError($fi, $regend[1]);
                            self::fputsError($nf, $regend[2]);
                            $findtheend = true;
                        } else {
                            self::fputsError($fi, $buffer);
                        }
                    }
                }
                fclose($fi);
                self::base64Decodefile($fin);
            } else {
                self::fputsError($nf, $buffer);
            }
        }
    }
    catch(\Exception $e) {
        // fputsError() closes the stream it failed on, so only close what is still open
        foreach (array($fi, $f, $nf) as $stream) {
            if (is_resource($stream)) {
                fclose($stream);
            }
        }
        throw $e;
    }
    fclose($f);
    fclose($nf);
    if (rename($file . ".new", $file) === false) {
        throw new \Dcp\Exception("IMPC0011", $file . ".new", $file);
    }
}

/**
 * Create the per-index media directory and open the extracted file for writing.
 *
 * @param string $dir directory of the XML file being processed
 * @param string $mediadir name of the media sub-directory
 * @param int $mediaindex index used as sub-directory name
 * @param string $title file name of the extracted file
 * @return array array(relative path, absolute path, open write handle)
 * @throws \Dcp\Exception IMPC0005 (title contains a directory separator),
 *         IMPC0006 / IMPC0007 (index directory cannot be created / is not a
 *         directory), IMPC0008 (file cannot be opened)
 */
protected static function openMediaFile($dir, $mediadir, $mediaindex, $title)
{
    // a title containing a separator could write outside the media directory
    if (strpos($title, DIRECTORY_SEPARATOR) !== false) {
        throw new \Dcp\Exception("IMPC0005", DIRECTORY_SEPARATOR, $title);
    }
    $mediaIndexDir = sprintf("%s/%s/%d", $dir, $mediadir, $mediaindex);
    if (!file_exists($mediaIndexDir)) {
        if (mkdir($mediaIndexDir) === false) {
            throw new \Dcp\Exception("IMPC0006", $mediaIndexDir);
        }
    }
    if (!is_dir($mediaIndexDir)) {
        throw new \Dcp\Exception("IMPC0007", $mediaIndexDir);
    }
    $rfin = sprintf("%s/%d/%s", $mediadir, $mediaindex, $title);
    $fin = sprintf("%s/%s", $dir, $rfin);
    $fi = fopen($fin, "w");
    if ($fi === false) {
        // report the path that could not be opened, not the failed handle
        throw new \Dcp\Exception("IMPC0008", $fin);
    }
    return array(
        $rfin,
        $fin,
        $fi
    );
}
503 
/**
 * Replace the content of a base64 encoded file by its decoded form.
 *
 * The file is decoded chunk by chunk into a temporary file which then
 * takes the place of the original. Characters outside the base64
 * alphabet (line breaks, spaces) are dropped and decoding is always done
 * on whole groups of four characters, so a chunk boundary falling inside
 * a group cannot corrupt the result.
 *
 * @param string $filename path of the base64 encoded file
 * @return void
 * @throws \Dcp\Exception when the source or the temporary file cannot be opened
 *         (NOTE(review): reuses IMPC0009 / IMPC0010, the codes this class raises
 *         for unreadable / unwritable files - confirm they fit)
 */
public static function base64Decodefile($filename)
{
    $tmpdest = uniqid(getTmpDir() . "/fdlbin");
    $chunkSize = 1024 * 30;
    $src = fopen($filename, 'rb');
    if ($src === false) {
        throw new \Dcp\Exception("IMPC0009", $filename);
    }
    $dst = fopen($tmpdest, 'wb');
    if ($dst === false) {
        fclose($src);
        throw new \Dcp\Exception("IMPC0010", $tmpdest);
    }
    // base64 characters read but not yet decoded (fewer than four after each round)
    $pending = '';
    while (!feof($src)) {
        $chunk = fread($src, $chunkSize);
        if ($chunk === false) {
            break;
        }
        // base64_decode() ignores foreign characters anyway; removing them first
        // keeps the four-character grouping exact across chunks
        $pending.= preg_replace('/[^A-Za-z0-9+\/=]/', '', $chunk);
        $usable = strlen($pending) - (strlen($pending) % 4);
        if ($usable > 0) {
            fwrite($dst, base64_decode(substr($pending, 0, $usable)));
            $pending = (string)substr($pending, $usable);
        }
    }
    if ($pending !== '') {
        // trailing incomplete group (unpadded input)
        fwrite($dst, base64_decode($pending));
    }
    fclose($dst);
    fclose($src);
    rename($tmpdest, $filename);
}
517 }
extractFilesFromXmlDirectory($splitdir)
$tdoc
static extractFileFromXmlDocument($file)
static escapeEntities($str)
setVerifyAttributeAccess($verifyAttributeAccess)
importXmlDirectory($splitdir)
$filename
$file
getIdFromTitle($dbaccess, $title, $famid="", $only=false)
splitXmlDocument($xmlfiles, $splitdir)
importSingleXmlFile($xmlFile)
$log
Definition: wsh.php:33
static fputsError($fd, $str)
static extractFilesFromXmlDirectory($splitdir)
getFamIdFromName($dbaccess, $name)
getTmpDir($def= '/tmp')
Definition: Lib.Common.php:150
if($dbaccess=="") $o
extractFileFromXmlDocument($file)
static base64Decodefile($filename)
getDbAccess()
Definition: Lib.Common.php:368
$dir
Definition: resizeimg.php:144
static unescapeEntities($str)
$dbaccess
Definition: checkVault.php:17
getIdFromName($dbaccess, $name)
if($file) if($subject==""&&$file) if($subject=="") $err
static splitXmlDocument($xmlfiles, $splitdir)
static unZipXmlDocument($zipfiles, $splitdir)
← centre documentaire © anakeen