Core  3.2
PHP API documentation
 All Data Structures Namespaces Files Functions Variables Pages
buildTools/lib/ods2csv.php
Go to the documentation of this file.
1 <?php
2 /*
3  * @author Anakeen
4  * @license http://creativecommons.org/licenses/by-nc-sa/2.0/fr/ Anakeen - licence CC
5 */
6 
/**
 * Convert the content.xml of an OpenDocument Spreadsheet (ODS) archive into
 * CSV text.
 *
 * The XML is read with the event based XML parser: every TABLE:TABLE-ROW
 * element becomes one CSV line and every TABLE:TABLE-CELL element one column.
 * Columns are joined with SEPCHAR; a SEPCHAR found inside a cell is replaced
 * by ALTSEPCHAR so that the column layout stays intact.
 */
class Ods2Csv
{
    /** Text written in place of SEPCHAR when SEPCHAR occurs inside a cell */
    const ALTSEPCHAR = ' --- ';
    /** Column separator of the generated CSV */
    const SEPCHAR = ';';

    /** @var array parsed rows, indexed from 1; each row is a list of cell strings */
    private $rows = array();
    /** @var int index of the row being parsed (0 until the first row starts) */
    private $nrows = 0;
    /** @var int index of the next cell to write in the current row */
    private $ncol = 0;
    /** @var string text collected for the cell being parsed */
    private $celldata = '';
    /** @var int TABLE:NUMBER-COLUMNS-REPEATED of the current cell, 0 when absent */
    private $colrepeat = 0;
    /** @var bool true once a TABLE:TABLE-ROW has been opened and not yet closed */
    private $inrow = false;
    /** @var bool true while inside a TABLE:TABLE-CELL */
    private $incell = false;
    /** @var array attributes (upper-cased names) of the cell being parsed */
    private $cellattrs = array();

    /**
     * Take an ODS file and produce one CSV
     *
     * @param string $odsfile path to ODS file
     * @param string $csvfile path to CSV output file
     * @throws Exception when a path is missing, the ODS file does not exist,
     *                   the extraction or the parsing fails, or the CSV file
     *                   cannot be written
     * @return void
     */
    public function convertOds2Csv($odsfile, $csvfile)
    {
        if ($odsfile === "" || !file_exists($odsfile) || $csvfile === "") {
            throw new Exception("ODS convert needs an ODS path and a CSV path");
        }

        // The parser callbacks accumulate into the object: start from scratch
        // so that one instance can convert several files.
        $this->resetParserState();

        $content = $this->ods2content($odsfile);
        $csv = $this->xmlcontent2csv($content);
        if (file_put_contents($csvfile, $csv) === false) {
            throw new Exception(sprintf("Unable to convert ODS to CSV fo %s", $odsfile));
        }
    }

    /**
     * Extract content from an ods file
     *
     * Runs the external "unzip" command to extract content.xml into a
     * temporary directory, reads it, then removes the temporary file and
     * directory.
     *
     * @param string $odsfile file path
     * @throws Exception when the file does not exist or content.xml cannot be
     *                   extracted or read
     * @return string the raw content.xml
     */
    protected function ods2content($odsfile)
    {
        if (!file_exists($odsfile)) {
            throw new Exception("file $odsfile not found");
        }
        $cibledir = uniqid("/var/tmp/ods");

        // Both paths are quoted: a file name holding spaces or shell
        // metacharacters must not alter the command.
        $cmd = sprintf(
            "unzip -j %s content.xml -d %s >/dev/null",
            escapeshellarg($odsfile),
            escapeshellarg($cibledir)
        );
        system($cmd);

        $contentxml = $cibledir . "/content.xml";
        $content = false;
        if (file_exists($contentxml)) {
            $content = file_get_contents($contentxml);
            unlink($contentxml);
        }
        // Remove the temporary directory on the failure path as well.
        if (is_dir($cibledir)) {
            rmdir($cibledir);
        }
        if ($content === false) {
            throw new Exception("unable to extract $odsfile");
        }
        return $content;
    }

    /**
     * Parse an ODS content.xml and return its CSV rendering.
     *
     * Rows are appended to the rows already held by the object; the state is
     * only cleared by convertOds2Csv().
     *
     * @param string $xmlcontent XML text to parse
     *
     * @throws Exception when the XML cannot be parsed
     * @return string one "\n" terminated line per row, cells joined by SEPCHAR
     */
    protected function xmlcontent2csv($xmlcontent)
    {
        $xml_parser = xml_parser_create();
        // Case folding upper-cases element and attribute names: the handlers
        // compare against upper-case names such as TABLE:TABLE-CELL.
        xml_parser_set_option($xml_parser, XML_OPTION_CASE_FOLDING, true);
        xml_parser_set_option($xml_parser, XML_OPTION_SKIP_WHITE, 0);
        xml_set_element_handler($xml_parser, array(
            $this,
            "startElement"
        ), array(
            $this,
            "endElement"
        ));
        xml_set_character_data_handler($xml_parser, array(
            $this,
            "characterData"
        ));

        // Read the error before the parser is released, and release it on
        // both the success and the failure path.
        $parseError = null;
        if (!xml_parse($xml_parser, $xmlcontent)) {
            $parseError = sprintf(
                "Unable to parse XML : %s line %d",
                xml_error_string(xml_get_error_code($xml_parser)),
                xml_get_current_line_number($xml_parser)
            );
        }
        xml_parser_free($xml_parser);
        if ($parseError !== null) {
            throw new Exception($parseError);
        }

        $fcsv = "";
        foreach ($this->rows as $row) {
            $fcsv .= implode(self::SEPCHAR, $row) . "\n";
        }
        return $fcsv;
    }

    /**
     * XML parser callback: an element is opened.
     *
     * @noinspection PhpUnusedParameterInspection
     * @param mixed  $parser the XML parser (unused)
     * @param string $name   upper-cased element name
     * @param array  $attrs  element attributes, names upper-cased
     * @return void
     */
    protected function startElement($parser, $name, $attrs)
    {
        if ($name == "TABLE:TABLE-ROW") {
            $this->inrow = true;
            if (isset($this->rows[$this->nrows])) {
                // fill empty cells of the row that has just been completed
                $this->rows[$this->nrows] = $this->fillRowGaps($this->rows[$this->nrows]);
            }
            $this->nrows++;
            $this->ncol = 0;
            $this->rows[$this->nrows] = array();
        }

        if ($name == "TABLE:TABLE-CELL") {
            $this->incell = true;
            $this->celldata = "";
            $this->cellattrs = $attrs;
            if (!empty($attrs["TABLE:NUMBER-COLUMNS-REPEATED"])) {
                $this->colrepeat = intval($attrs["TABLE:NUMBER-COLUMNS-REPEATED"]);
            }
        }

        if ($name == "TEXT:P") {
            // A further paragraph in a cell that already holds text: separate
            // it with the two characters backslash + n. The marker goes into
            // the cell buffer, which is what endElement() stores in the row;
            // a real line feed would split the CSV line.
            // NOTE(review): consumers presumably turn this marker back into a
            // line break - confirm.
            if ($this->incell && $this->celldata !== '') {
                $this->celldata .= '\n';
            }
        }
    }

    /**
     * XML parser callback: an element is closed.
     *
     * @noinspection PhpUnusedParameterInspection
     * @param mixed  $parser the XML parser (unused)
     * @param string $name   upper-cased element name
     * @return void
     */
    protected function endElement($parser, $name)
    {
        if ($name == "TABLE:TABLE-ROW") {
            // Remove trailing empty cells
            $i = $this->ncol - 1;
            while ($i >= 0) {
                if (strlen($this->rows[$this->nrows][$i]) > 0) {
                    break;
                }
                $i--;
            }
            array_splice($this->rows[$this->nrows], $i + 1);
            $this->inrow = false;
        }

        if ($name == "TEXT:S") {
            $this->celldata .= ' ';
        }

        if ($name == "TABLE:TABLE-CELL") {
            $this->incell = false;

            if ($this->celldata === '') {
                // No text content: fall back on the typed value attributes.
                // The separator is neutralised here too, as it is for text.
                $this->celldata = $this->escapeSeparator($this->getOfficeTypedValue($this->cellattrs));
            }

            $this->rows[$this->nrows][$this->ncol] = $this->celldata;

            // A repeated cell is written colrepeat times in a row.
            for ($i = 1; $i < $this->colrepeat; $i++) {
                $this->ncol++;
                $this->rows[$this->nrows][$this->ncol] = $this->celldata;
            }
            $this->ncol++;
            $this->colrepeat = 0;
        }
    }

    /**
     * XML parser callback: character data.
     *
     * Data is only collected inside a cell. A chunk made of nothing but
     * white space including a line break is dropped.
     *
     * @noinspection PhpUnusedParameterInspection
     * @param mixed  $parser the XML parser (unused)
     * @param string $data   text chunk
     * @return void
     */
    protected function characterData($parser, $data)
    {
        if ($this->inrow && $this->incell) {
            $this->celldata .= preg_replace('/^\s*[\r\n]\s*$/ms', '', $this->escapeSeparator($data));
        }
    }

    /**
     * Tell whether a file looks like an ODS file.
     *
     * True when the name ends with ".ods" or when the "file" command reports
     * a zip or OpenDocument spreadsheet MIME type.
     *
     * @param string $filename file path
     * @return bool
     */
    protected function seemsODS($filename)
    {
        if (preg_match('/\.ods$/', $filename)) {
            return true;
        }
        $output = shell_exec(sprintf("file -bi %s", escapeshellarg($filename)));
        if (!is_string($output)) {
            // command unavailable or no output
            return false;
        }
        // "file -bi" may print "type/subtype; charset=binary": only the part
        // before the first ";" is the MIME type.
        $parts = explode(';', $output);
        $sys = trim($parts[0]);

        return in_array($sys, array(
            "application/x-zip",
            "application/zip",
            "application/vnd.oasis.opendocument.spreadsheet"
        ), true);
    }

    /**
     * Read the value of a cell from its attributes.
     *
     * Looks for OFFICE:<type>-VALUE, <type> being the upper-cased
     * OFFICE:VALUE-TYPE attribute, then for OFFICE:VALUE.
     *
     * @param array $attrs cell attributes, names upper-cased
     * @return string the value, '' when none is found
     */
    private function getOfficeTypedValue($attrs)
    {
        $value = '';
        /* Get value from property OFFICE:<type>-VALUE */
        if (isset($attrs['OFFICE:VALUE-TYPE'])) {
            $type = strtoupper($attrs['OFFICE:VALUE-TYPE']);
            $propName = 'OFFICE:' . $type . '-VALUE';
            if (isset($attrs[$propName])) {
                $value = (string)$attrs[$propName];
            }
        }
        /* Get value from property OFFICE:VALUE */
        if ($value === '' && isset($attrs['OFFICE:VALUE'])) {
            $value = (string)$attrs['OFFICE:VALUE'];
        }
        return $value;
    }

    /**
     * Replace the CSV separator found in a cell text by ALTSEPCHAR.
     *
     * @param string $text cell text
     * @return string
     */
    private function escapeSeparator($text)
    {
        return str_replace(self::SEPCHAR, self::ALTSEPCHAR, $text);
    }

    /**
     * Give every index between 0 and the highest index of a row a value
     * ('' for the missing ones) and order the row by index.
     *
     * @param array $row cells indexed by column number
     * @return array
     */
    private function fillRowGaps(array $row)
    {
        if (count($row) > 0) {
            $last = max(array_keys($row));
            for ($idx = 0; $idx <= $last; $idx++) {
                if (!isset($row[$idx])) {
                    $row[$idx] = '';
                }
            }
            ksort($row, SORT_NUMERIC);
        }
        return $row;
    }

    /**
     * Forget everything collected by a previous parsing.
     *
     * @return void
     */
    private function resetParserState()
    {
        $this->rows = array();
        $this->nrows = 0;
        $this->ncol = 0;
        $this->celldata = '';
        $this->colrepeat = 0;
        $this->inrow = false;
        $this->incell = false;
        $this->cellattrs = array();
    }
}
$csv
Definition: checkVault.php:41
seemsODS($filename)
characterData($parser, $data)
$filename
const SEPCHAR
Definition: import_file.php:26
ods2content($odsfile)
const ALTSEPCHAR
Definition: import_file.php:25
endElement($parser, $name)
$csvfile
const ALTSEPCHAR
foreach($argv as $arg) $cmd
convertOds2Csv($odsfile, $csvfile)
$odsfile
startElement($parser, $name, $attrs)
xmlcontent2csv($xmlcontent)
$value
$data
← centre documentaire © anakeen