src/Eccube/Service/CsvImportService.php line 221

Open in your IDE?
  1. <?php
  2. /*
  3. * This file is part of EC-CUBE
  4. *
  5. * Copyright(c) EC-CUBE CO.,LTD. All Rights Reserved.
  6. *
  7. * http://www.ec-cube.co.jp/
  8. *
  9. * For the full copyright and license information, please view the LICENSE
  10. * file that was distributed with this source code.
  11. */
  12. namespace Eccube\Service;
  13. use Eccube\Stream\Filter\ConvertLineFeedFilter;
  14. use Eccube\Stream\Filter\SjisToUtf8EncodingFilter;
  15. /**
  16. * Copyright (C) 2012-2014 David de Boer <david@ddeboer.nl>
  17. *
  18. * Permission is hereby granted, free of charge, to any person obtaining a copy of
  19. * this software and associated documentation files (the "Software"), to deal in
  20. * the Software without restriction, including without limitation the rights to
  21. * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
  22. * the Software, and to permit persons to whom the Software is furnished to do so,
  23. * subject to the following conditions:
  24. *
  25. * The above copyright notice and this permission notice shall be included in all
  26. * copies or substantial portions of the Software.
  27. *
  28. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  29. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  30. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  31. * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  32. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  33. * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  34. * SOFTWARE.
  35. */
  class CsvImportService implements \Iterator, \SeekableIterator, \Countable
  {
      /**
       * Duplicate-header strategy: rename repeated headers by appending a
       * counter (dup, dup1, dup2, ...).
       */
      public const DUPLICATE_HEADERS_INCREMENT = 1;

      /**
       * Duplicate-header strategy: collect the values of repeated headers into
       * a single array (dup => [value1, value2, value3]).
       */
      public const DUPLICATE_HEADERS_MERGE = 2;

      /**
       * Number of the row that contains the column names
       *
       * @var int|null
       */
      protected $headerRowNumber;

      /**
       * CSV file (the filtered copy produced by applyStreamFilter())
       *
       * @var \SplFileObject
       */
      protected $file;

      /**
       * Column headers as read from the CSV file, stored as
       * header name => number of occurrences
       *
       * @var array
       */
      protected $columnHeaders = [];

      /**
       * Number of column headers, stored and re-used for performance
       *
       * In case of duplicate headers, this is always the number of unmerged headers.
       *
       * @var int
       */
      protected $headersCount;

      /**
       * Total number of rows in the CSV file (lazily computed by count())
       *
       * @var int|null
       */
      protected $count;

      /**
       * Faulty CSV rows, keyed by row number
       *
       * @var array
       */
      protected $errors = [];

      /**
       * How to handle duplicate headers (one of the DUPLICATE_HEADERS_* constants, or null)
       *
       * @var int|null
       */
      protected $duplicateHeadersFlag;

      /**
       * Wrap a CSV file, normalising its line endings and character encoding.
       *
       * The given file is copied through stream filters into a temporary file,
       * and that copy is what this reader iterates over.
       *
       * @param \SplFileObject $file
       * @param string $delimiter
       * @param string $enclosure
       * @param string $escape
       */
      public function __construct(\SplFileObject $file, $delimiter = ',', $enclosure = '"', $escape = '\\')
      {
          // The auto_detect_line_endings setting is deprecated as of PHP 8.1 and
          // touching it raises a deprecation notice there, so only enable it on
          // older runtimes. Line endings are converted by the stream filter below.
          if (\PHP_VERSION_ID < 80100) {
              ini_set('auto_detect_line_endings', true);
          }

          // Apply stream filters to convert the character encoding and line feed codes.
          // see https://github.com/EC-CUBE/ec-cube/issues/5252
          $filters = [
              ConvertLineFeedFilter::class,
          ];
          if (!\mb_check_encoding($file->current(), 'UTF-8')) {
              // The first line is not valid UTF-8, so apply the SJIS-win stream filter as well.
              $filters[] = SjisToUtf8EncodingFilter::class;
          }

          $this->file = self::applyStreamFilter($file, ...$filters);
          $this->file->setFlags(
              \SplFileObject::READ_CSV |
              \SplFileObject::SKIP_EMPTY |
              \SplFileObject::READ_AHEAD
          );
          $this->file->setCsvControl(
              $delimiter,
              $enclosure,
              $escape
          );
      }

      /**
       * Return the current row as an array
       *
       * If a header row has been set, an associative array keyed by the column
       * headers is returned. A row whose column count does not match the
       * headers is returned as-is (numerically indexed) and is also recorded,
       * so that it is reported by getErrors().
       *
       * @return array|null The row, or null when headers are set and the pointer is past the last row
       */
      #[\ReturnTypeWillChange]
      public function current()
      {
          // If the CSV has no column headers just return the line
          if (empty($this->columnHeaders)) {
              return $this->file->current();
          }

          if (!$this->valid()) {
              return null;
          }

          $line = $this->file->current();

          // See if values for duplicate headers should be merged
          if (self::DUPLICATE_HEADERS_MERGE === $this->duplicateHeadersFlag) {
              $line = $this->mergeDuplicates($line);
          }

          // Count the number of elements in both: they must be equal.
          if (count($this->columnHeaders) === count($line)) {
              return array_combine(array_keys($this->columnHeaders), $line);
          }

          // Column count mismatch: remember the row (keyed by row number, so
          // repeated calls for the same row do not duplicate it) and hand the
          // raw line back to the caller unchanged.
          $this->errors[$this->key()] = $line;

          return $line;
      }

      /**
       * Get column headers
       *
       * @return array
       */
      public function getColumnHeaders()
      {
          return array_keys($this->columnHeaders);
      }

      /**
       * Set column headers
       *
       * @param array $columnHeaders
       */
      public function setColumnHeaders(array $columnHeaders)
      {
          $this->columnHeaders = array_count_values($columnHeaders);
          $this->headersCount = count($columnHeaders);
      }

      /**
       * Set header row number
       *
       * @param int $rowNumber Number of the row that contains column header names
       * @param int|null $duplicates How to handle duplicates (optional). One of:
       *        - self::DUPLICATE_HEADERS_INCREMENT;
       *          increments duplicates (dup, dup1, dup2 etc.)
       *        - self::DUPLICATE_HEADERS_MERGE; merges
       *          values for duplicate headers into an array
       *          (dup => [value1, value2, value3])
       *
       * @return bool false when no header row could be read, true otherwise
       */
      public function setHeaderRowNumber($rowNumber, $duplicates = null)
      {
          $this->duplicateHeadersFlag = $duplicates;
          $this->headerRowNumber = $rowNumber;

          $headers = $this->readHeaderRow($rowNumber);
          // Anything that is not an array (e.g. false for an unreadable row)
          // cannot be used as a header row.
          if (!is_array($headers)) {
              return false;
          }

          $this->setColumnHeaders($headers);

          return true;
      }

      /**
       * Rewind the file pointer
       *
       * If a header row has been set, the pointer is set just below the header
       * row. That way, when you iterate over the rows, that header row is
       * skipped.
       */
      #[\ReturnTypeWillChange]
      public function rewind()
      {
          $this->file->rewind();
          if (null !== $this->headerRowNumber) {
              $this->file->seek($this->headerRowNumber + 1);
          }
      }

      /**
       * Count the rows this reader iterates over.
       *
       * The result is computed once by walking the iterator and is cached; the
       * pointer is moved back to where it was before counting.
       *
       * @return int
       */
      #[\ReturnTypeWillChange]
      public function count()
      {
          if (null === $this->count) {
              $position = $this->key();
              $this->count = iterator_count($this);
              $this->seek($position);
          }

          return $this->count;
      }

      /**
       * Advance to the next row.
       */
      #[\ReturnTypeWillChange]
      public function next()
      {
          $this->file->next();
      }

      /**
       * Whether the pointer is on a readable row.
       *
       * @return bool
       */
      #[\ReturnTypeWillChange]
      public function valid()
      {
          return $this->file->valid();
      }

      /**
       * Current row number as reported by the underlying file.
       *
       * @return int
       */
      #[\ReturnTypeWillChange]
      public function key()
      {
          return $this->file->key();
      }

      /**
       * Move the pointer to the given row number.
       *
       * @param int $pointer Row number
       */
      #[\ReturnTypeWillChange]
      public function seek($pointer)
      {
          $this->file->seek($pointer);
      }

      /**
       * Get the field names, i.e. the column headers.
       *
       * @return array
       */
      public function getFields()
      {
          return $this->getColumnHeaders();
      }

      /**
       * Get a row
       *
       * @param int $number Row number
       *
       * @return array
       */
      public function getRow($number)
      {
          $this->seek($number);

          return $this->current();
      }

      /**
       * Get rows that have an invalid number of columns
       *
       * @return array Faulty rows keyed by row number
       */
      public function getErrors()
      {
          if (0 === $this->key()) {
              // Iterator has not yet been processed, so do that now
              foreach ($this as $row) { /* noop */
              }
          }

          return $this->errors;
      }

      /**
       * Does the reader contain any invalid rows?
       *
       * @return bool
       */
      public function hasErrors()
      {
          return count($this->getErrors()) > 0;
      }

      /**
       * Apply stream filters and return a new SplFileObject.
       *
       * The content of $file is written through the filters into a temporary
       * file, and a reader on that temporary file is returned.
       *
       * @param \SplFileObject $file SplFileObject to apply the stream filters to
       * @param string ...$filters Class names of the stream filters to apply
       *
       * @return \SplFileObject SplFileObject with the filters applied
       *
       * @throws \RuntimeException When the temporary file cannot be created
       */
      public static function applyStreamFilter(\SplFileObject $file, string ...$filters): \SplFileObject
      {
          foreach ($filters as $filter) {
              // Each filter is registered under its own class name.
              \stream_filter_register($filter, $filter);
          }

          $tempFile = tmpfile();
          if (false === $tempFile) {
              throw new \RuntimeException('Unable to create a temporary file for the CSV import.');
          }

          try {
              foreach ($filters as $filter) {
                  \stream_filter_append($tempFile, $filter);
              }
              foreach ($file as $line) {
                  fwrite($tempFile, $line);
              }
              $meta = \stream_get_meta_data($tempFile);

              // The new reader is opened by path before the finally block
              // closes the temporary handle.
              return new \SplFileObject($meta['uri'], 'r');
          } finally {
              fclose($tempFile);
          }
      }

      /**
       * Read header row from CSV file
       *
       * When the duplicate-header mode is DUPLICATE_HEADERS_INCREMENT, repeated
       * header names are made unique via incrementHeaders().
       *
       * @param int $rowNumber Row number
       *
       * @return array|false|null Whatever the underlying file returns for that row
       */
      protected function readHeaderRow($rowNumber)
      {
          $this->file->seek($rowNumber);
          $headers = $this->file->current();

          if (is_array($headers) && self::DUPLICATE_HEADERS_INCREMENT === $this->duplicateHeadersFlag) {
              $headers = $this->incrementHeaders($headers);
          }

          return $headers;
      }

      /**
       * Add an increment to duplicate headers
       *
       * So the following line:
       * |duplicate|duplicate|duplicate|
       * |first    |second   |third    |
       *
       * Yields value:
       * $duplicate => 'first', $duplicate1 => 'second', $duplicate2 => 'third'
       *
       * Every header keeps its original position, so the result still lines up
       * with the data columns even when duplicates are not adjacent
       * (a, b, a => a, b, a1).
       *
       * @param array $headers
       *
       * @return array
       */
      protected function incrementHeaders(array $headers)
      {
          $incrementedHeaders = [];
          $occurrences = [];

          foreach ($headers as $header) {
              $name = (string) $header;
              $seen = $occurrences[$name] ?? 0;

              // First occurrence keeps its name; later ones get 1, 2, ... appended.
              $incrementedHeaders[] = 0 === $seen ? $header : $name.$seen;
              $occurrences[$name] = $seen + 1;
          }

          return $incrementedHeaders;
      }

      /**
       * Merges values for duplicate headers into an array
       *
       * So the following line:
       * |duplicate|duplicate|duplicate|
       * |first    |second   |third    |
       *
       * Yields value:
       * $duplicate => ['first', 'second', 'third']
       *
       * Columns are consumed left to right in header order, so duplicate
       * headers are expected to be adjacent in the file.
       *
       * @param array $line
       *
       * @return array
       */
      protected function mergeDuplicates(array $line)
      {
          $values = [];
          $i = 0;

          foreach ($this->columnHeaders as $count) {
              if (1 === $count) {
                  // A row shorter than the header row yields null here rather
                  // than an undefined-offset warning.
                  $values[] = $line[$i] ?? null;
              } else {
                  $values[] = array_slice($line, $i, $count);
              }
              $i += $count;
          }

          return $values;
      }

      /**
       * Convert the character encoding of a row.
       *
       * On PHP 7+ running on Windows the file encoding is CP932, so the values
       * are converted to UTF-8. On any other environment nothing is done.
       *
       * @param array $row
       *
       * @return array
       *
       * @deprecated Unused; scheduled for removal
       */
      protected function convertEncodingRows($row)
      {
          @trigger_error('The '.__METHOD__.' method is deprecated.', E_USER_DEPRECATED);

          if ('\\' === DIRECTORY_SEPARATOR && PHP_VERSION_ID >= 70000) {
              foreach ($row as &$col) {
                  $col = mb_convert_encoding($col, 'UTF-8', 'SJIS-win');
              }
              // Break the reference so later writes to $col cannot alter $row.
              unset($col);
          }

          return $row;
      }
  }