Skip to content

Commit ea5cebd

Browse files
Merge pull request #175 from cleverage/174
#174 Add FileSplitterTask using Filesystem/SplFile. Add doc.
2 parents 3d3af7d + 97ac380 commit ea5cebd

File tree

4 files changed

+259
-0
lines changed

4 files changed

+259
-0
lines changed

docs/index.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@
5858
- [FileMoverTask]
5959
- [FileReaderTask](reference/tasks/file_reader_task.md)
6060
- [FileRemoverTask]
61+
- [FileSplitterTask](reference/tasks/file_splitter_task.md)
6162
- [FileWriterTask]
6263
- [FolderBrowserTask](reference/tasks/folder_browser_task.md)
6364
- [InputFileReaderTask](reference/tasks/input_file_reader_task.md)
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
FileSplitterTask
2+
=============
3+
4+
Split long file into smaller ones
5+
6+
Task reference
7+
--------------
8+
9+
* **Service**: `CleverAge\ProcessBundle\Task\File\FileSplitterTask`
10+
* **Iterable task**
11+
12+
Accepted inputs
13+
---------------
14+
15+
`array`: inputs are merged with task defined options.
16+
17+
Possible outputs
18+
----------------
19+
20+
`string`: absolute path of the produced file
21+
22+
Options
23+
-------
24+
25+
| Code | Type | Required | Default | Description |
26+
|-------------------------|-----------------|:--------:|----------|------------------------------------------|
27+
| `file_path` | `string` | **X** | | Path of the file to read from (absolute) |
28+
| `max_lines` | `int` | | 1000 | Max number of lines in each produced file (must be at least 1) |
29+
30+
Example
31+
-------
32+
33+
```yaml
34+
# Task configuration level
35+
entry:
36+
service: '@CleverAge\ProcessBundle\Task\File\FileSplitterTask'
37+
options:
38+
file_path: '%kernel.project_dir%/var/data/json_stream_reader.json'
39+
max_lines: 1
40+
```
41+
42+

src/Filesystem/SplFile.php

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
/*
6+
* This file is part of the CleverAge/ProcessBundle package.
7+
*
8+
* Copyright (c) Clever-Age
9+
*
10+
* For the full copyright and license information, please view the LICENSE
11+
* file that was distributed with this source code.
12+
*/
13+
14+
namespace CleverAge\ProcessBundle\Filesystem;
15+
16+
/**
 * Thin wrapper around \SplFileObject to read or write a file line by line
 * while keeping track of a line number that starts at 1.
 */
class SplFile
{
    protected \SplFileObject $file;

    /** Cached result of getLineCount(); stays null until it is first computed. */
    protected ?int $lineCount = null;

    /** Starts at 1 and is incremented by every successful readLine()/writeLine(). */
    protected int $lineNumber = 1;

    /**
     * @param string          $filename           Path given to \SplFileObject
     * @param string          $mode               Open mode given to \SplFileObject
     * @param array<int>|null $splFileObjectFlags \SplFileObject flag constants to combine;
     *                                            null selects DROP_NEW_LINE | READ_AHEAD | SKIP_EMPTY
     */
    public function __construct(
        string $filename,
        string $mode = 'rb',
        ?array $splFileObjectFlags = null,
    ) {
        $this->file = new \SplFileObject($filename, $mode);

        // Flags are bit masks, so they are combined with a bitwise OR: adding them up
        // would turn a flag listed twice into a completely different flag.
        // The defaults are useful to skip empty trailing lines (doesn't work well
        // on PHP 8, see readLine() code).
        $flags = null !== $splFileObjectFlags
            ? array_reduce(
                $splFileObjectFlags,
                static fn (int $carry, int $flag): int => $carry | $flag,
                0,
            )
            : \SplFileObject::DROP_NEW_LINE | \SplFileObject::READ_AHEAD | \SplFileObject::SKIP_EMPTY;

        $this->file->setFlags($flags);
    }

    /**
     * Count the lines of the file by iterating over it once; the result is cached.
     *
     * Warning! This method will rewind the file to the beginning before and after counting the lines!
     */
    public function getLineCount(): int
    {
        if (null === $this->lineCount) {
            $this->rewind();
            $line = 0;
            while (!$this->isEndOfFile()) {
                ++$line;
                $this->file->next();
            }
            $this->rewind();

            $this->lineCount = $line;
        }

        return $this->lineCount;
    }

    /**
     * Current value of the internal line counter (1 right after construction or rewind()).
     */
    public function getLineNumber(): int
    {
        return $this->lineNumber;
    }

    public function isEndOfFile(): bool
    {
        return $this->file->eof();
    }

    /**
     * Read the next line and move the file pointer forward.
     *
     * @param int|null $length Currently unused; kept for signature compatibility
     *
     * @return string|null The line read, or null when the end of file is reached
     *                     or when the line read is empty
     */
    public function readLine(?int $length = null): ?string
    {
        if ($this->isEndOfFile()) {
            return null;
        }

        $rawLine = $this->file->fgets();
        // Fix issue on PHP 8 with empty line at the end, even if SKIP_EMPTY is set
        if ('' === $rawLine) {
            return null;
        }
        ++$this->lineNumber;

        return $rawLine;
    }

    /**
     * Write $data followed by PHP_EOL and advance the line counter.
     *
     * @return int The line counter after the write
     */
    public function writeLine(string $data): int
    {
        $this->file->fwrite($data.\PHP_EOL);
        ++$this->lineNumber;

        return $this->lineNumber;
    }

    /**
     * Rewind the file to its beginning and reset the line counter to 1.
     */
    public function rewind(): void
    {
        $this->file->rewind();
        $this->lineNumber = 1;
    }
}

src/Task/File/FileSplitterTask.php

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
/*
6+
* This file is part of the CleverAge/ProcessBundle package.
7+
*
8+
* Copyright (c) Clever-Age
9+
*
10+
* For the full copyright and license information, please view the LICENSE
11+
* file that was distributed with this source code.
12+
*/
13+
14+
namespace CleverAge\ProcessBundle\Task\File;
15+
16+
use CleverAge\ProcessBundle\Filesystem\SplFile;
17+
use CleverAge\ProcessBundle\Model\AbstractConfigurableTask;
18+
use CleverAge\ProcessBundle\Model\IterableTaskInterface;
19+
use CleverAge\ProcessBundle\Model\ProcessState;
20+
use Symfony\Component\OptionsResolver\OptionsResolver;
21+
22+
/**
 * Split long file into smaller ones.
 *
 * Iterable task: each call to execute() writes up to `max_lines` lines of the
 * source file into a new temporary file and outputs the path of that file.
 */
class FileSplitterTask extends AbstractConfigurableTask implements IterableTaskInterface
{
    /** Source file being split; null before the first execute() and once the end is reached. */
    protected ?SplFile $file = null;

    private ?array $splFileObjectFlags = null;

    /** Line count of the source file, computed when the source file is opened. */
    private int $lineCount;

    /**
     * Open the source file on the first call, then output the path of the next chunk.
     */
    public function execute(ProcessState $state): void
    {
        $options = $this->getMergedOptions($state);
        $this->splFileObjectFlags = [\SplFileObject::READ_AHEAD, \SplFileObject::SKIP_EMPTY];
        if (!$this->file instanceof SplFile) {
            $this->file = new SplFile($options['file_path'], 'rb', $this->splFileObjectFlags);
            $this->lineCount = $this->file->getLineCount();
        }

        // Return a temporary file containing a limited number of lines
        $splittedFilename = $this->splitFile($this->file, $options['max_lines']);
        $state->setOutput($splittedFilename);
    }

    /**
     * Moves the internal pointer to the next element,
     * return true if the task has a next element
     * return false if the task has terminated its iteration.
     */
    public function next(ProcessState $state): bool
    {
        if (!$this->file instanceof SplFile) {
            return false;
        }

        // Fix issue on PHP 8 with empty line at the end, even if SKIP_EMPTY is set
        $endOfFile = $this->file->isEndOfFile() || $this->file->getLineNumber() > $this->lineCount;
        if ($endOfFile) {
            // Drop the handle so that a later execute() reopens the source file
            $this->file = null;
        }

        return !$endOfFile;
    }

    /**
     * Copy up to $maxLines lines from $file into a new temporary file.
     *
     * @return string Path of the temporary file
     *
     * @throws \InvalidArgumentException When $maxLines is lower than 1
     */
    protected function splitFile(SplFile $file, int $maxLines): string
    {
        // With a non-positive limit nothing would ever be read from the source,
        // so the end of file would never be reached and the iteration would never stop.
        if ($maxLines < 1) {
            throw new \InvalidArgumentException(sprintf('Option "max_lines" must be greater than or equal to 1, %d given.', $maxLines));
        }

        $tmpFilePath = sys_get_temp_dir().\DIRECTORY_SEPARATOR.'php_'.uniqid('process', false).'.tmp';
        $splitFile = new SplFile($tmpFilePath, 'wb', $this->splFileObjectFlags);

        // NOTE(review): the source is opened without DROP_NEW_LINE while writeLine()
        // appends PHP_EOL - confirm produced files do not contain doubled line breaks.
        while ($splitFile->getLineNumber() <= $maxLines && !$file->isEndOfFile()) {
            $line = $file->readLine();
            if ('' === $line || null === $line) {
                continue; // This is probably an empty line, no harm to skip it
            }
            $splitFile->writeLine($line);
        }

        return $tmpFilePath;
    }

    protected function configureOptions(OptionsResolver $resolver): void
    {
        $resolver->setRequired(['file_path']);
        $resolver->setAllowedTypes('file_path', ['string']);
        $resolver->setDefaults([
            'max_lines' => 1000,
        ]);
        $resolver->setAllowedTypes('max_lines', ['int']);
        // Reject limits that could never produce a non-empty chunk
        $resolver->setAllowedValues('max_lines', static fn (mixed $value): bool => \is_int($value) && $value >= 1);
    }

    /**
     * Merge the task options with the input; input values win when it is an array,
     * any other input is ignored.
     *
     * @return array<mixed>
     */
    protected function getMergedOptions(ProcessState $state): array
    {
        /** @var array<mixed> $options */
        $options = $this->getOptions($state);

        /** @var array<mixed>|mixed $input */
        $input = $state->getInput();
        if (!\is_array($input)) {
            $input = [];
        }

        return array_merge($options, $input);
    }
}

0 commit comments

Comments
 (0)