Skip to content
This repository was archived by the owner on Apr 15, 2020. It is now read-only.

Commit 06f391f

Browse files
committed
update on the crackpdf feature
1 parent 62f76b9 commit 06f391f

File tree

4 files changed

+63
-7
lines changed

4 files changed

+63
-7
lines changed

Diff for: README.md

+23-1
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,28 @@ vi config.php
3939

4040
Run the update script, ideally you would do this periodically e.g. by cron:
4141

42-
```
42+
```bash
4343
php bin/update.php
4444
```
45+
46+
47+
Configuration
48+
-------------
49+
50+
### Credentials
51+
52+
You need to specify the username and the password of your ZIV account in the `username` and `password` settings in `config.php`.
53+
54+
### Courses
55+
56+
Add all courses you want to crawl to the `config.php`, see `config.php.dist` for some examples. There are two types of courses in Learnweb, some use the course view to list files, some use the folder view of Moodle. Looking at the URL you can easily determine which type of course you are dealing with.
57+
58+
### Cracking PDF files
59+
60+
Sometimes PDF files have a user password which you need to enter every time you open the file. Learnweb-Crawler can convert the PDF file to PS and back to PDF, which produces a PDF file that can be opened without a password. This only works if you know the user password and the tools `pdftops` and `ps2pdf` are installed on your system. On Debian simply install these two packages:
61+
62+
```bash
63+
apt-get install ghostscript poppler-utils
64+
```
65+
66+
The commands that are actually executed may be specified as `pdf2PsCmd` and `ps2PdfCmd` in `config.php`; see `config.php.dist` for an example.

Diff for: bin/update.php

+3-1
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,9 @@
1818
define('LEARNWEB_DEBUG', (bool) $config->debug);
1919

2020
// create pdf cracker
21-
$pdf = new LearnwebCrawler\PdfCrack();
21+
$pdf2Ps = (isset($config->pdf2PsCmd) ? $config->pdf2PsCmd : null);
22+
// Optional custom ps2pdf command; the setting is named `ps2PdfCmd` (as in
// config.php.dist). null lets PdfCrack fall back to its built-in default.
$ps2Pdf = (isset($config->ps2PdfCmd) ? $config->ps2PdfCmd : null);
23+
$pdf = new LearnwebCrawler\PdfCrack($pdf2Ps, $ps2Pdf);
2224

2325
// handle learnweb courses
2426
if (isset($config->learnwebcourses)) {

Diff for: config.php.dist

+14-3
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ return array(
2525
array(
2626
'source' => '...',
2727
'target' => '...',
28+
// add the crackpdf key if you want to convert PDF files
2829
'crackpdf' => '...'
2930
)
3031
),
@@ -46,8 +47,8 @@ return array(
4647
)
4748
),
4849

49-
// There is still support for uDoo courses, even though it is not used anymore
50-
// You need to specify the URL of the course and the target folder where to store files
50+
// There is still support for uDoo courses, even though it is not used anymore
51+
// You need to specify the URL of the course and the target folder where to store files
5152
'udoo' => array(
5253
array(
5354
'source' => 'http://www.wi.uni-muenster.de/...',
@@ -67,5 +68,15 @@ return array(
6768
)
6869
)
6970
)
70-
)
71+
),
72+
73+
// You may specify a custom pdf2ps command if you like; %1$s is the PDF password,
74+
// %2$s is the source file (PDF file), %3$s is the target file (temporary PS file)
75+
// simply remove this setting if you want to stick with the defaults
76+
'pdf2PsCmd' => '/usr/bin/pdftops -upw %1$s %2$s %3$s',
77+
78+
// You may specify a custom ps2pdf command if you like; %1$s is the source file
79+
// (temporary PS file) and %2$s is the target file (PDF file)
80+
// simply remove this setting if you want to stick with the defaults
81+
'ps2PdfCmd' => '/usr/bin/ps2pdf %1$s %2$s' // %1$s = source PS file, %2$s = target PDF file
7182
);

Diff for: library/LearnwebCrawler/PdfCrack.php

+23-2
Original file line numberDiff line numberDiff line change
@@ -4,23 +4,44 @@
44

55
class PdfCrack
66
{
7+
protected $pdf2PsCmd = null;
8+
protected $ps2PdfCmd = null;
9+
10+
public function __construct($pdf2PsCmd, $ps2PdfCmd)
11+
{
12+
if ($pdf2PsCmd != null) {
13+
$this->pdf2PsCmd = (string) $pdf2PsCmd;
14+
}
15+
else {
16+
$this->pdf2PsCmd = "pdftops -upw %s %s %s";
17+
}
18+
19+
if ($ps2PdfCmd != null) {
20+
$this->ps2PdfCmd = (string) $ps2PdfCmd;
21+
}
22+
else {
23+
$this->ps2PdfCmd = "ps2pdf %s %s";
24+
}
25+
}
26+
727
public function crackFile ($file, $password)
828
{
29+
if ($password === false) return;
930
if (substr($file, -4) != '.pdf') return;
1031

1132
$psFile = substr($file, 0, -4) . '.tmp.ps';
1233
$crackFile = substr($file, 0, -4) . '.cracked.pdf';
1334

1435
$cmd = sprintf(
15-
'pdftops -upw %s %s %s',
36+
$this->pdf2PsCmd,
1637
escapeshellarg($password),
1738
escapeshellarg($file),
1839
escapeshellarg($psFile)
1940
);
2041
exec ($cmd);
2142

2243
$cmd = sprintf(
23-
'ps2pdf %s %s',
44+
$this->ps2PdfCmd,
2445
escapeshellarg($psFile),
2546
escapeshellarg($crackFile)
2647
);

0 commit comments

Comments
 (0)