Skip to content

Commit

Permalink
Added: Ability to extract keywords from PDF
Browse files Browse the repository at this point in the history
  • Loading branch information
bymayo committed Jan 6, 2025
1 parent 6bc9572 commit a6f567d
Show file tree
Hide file tree
Showing 12 changed files with 399 additions and 7 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
# PDF Transform Changelog

## 1.0.10 - 2025-01-06

### Added
- Option to extract keywords from PDFs and save them to the database

## 1.0.9 - 2022-12-05

> **Note**
Expand Down
6 changes: 4 additions & 2 deletions composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"name": "bymayo/pdf-transform",
"description": "Transform a PDF page to an image (JPEG, PNG)",
"type": "craft-plugin",
"version": "1.0.9",
"version": "1.0.10",
"keywords": [
"craft",
"cms",
Expand All @@ -23,7 +23,9 @@
],
"require": {
"craftcms/cms": "^3.0.0",
"spatie/pdf-to-image": "^2.0"
"spatie/pdf-to-image": "^2.0",
"spatie/pdf-to-text": "^1.54",
"donatello-za/rake-php-plus": "^1.0"
},
"autoload": {
"psr-4": {
Expand Down
33 changes: 31 additions & 2 deletions src/PdfTransform.php
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@

namespace bymayo\pdftransform;

use bymayo\pdftransform\services\PdfTransformService as PdfTransformServiceService;
use bymayo\pdftransform\services\PdfTransformService;
use bymayo\pdftransform\utilities\PdfTransformUtility;
use bymayo\pdftransform\variables\PdfTransformVariable;
use bymayo\pdftransform\models\Settings;

Expand All @@ -18,6 +19,10 @@
use craft\services\Elements;
use craft\events\PluginEvent;
use craft\web\twig\variables\CraftVariable;
use craft\events\RegisterComponentTypesEvent;
use craft\services\Utilities;
use craft\web\View;
use craft\events\RegisterTemplateRootsEvent;

use yii\base\Event;

Expand Down Expand Up @@ -73,6 +78,12 @@ public function init()

Craft::getLogger()->dispatcher->targets[] = $fileTarget;

Craft::setAlias('@pdf-transform', __DIR__);

$this->setComponents([
'pdfTransform' => PdfTransformService::class
]);

Event::on(
CraftVariable::class,
CraftVariable::EVENT_INIT,
Expand Down Expand Up @@ -110,13 +121,31 @@ function(Event $event) {

if ($element instanceof \craft\elements\Asset) {
if ($event->isNew && $element->extension === 'pdf' && $this->getSettings()->transformPdfsOnUpload) {
PdfTransform::$plugin->pdfTransformService->pdfToImage($element);
PdfTransform::$plugin->pdfTransformService->pdfToImage($element, $this->settings->indexKeywords);
}
}

}
);

Event::on(
View::class,
View::EVENT_REGISTER_CP_TEMPLATE_ROOTS,
function (RegisterTemplateRootsEvent $event) {
if (is_dir($baseDir = $this->getBasePath() . DIRECTORY_SEPARATOR . 'templates')) {
$event->roots[$this->id] = $baseDir;
}
}
);

Event::on(
Utilities::class,
Utilities::EVENT_REGISTER_UTILITY_TYPES,
function(RegisterComponentTypesEvent $event) {
$event->types[] = PdfTransformUtility::class;
}
);

}

// Protected Methods
Expand Down
28 changes: 28 additions & 0 deletions src/controllers/TransformController.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
<?php

namespace bymayo\pdftransform\controllers;

use craft\web\Controller;

use bymayo\pdftransform\PdfTransform;

use Craft;

/**
 * Controller exposing the "transform PDFs" action of the PDF Transform plugin.
 */
class TransformController extends Controller
{
    /**
     * Anonymous access is disabled: the action below hands a request-supplied
     * volume folder to the transform service, so it must not be reachable by
     * unauthenticated visitors.
     *
     * @var bool|array
     */
    protected $allowAnonymous = false;

    /**
     * Reads `volumeFolder` and `indexKeywords` from the request, passes them to
     * the plugin service's `transformVolumeFolder()`, then redirects to the
     * posted URL.
     *
     * @return \yii\web\Response
     * @throws \yii\web\BadRequestHttpException if the request is not a POST
     */
    public function actionTransformPdfs()
    {
        // redirectToPostedUrl() below relies on a posted redirect param, and a
        // state-changing action should not be triggerable through a plain GET.
        $this->requirePostRequest();

        $request = Craft::$app->getRequest();

        $volumeFolder = $request->getParam('volumeFolder');
        $indexKeywords = $request->getParam('indexKeywords');

        PdfTransform::$plugin->pdfTransformService->transformVolumeFolder($volumeFolder, $indexKeywords);

        return $this->redirectToPostedUrl();
    }
}
41 changes: 41 additions & 0 deletions src/jobs/TransformJob.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
<?php

namespace bymayo\pdftransform\jobs;

use bymayo\pdftransform\PdfTransform;

use Craft;
use craft\queue\BaseJob;

/**
 * Queue job that runs the plugin's `pdfToImage()` service call for every
 * entry in `$assets`, reporting progress as it goes.
 */
class TransformJob extends BaseJob
{
    /**
     * @var array|\Traversable|null Items handed one by one to `pdfToImage()`
     *      (presumably PDF Asset elements; verify against the code that pushes this job).
     */
    public $assets;

    /**
     * @var mixed Forwarded unchanged as the second argument of `pdfToImage()`.
     */
    public $indexKeywords;

    /**
     * Processes every queued asset in order.
     *
     * @param \yii\queue\Queue|\craft\queue\QueueInterface $queue The queue the job was added to
     */
    public function execute($queue): void
    {
        // Normalise to a zero-based list so progress does not depend on the
        // original array keys (they may be sparse or associative), and so a
        // job created without assets is a no-op instead of a count() error.
        if (is_array($this->assets)) {
            $assets = array_values($this->assets);
        } elseif ($this->assets instanceof \Traversable) {
            $assets = iterator_to_array($this->assets, false);
        } else {
            $assets = [];
        }

        $totalAssets = count($assets);

        if ($totalAssets === 0) {
            return;
        }

        $step = 0;

        foreach ($assets as $asset) {
            $this->setProgress(
                $queue,
                $step / $totalAssets,
                Craft::t('pdf-transform', '{step, number} of {total, number}', [
                    'step' => $step + 1,
                    'total' => $totalAssets,
                ])
            );

            PdfTransform::$plugin->pdfTransformService->pdfToImage($asset, $this->indexKeywords);

            $step++;
        }
    }

    /**
     * @return string Default description for this job
     */
    protected function defaultDescription(): string
    {
        return Craft::t('pdf-transform', 'Transforming PDFs to images');
    }
}
51 changes: 51 additions & 0 deletions src/migrations/m250106_170049_createPdfKeywordsTable.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
<?php

namespace bymayo\pdftransform\migrations;

use Craft;
use craft\db\Migration;

/**
* m250106_170049_createPdfKeywordsTable migration.
*/
class m250106_170049_createPdfKeywordsTable extends Migration
{
    /**
     * Creates the `pdftransform_keywords` table and links its `pdfAssetId`
     * column to `assets.id` with cascading delete/update.
     *
     * @inheritdoc
     */
    public function safeUp()
    {
        $table = '{{%pdftransform_keywords}}';

        $this->createTable($table, [
            'id' => $this->primaryKey(),
            'pdfAssetId' => $this->integer()->notNull(),
            // NOTE(review): imageAssetId has no foreign key, unlike pdfAssetId,
            // so rows can outlive the image asset they point at. Confirm intended.
            'imageAssetId' => $this->integer()->notNull(),
            'keywords' => $this->text()->notNull(),
            'dateCreated' => $this->dateTime(),
            'dateUpdated' => $this->dateTime(),
            'uid' => $this->uid(),
        ]);

        $this->addForeignKey(
            $this->db->getForeignKeyName($table, 'pdfAssetId'),
            $table,
            'pdfAssetId',
            '{{%assets}}',
            'id',
            'CASCADE',
            'CASCADE'
        );

        return true;
    }

    /**
     * Reverts safeUp() by dropping the keywords table. The foreign key is
     * defined on that table, so it is removed together with it.
     *
     * @inheritdoc
     */
    public function safeDown()
    {
        $this->dropTableIfExists('{{%pdftransform_keywords}}');

        return true;
    }
}
3 changes: 2 additions & 1 deletion src/models/Settings.php
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ class Settings extends Model
* @var string
*/
public $transformPdfsOnUpload = false;
public $indexKeywords = false;
public $imageVolume = null;
public $page = 1;
public $imageFormat = 'jpg';
Expand All @@ -43,7 +44,7 @@ public function rules()
{
return [
[['page', 'imageVolume', 'imageResolution', 'imageQuality'], 'integer'],
['transformPdfsOnUpload', 'boolean'],
[['transformPdfsOnUpload', 'indexKeywords'], 'boolean'],
['imageFormat', 'string']
];
}
Expand Down
Loading

0 comments on commit a6f567d

Please sign in to comment.