Skip to content

Download and convert PDF #119

Download and convert PDF

Download and convert PDF #119

Workflow file for this run

name: Download and convert PDF
# Triggers: a cron schedule at minute 0 of hours 6, 13, 17 and 20 every day
# (GitHub evaluates schedules in UTC), plus manual runs from the Actions tab.
on:
  schedule:
    - cron: '0 6,13,17,20 * * *'
  workflow_dispatch:
jobs:
  # Single job that runs three stages in order: monthly volumes, daily volumes
  # and "verbali" documents. Each data stage converts, validates, notifies via
  # Telegram and commits; a failing step stops everything after it.
  convert:
    runs-on: ubuntu-latest
    # The job pushes commits and creates a repository_dispatch event with
    # GITHUB_TOKEN; both require write access to repository contents.
    permissions:
      contents: write
    steps:
      - uses: actions/checkout@v4

      - name: Configure Git Credentials
        run: |
          # Quoted so the shell never treats the brackets as a glob pattern.
          git config user.name 'ODS[bot]'
          # NOTE(review): this address looks like an obfuscation placeholder
          # introduced when the file was copied; restore the real bot e-mail.
          git config user.email '[email protected]'

      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.x'

      - name: Setup uv
        run: |
          curl -LsSf https://astral.sh/uv/0.5.26/install.sh | sh
          uv venv .venv
          # Export an absolute path so later steps find the environment
          # whatever their working directory is.
          echo "VIRTUAL_ENV=$PWD/.venv" >> "$GITHUB_ENV"
          echo "$PWD/.venv/bin" >> "$GITHUB_PATH"

      - name: Install dependencies
        run: |-
          # Copy the repository's bin/mlr into ~/bin and make it executable,
          # without leaving the workspace directory.
          mkdir -p ~/bin
          cp bin/mlr ~/bin
          chmod +x ~/bin/mlr
          # -y keeps apt-get non-interactive on the runner.
          sudo apt-get install -y jq
          uv pip install llm llm-gemini scrape-cli yq frictionless

      - name: Setup llm
        env:
          GEMINI_KEY: ${{ secrets.GEMINI_KEY }}
        run: |
          echo "$GEMINI_KEY" | llm keys set gemini

      # ---- Stage 1: monthly volumes ----
      - name: Download and convert data (volumi invasati mensilmente) from PDF
        run: |
          export PATH="$PATH:$HOME/bin"
          chmod +x ./scripts/check_convert_volumi_mensili.sh
          ./scripts/check_convert_volumi_mensili.sh

      # If this step fails the whole action stops: no message is sent, no data
      # is committed, and the search for daily data does not run.
      - name: Frictionless 1st data validation
        run: frictionless validate datapackage.yaml

      # Runs only when the message file exists (hashFiles returns '' otherwise).
      - name: Send telegram message (new data)
        if: ${{ hashFiles('risorse/msgs/new_volumi_mensili.md') != '' }}
        env:
          TELEGRAM_BOT_TOKEN: ${{ secrets.TELEGRAM_BOT_TOKEN }}
          TELEGRAM_CHAT_ID: ${{ secrets.TELEGRAM_CHAT_ID }}
        run: |
          # --data-urlencode keeps characters such as '&', '+' and '%' in the
          # Markdown text intact; plain -d would send them unencoded.
          curl -sS -X POST "https://api.telegram.org/bot${TELEGRAM_BOT_TOKEN}/sendMessage" \
            -d "chat_id=${TELEGRAM_CHAT_ID}" \
            -d parse_mode="Markdown" \
            --data-urlencode "text=$(cat ./risorse/msgs/new_volumi_mensili.md)"
          # Remove the message file so the following "git add -A" does not
          # pick it up.
          rm ./risorse/msgs/new_volumi_mensili.md
          echo "Telegram message (new data volumi mensili) sent"

      - name: Commit and push new data (volumi mensili)
        run: |-
          git add -A
          # Nothing staged means nothing new: end the step successfully.
          # Real commit or push errors still fail the step.
          if git diff --cached --quiet; then
            echo "No changes to commit"
            exit 0
          fi
          git commit -m "[volumi mensili] Aggiornati i dati dei volumi invasati $(date --iso-8601)"
          git push

      # ---- Stage 2: daily volumes ----
      - name: Download and convert data (volumi invasati giornalieri) from PDF
        run: |
          export PATH="$PATH:$HOME/bin"
          chmod +x ./scripts/*.sh
          ./scripts/launcher.sh scripts/check_convert_volumi_giornalieri.sh 5

      - name: Frictionless 2nd data validation
        run: frictionless validate datapackage.yaml

      - name: Send telegram message (new volumi giornalieri)
        if: ${{ hashFiles('risorse/msgs/new_volumi_giornalieri.md') != '' }}
        env:
          TELEGRAM_BOT_TOKEN: ${{ secrets.TELEGRAM_BOT_TOKEN }}
          TELEGRAM_CHAT_ID: ${{ secrets.TELEGRAM_CHAT_ID }}
        run: |
          # URL-encode the Markdown text so special characters survive.
          curl -sS -X POST "https://api.telegram.org/bot${TELEGRAM_BOT_TOKEN}/sendMessage" \
            -d "chat_id=${TELEGRAM_CHAT_ID}" \
            -d parse_mode="Markdown" \
            --data-urlencode "text=$(cat ./risorse/msgs/new_volumi_giornalieri.md)"
          rm ./risorse/msgs/new_volumi_giornalieri.md
          echo "Telegram message (new data volumi giornalieri) sent"

      - name: Commit and push new data (volumi giornalieri)
        run: |-
          git add -A
          # Nothing staged means nothing new: end the step successfully.
          if git diff --cached --quiet; then
            echo "No changes to commit"
            exit 0
          fi
          git commit -m "[volumi giornalieri] Aggiornati i dati dei volumi invasati $(date --iso-8601)"
          git push

      # ---- Stage 3: verbali documents ----
      - name: Download and summarize PDF document (verbali)
        run: |
          chmod +x ./scripts/check_convert_verbali.sh
          ./scripts/check_convert_verbali.sh

      # Marker step: it only succeeds when the message file exists, and its
      # outcome gates the deploy trigger at the end of the job.
      - name: Setup for trigger (new doc)
        id: new_doc
        if: ${{ hashFiles('risorse/msgs/new_verbale.md') != '' }}
        run: |
          echo "C'è un messaggio telegram da inviare"

      - name: Commit and push new doc
        run: |-
          git add -A
          # Nothing staged means nothing new: end the step successfully.
          if git diff --cached --quiet; then
            echo "No changes to commit"
            exit 0
          fi
          git commit -m "[verbali] Creato nuovo blog post $(date --iso-8601)"
          git push

      # Sends a repository_dispatch event of type "new_verbale_osservatorio"
      # to this same repository, only when the marker step above ran.
      - name: Trigger to deploy MkDocs
        if: ${{ steps.new_doc.outcome == 'success' }}
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          # --fail turns an HTTP error from the API into a failed step instead
          # of a silent success. GITHUB_REPOSITORY is the runner-provided
          # "owner/repo" value, read from the environment rather than
          # interpolated into the script text.
          curl --fail -X POST \
            -H "Accept: application/vnd.github.v3+json" \
            -H "Authorization: token $GITHUB_TOKEN" \
            -d '{"event_type":"new_verbale_osservatorio"}' \
            "https://api.github.com/repos/${GITHUB_REPOSITORY}/dispatches"