# Download and convert PDF #125
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Scheduled pipeline that runs the repository's conversion scripts for the
# monthly volumes, the daily volumes and the "verbali" documents. After each
# data stage it validates the datapackage, optionally sends a Telegram
# message, and commits/pushes whatever changed.
name: Download and convert PDF

on:
  schedule:
    # GitHub evaluates cron expressions in UTC: 06:00, 13:00, 17:00, 20:00.
    - cron: '0 6,13,17,20 * * *'
  workflow_dispatch:

# The job pushes commits and sends a repository_dispatch event; both need
# write access to repository contents. No other token scope is requested.
permissions:
  contents: write

jobs:
  convert:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Configure Git Credentials
        # NOTE(review): the e-mail value below looks like an address that was
        # obfuscated when this file was extracted; restore the real address
        # before relying on this step.
        run: |
          git config user.name "ODS[bot]"
          git config user.email [email protected]

      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.x'

      - name: Setup uv
        # VIRTUAL_ENV is exported as an absolute path so that later steps
        # resolve the same environment regardless of their working directory.
        run: |
          curl -LsSf https://astral.sh/uv/0.5.26/install.sh | sh
          uv venv .venv
          echo "VIRTUAL_ENV=$PWD/.venv" >> "$GITHUB_ENV"
          echo "$PWD/.venv/bin" >> "$GITHUB_PATH"

      - name: Install dependencies
        # The bundled mlr binary is copied to ~/bin; the steps that need it
        # append ~/bin to PATH themselves. The working directory is left
        # untouched so `uv pip install` runs next to the .venv created above.
        run: |-
          mkdir -p ~/bin
          cp bin/mlr ~/bin
          chmod +x ~/bin/mlr
          sudo apt-get install -y jq
          uv pip install llm llm-gemini scrape-cli yq frictionless

      - name: Setup llm
        env:
          GEMINI_KEY: ${{ secrets.GEMINI_KEY }}
        # The key is piped on stdin instead of being passed as an argument.
        run: |
          echo "$GEMINI_KEY" | llm keys set gemini

      - name: Download and convert data (volumi invasati mensilmente) from PDF
        run: |
          export PATH=$PATH:~/bin
          chmod +x ./scripts/check_convert_volumi_mensili.sh
          ./scripts/check_convert_volumi_mensili.sh

      - name: Frictionless 1st data validation
        # If this step fails the whole run stops: no message is sent, no data
        # is committed, and the daily-data stage is not attempted.
        run: frictionless validate datapackage.yaml

      - name: Send telegram message (new data)
        # Runs only when the message file exists (hashFiles returns an empty
        # string when nothing matches).
        if: ${{ hashFiles('risorse/msgs/new_volumi_mensili.md') != '' }}
        env:
          TELEGRAM_BOT_TOKEN: ${{ secrets.TELEGRAM_BOT_TOKEN }}
          TELEGRAM_CHAT_ID: ${{ secrets.TELEGRAM_CHAT_ID }}
        # The message text is URL-encoded so that characters such as `&`, `+`
        # or `%` in the Markdown are delivered intact. The message file is
        # removed afterwards so the following commit does not include it.
        run: |
          curl -s -X POST "https://api.telegram.org/bot${TELEGRAM_BOT_TOKEN}/sendMessage" \
            -d "chat_id=${TELEGRAM_CHAT_ID}" \
            -d parse_mode="Markdown" \
            --data-urlencode "text=$(cat ./risorse/msgs/new_volumi_mensili.md)"
          rm ./risorse/msgs/new_volumi_mensili.md
          echo "Telegram message (new data volumi mensili) sent"

      - name: Commit and push new data
        # Commit and push only when something is staged: a run without new
        # data ends cleanly, while a genuine git failure still fails the step.
        run: |-
          git add -A
          if ! git diff --cached --quiet; then
            git commit -m "[volumi mensili] Aggiornati i dati dei volumi invasati $(date --iso-8601)"
            git push
          fi

      - name: Download and convert data (volumi invasati giornalieri) from PDF
        run: |
          export PATH=$PATH:~/bin
          chmod +x ./scripts/*.sh
          ./scripts/launcher.sh scripts/check_convert_volumi_giornalieri.sh 5

      - name: Frictionless 2nd data validation
        run: frictionless validate datapackage.yaml

      - name: Send telegram message (new volumi giornalieri)
        # Runs only when the message file exists.
        if: ${{ hashFiles('risorse/msgs/new_volumi_giornalieri.md') != '' }}
        env:
          TELEGRAM_BOT_TOKEN: ${{ secrets.TELEGRAM_BOT_TOKEN }}
          TELEGRAM_CHAT_ID: ${{ secrets.TELEGRAM_CHAT_ID }}
        # Same delivery scheme as the monthly notification: URL-encoded text,
        # then the message file is removed before the next commit.
        run: |
          curl -s -X POST "https://api.telegram.org/bot${TELEGRAM_BOT_TOKEN}/sendMessage" \
            -d "chat_id=${TELEGRAM_CHAT_ID}" \
            -d parse_mode="Markdown" \
            --data-urlencode "text=$(cat ./risorse/msgs/new_volumi_giornalieri.md)"
          rm ./risorse/msgs/new_volumi_giornalieri.md
          echo "Telegram message (new data volumi giornalieri) sent"

      - name: Commit and push new data
        # Commit and push only when something is staged.
        run: |-
          git add -A
          if ! git diff --cached --quiet; then
            git commit -m "[volumi giornalieri] Aggiornati i dati dei volumi invasati $(date --iso-8601)"
            git push
          fi

      - name: Download and summarize PDF document (verbali)
        run: chmod +x ./scripts/check_convert_verbali.sh && ./scripts/check_convert_verbali.sh

      - name: Setup for trigger (new doc)
        # Marker step: it only runs when the message file exists, and its
        # outcome is read by the "Trigger to deploy MkDocs" step below.
        id: new_doc
        if: ${{ hashFiles('risorse/msgs/new_verbale.md') != '' }}
        run: |
          echo "C'è un messaggio telegram da inviare"

      - name: Commit and push new doc
        # Commit and push only when something is staged.
        run: |-
          git add -A
          if ! git diff --cached --quiet; then
            git commit -m "[verbali] Creato nuovo blog post $(date --iso-8601)"
            git push
          fi

      - name: Trigger to deploy MkDocs
        if: ${{ steps.new_doc.outcome == 'success' }}
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        # Sends a repository_dispatch event to this same repository. The
        # repository name comes from the runner-provided GITHUB_REPOSITORY
        # variable, and --fail makes an HTTP error fail the step instead of
        # passing silently.
        run: |
          curl --fail -X POST \
            -H "Accept: application/vnd.github.v3+json" \
            -H "Authorization: token $GITHUB_TOKEN" \
            -d '{"event_type":"new_verbale_osservatorio"}' \
            "https://api.github.com/repos/${GITHUB_REPOSITORY}/dispatches"