From d68515f745a626f5b3d32bf26326d9be5bc00573 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9s=20Martano?= Date: Thu, 24 Oct 2024 14:21:24 -0300 Subject: [PATCH 01/11] Fix typo. (#27) --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 1e8df65..146a70c 100644 --- a/README.md +++ b/README.md @@ -39,7 +39,7 @@ from tika_client import TikaClient test_file = Path("sample.docx") -with TikaClient("http://localhost:9998") as client +with TikaClient("http://localhost:9998") as client: # Extract a document's metadata metadata = client.metadata.from_file(test_file) From c864c57449e54ec74e8fd2e9199c742b5bf3556d Mon Sep 17 00:00:00 2001 From: Trenton H <797416+stumpylog@users.noreply.github.com> Date: Thu, 24 Oct 2024 10:33:46 -0700 Subject: [PATCH 02/11] Fix tests with Tika v3 (#28) --- CHANGELOG.md | 6 ++++++ tests/test_resource_tika.py | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d0681ad..8efdfcb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [Unreleased] + +### Fixed + +- Tests failed when run with Tika v3 ([#28](https://github.com/stumpylog/tika-client/pull/28)) + ## [0.7.0] - 2024-10-09 ### Added diff --git a/tests/test_resource_tika.py b/tests/test_resource_tika.py index 989db67..c3d2981 100644 --- a/tests/test_resource_tika.py +++ b/tests/test_resource_tika.py @@ -146,7 +146,7 @@ def test_html_document_from_string_buffer(self, tika_client: TikaClient, sample_ resp = tika_client.tika.as_text.from_buffer(buffer) assert resp.type == "text/html; charset=UTF-8" - assert resp.parsers == ["org.apache.tika.parser.DefaultParser", "org.apache.tika.parser.html.HtmlParser"] + assert resp.parsers == ["org.apache.tika.parser.DefaultParser", "org.apache.tika.parser.html.JSoupParser"] assert "Hello world! This is HTML5 content in a file for" in resp.data["X-TIKA:content"] assert resp.data["dc:title"] == "This Is A Test" assert resp.data["description"] == "A sample HTML file" From 40b45c300573997ad0d536e68b7a3f115a58bff2 Mon Sep 17 00:00:00 2001 From: Trenton Holmes <797416+stumpylog@users.noreply.github.com> Date: Sun, 1 Dec 2024 19:30:58 -0800 Subject: [PATCH 03/11] Removes PyPy 3.8 from test matrix --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 203f90e..5ddbd10 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -58,7 +58,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ['3.8', '3.9', '3.10', '3.11', '3.12', '3.13', 'pypy3.8', 'pypy3.9', 'pypy3.10'] + python-version: ['3.8', '3.9', '3.10', '3.11', '3.12', '3.13', 'pypy3.9', 'pypy3.10'] steps: - From 620935237636dc8c8aa5d8711e8aa1f64911cc09 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 1 Dec 2024 19:41:53 -0800 Subject: [PATCH 04/11] Bump pypa/gh-action-pypi-publish from 1.10.2 to 1.12.2 (#33) * Bump pypa/gh-action-pypi-publish from 1.10.2 to 1.12.2 Bumps [pypa/gh-action-pypi-publish](https://github.com/pypa/gh-action-pypi-publish) from 1.10.2 to 1.12.2. - [Release notes](https://github.com/pypa/gh-action-pypi-publish/releases) - [Commits](https://github.com/pypa/gh-action-pypi-publish/compare/v1.10.2...v1.12.2) --- updated-dependencies: - dependency-name: pypa/gh-action-pypi-publish dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --------- Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Trenton Holmes <797416+stumpylog@users.noreply.github.com> --- .github/workflows/ci.yml | 2 +- CHANGELOG.md | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 5ddbd10..183a1c6 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -180,4 +180,4 @@ jobs: path: dist - name: Publish build to PyPI - uses: pypa/gh-action-pypi-publish@v1.10.2 + uses: pypa/gh-action-pypi-publish@v1.12.2 diff --git a/CHANGELOG.md b/CHANGELOG.md index 8efdfcb..ec24862 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Tests failed when run with Tika v3 ([#28](https://github.com/stumpylog/tika-client/pull/28)) +### Changed + +- Bump pypa/gh-action-pypi-publish from 1.10.2 to 1.12.2 (by [@dependabot](https://github.com/apps/dependabot) in [#33](https://github.com/stumpylog/tika-client/pull/33)) + ## [0.7.0] - 2024-10-09 ### Added From 49a67008f1729b79cb9c3d2462aefec7504f2953 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 11 Dec 2024 14:15:22 -0800 Subject: [PATCH 05/11] Bump codecov/codecov-action from 4 to 5 (#32) * Bump codecov/codecov-action from 4 to 5 Bumps [codecov/codecov-action](https://github.com/codecov/codecov-action) from 4 to 5. - [Release notes](https://github.com/codecov/codecov-action/releases) - [Changelog](https://github.com/codecov/codecov-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/codecov/codecov-action/compare/v4...v5) --- updated-dependencies: - dependency-name: codecov/codecov-action dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] --------- Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Trenton Holmes <797416+stumpylog@users.noreply.github.com> --- .github/workflows/ci.yml | 2 +- CHANGELOG.md | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 183a1c6..021f5e4 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -88,7 +88,7 @@ jobs: - name: Upload coverage to Codecov if: matrix.python-version == '3.10' - uses: codecov/codecov-action@v4 + uses: codecov/codecov-action@v5 with: # not required for public repos, but intermittently fails otherwise token: ${{ secrets.CODECOV_TOKEN }} diff --git a/CHANGELOG.md b/CHANGELOG.md index ec24862..3221e22 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed - Bump pypa/gh-action-pypi-publish from 1.10.2 to 1.12.2 (by [@dependabot](https://github.com/apps/dependabot) in [#33](https://github.com/stumpylog/tika-client/pull/33)) +- Bump codecov/codecov-action from 4 to 5 by (by [@dependabot](https://github.com/apps/dependabot)) ([#32](https://github.com/stumpylog/tika-client/pull/32)) ## [0.7.0] - 2024-10-09 From 981965a4d312408e89458863d495eff1391448a9 Mon Sep 17 00:00:00 2001 From: Trenton H <797416+stumpylog@users.noreply.github.com> Date: Wed, 11 Dec 2024 14:26:47 -0800 Subject: [PATCH 06/11] Feature: Integrate Codecov test analytics (#34) --- .github/workflows/ci.yml | 17 +++++++++++++---- CHANGELOG.md | 4 ++++ README.md | 4 ++-- 3 files changed, 19 insertions(+), 6 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 021f5e4..ee15d7c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -70,6 +70,10 @@ jobs: with: python-version: ${{ matrix.python-version }} cache: 'pip' + - + name: Pull Docker images + run: | + docker compose --file tests/docker/docker-compose.ci-test.yml pull - name: Install Hatch run: | @@ -83,15 +87,20 @@ jobs: - name: Run tests run: | - hatch test --cover --python ${{ matrix.python-version }} - ls -ahl . + hatch test --cover --junitxml=junit.xml -o junit_family=legacy --python ${{ matrix.python-version }} - name: Upload coverage to Codecov - if: matrix.python-version == '3.10' + if: matrix.python-version == '3.11' uses: codecov/codecov-action@v5 with: - # not required for public repos, but intermittently fails otherwise token: ${{ secrets.CODECOV_TOKEN }} + - + name: Upload test results to Codecov + if: ${{ !cancelled() }} + uses: codecov/test-results-action@v1 + with: + token: ${{ secrets.CODECOV_TOKEN }} + flags: python-${{ matrix.python-version }} build: name: Build diff --git a/CHANGELOG.md b/CHANGELOG.md index 3221e22..0cb10f8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Bump pypa/gh-action-pypi-publish from 1.10.2 to 1.12.2 (by [@dependabot](https://github.com/apps/dependabot) in [#33](https://github.com/stumpylog/tika-client/pull/33)) - Bump codecov/codecov-action from 4 to 5 by (by [@dependabot](https://github.com/apps/dependabot)) ([#32](https://github.com/stumpylog/tika-client/pull/32)) +### Added + +- Integrated Codecov test analytics ([#34](https://github.com/stumpylog/tika-client/pull/34)) + ## [0.7.0] - 2024-10-09 ### Added diff --git a/README.md b/README.md index 146a70c..9f9b37f 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ --- -**Table of Contents** +## Table of Contents - [Features](#features) - [Installation](#installation) @@ -17,7 +17,7 @@ ## Features - Simplified: No need to worry about XML or JSON responses, downloading a Tika jar file or Python 2 -- Support for Tika 2+ only +- Support for Tika 2+ only (including Tika v3, which didn't change the API) - Based on the modern [httpx](https://github.com/encode/httpx) library - Full support for type hinting - Nearly full test coverage run against an actual Tika server for multiple Python and PyPy versions From 5943f4a318742b45ce4bd18adfa2943165ea95be Mon Sep 17 00:00:00 2001 From: Trenton H <797416+stumpylog@users.noreply.github.com> Date: Wed, 11 Dec 2024 14:31:10 -0800 Subject: [PATCH 07/11] Bumps dependency versions (#35) --- .pre-commit-config.yaml | 6 +++--- pyproject.toml | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e19aeb4..2f24303 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -28,7 +28,7 @@ repos: - id: detect-private-key # See https://github.com/prettier/prettier/issues/15742 for the fork reason - repo: https://github.com/rbubley/mirrors-prettier - rev: "v3.3.3" + rev: "v3.4.2" hooks: - id: prettier types_or: @@ -41,13 +41,13 @@ repos: - id: codespell # Python hooks - repo: https://github.com/astral-sh/ruff-pre-commit - rev: 'v0.6.9' + rev: 'v0.8.2' hooks: # Run the linter. - id: ruff # Run the formatter. - id: ruff-format - repo: https://github.com/tox-dev/pyproject-fmt - rev: "2.2.4" + rev: "v2.5.0" hooks: - id: pyproject-fmt diff --git a/pyproject.toml b/pyproject.toml index 7407992..cfbfd7e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -64,7 +64,7 @@ installer = "uv" [tool.hatch.envs.hatch-static-analysis] # https://hatch.pypa.io/latest/config/internal/static-analysis/ -dependencies = [ "ruff ~= 0.6" ] +dependencies = [ "ruff ~= 0.8" ] config-path = "none" [tool.hatch.envs.hatch-test] @@ -117,7 +117,7 @@ python = [ "3.8", "3.9", "3.10", "3.11", "3.12", "3.13", "pypy3.8", "pypy3.9", " [tool.hatch.envs.typing] detached = true dependencies = [ - "mypy ~= 1.11", + "mypy ~= 1.13", "httpx", ] From a92266c7af89e9201fd62d41faab6687a8ce2a5c Mon Sep 17 00:00:00 2001 From: Trenton H <797416+stumpylog@users.noreply.github.com> Date: Wed, 11 Dec 2024 14:40:04 -0800 Subject: [PATCH 08/11] Breaking: Drop support for Python 3.8 (#36) --- .github/workflows/ci.yml | 10 +++++----- CHANGELOG.md | 4 ++++ pyproject.toml | 18 +++++++----------- src/tika_client/__init__.py | 2 +- tests/conftest.py | 2 +- 5 files changed, 18 insertions(+), 18 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ee15d7c..1b2e2a8 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -25,10 +25,10 @@ jobs: - uses: actions/checkout@v4 - - name: Set up Python 3.10 + name: Set up Python 3.11 uses: actions/setup-python@v5 with: - python-version: '3.10' + python-version: '3.11' cache: 'pip' - name: Install Hatch @@ -58,7 +58,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ['3.8', '3.9', '3.10', '3.11', '3.12', '3.13', 'pypy3.9', 'pypy3.10'] + python-version: [ '3.9', '3.10', '3.11', '3.12', '3.13', 'pypy3.9', 'pypy3.10'] steps: - @@ -113,10 +113,10 @@ jobs: - uses: actions/checkout@v4 - - name: Set up Python 3.10 + name: Set up Python 3.11 uses: actions/setup-python@v5 with: - python-version: '3.10' + python-version: '3.11' cache: 'pip' - name: Install Hatch diff --git a/CHANGELOG.md b/CHANGELOG.md index 0cb10f8..689102e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Breaking Change + +- Dropped support for Python 3.8 ([#36](https://github.com/stumpylog/tika-client/pull/36)) + ### Fixed - Tests failed when run with Tika v3 ([#28](https://github.com/stumpylog/tika-client/pull/28)) diff --git a/pyproject.toml b/pyproject.toml index cfbfd7e..bba1730 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,7 +16,7 @@ license = "MPL-2.0" authors = [ { name = "Trenton H", email = "rda0128ou@mozmail.com" }, ] -requires-python = ">=3.8" +requires-python = ">=3.9" classifiers = [ "Development Status :: 4 - Beta", "Environment :: Web Environment", @@ -25,7 +25,6 @@ classifiers = [ "Operating System :: OS Independent", "Programming Language :: Python", "Programming Language :: Python :: 3 :: Only", - "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", @@ -36,8 +35,7 @@ classifiers = [ ] dynamic = [ "version" ] dependencies = [ - "httpx~=0.24; python_version<'3.9'", - "httpx~=0.27; python_version>='3.9'", + "httpx~=0.28", "typing-extensions; python_version<'3.11'", ] @@ -74,8 +72,7 @@ randomize = true dependencies = [ "coverage-enable-subprocess == 1.0", "coverage[toml] ~= 7.6", - "pytest < 8.0; python_version < '3.9'", - "pytest ~= 8.3; python_version >= '3.9'", + "pytest ~= 8.3", "pytest-mock ~= 3.14", "pytest-randomly ~= 3.15", "pytest-rerunfailures ~= 14.0", @@ -83,8 +80,7 @@ dependencies = [ ] extra-dependencies = [ "pytest-sugar", - "pytest-httpx == 0.30.0; python_version >= '3.9'", - "pytest-httpx ~= 0.22; python_version < '3.9'", + "pytest-httpx ~= 0.33", "python-magic", "pytest-docker ~= 3.1", ] @@ -109,7 +105,7 @@ cov-report = [ ] [[tool.hatch.envs.hatch-test.matrix]] -python = [ "3.8", "3.9", "3.10", "3.11", "3.12", "3.13", "pypy3.8", "pypy3.9", "pypy3.10" ] +python = [ "3.9", "3.10", "3.11", "3.12", "3.13", "pypy3.8", "pypy3.9", "pypy3.10" ] # # Custom Environments @@ -144,7 +140,7 @@ update = [ "pre-commit autoupdate" ] # [tool.ruff] -target-version = "py38" +target-version = "py39" line-length = 120 # https://docs.astral.sh/ruff/settings/ @@ -239,7 +235,7 @@ lint.isort.known-first-party = [ "tika_client" ] max_supported_python = "3.13" [tool.pytest.ini_options] -minversion = "7.0" +minversion = "8.0" testpaths = [ "tests" ] [tool.coverage.run] diff --git a/src/tika_client/__init__.py b/src/tika_client/__init__.py index 26ea4af..f635806 100644 --- a/src/tika_client/__init__.py +++ b/src/tika_client/__init__.py @@ -7,4 +7,4 @@ from tika_client.data_models import TikaKey from tika_client.data_models import XmpKey -__all__ = ["TikaClient", "TikaKey", "XmpKey", "DublinCoreKey"] +__all__ = ["DublinCoreKey", "TikaClient", "TikaKey", "XmpKey"] diff --git a/tests/conftest.py b/tests/conftest.py index e9789c2..eff7849 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,6 +1,6 @@ import logging +from collections.abc import Generator from pathlib import Path -from typing import Generator import pytest from pytest_docker.plugin import Services From 26b7579ac9d59a5cc4f2a497935242f8837d5d05 Mon Sep 17 00:00:00 2001 From: Trenton Holmes <797416+stumpylog@users.noreply.github.com> Date: Wed, 11 Dec 2024 14:47:02 -0800 Subject: [PATCH 09/11] Link to some key definitions --- src/tika_client/data_models.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/tika_client/data_models.py b/src/tika_client/data_models.py index b541e32..285d69f 100644 --- a/src/tika_client/data_models.py +++ b/src/tika_client/data_models.py @@ -25,13 +25,26 @@ class TikaKey(str, Enum): + """ + Based on + - https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=235835139#MetadataOverview-TikaProcess + - https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=235835139#MetadataOverview-TikaGeneral + """ + Parsers = "X-TIKA:Parsed-By" + Parser_Full = "X-TIKA:Parsed-By-Full-Set" + Parse_Time = "X-TIKA:parse_time_millis" ContentType = "Content-Type" ContentLength = "Content-Length" Content = "X-TIKA:content" class DublinCoreKey(str, Enum): + """ + Based on: + - https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=235835139#MetadataOverview-DublinCore + """ + Creator = "dc:creator" Created = "dcterms:created" Modified = "dcterms:modified" @@ -49,6 +62,11 @@ class DublinCoreKey(str, Enum): class XmpKey(str, Enum): + """ + Based on: + - https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=235835139#MetadataOverview-XMP(eXtensibleMetadataPlatform) + """ + About = "xmp:About" Created = "xmp:CreateDate" NumPages = "xmpTPg:NPages" From de71ebc31bdfb24da616b39467475700c731c719 Mon Sep 17 00:00:00 2001 From: Trenton H <797416+stumpylog@users.noreply.github.com> Date: Tue, 17 Dec 2024 11:11:52 -0800 Subject: [PATCH 10/11] Relaxed version restrictions on httpx --- CHANGELOG.md | 1 + pyproject.toml | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 689102e..c3f1950 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed - Tests failed when run with Tika v3 ([#28](https://github.com/stumpylog/tika-client/pull/28)) +- Relaxed version restriction on `httpx` ### Changed diff --git a/pyproject.toml b/pyproject.toml index bba1730..d705baa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,7 +35,7 @@ classifiers = [ ] dynamic = [ "version" ] dependencies = [ - "httpx~=0.28", + "httpx>=0.27", "typing-extensions; python_version<'3.11'", ] @@ -75,7 +75,7 @@ dependencies = [ "pytest ~= 8.3", "pytest-mock ~= 3.14", "pytest-randomly ~= 3.15", - "pytest-rerunfailures ~= 14.0", + "pytest-rerunfailures ~= 15.0", "pytest-xdist[psutil] ~= 3.6", ] extra-dependencies = [ From a316855bdc6b02f1356293b239845fdaaf8bf3c0 Mon Sep 17 00:00:00 2001 From: Trenton H <797416+stumpylog@users.noreply.github.com> Date: Tue, 17 Dec 2024 11:16:15 -0800 Subject: [PATCH 11/11] Bumps version to 0.8.0 --- CHANGELOG.md | 2 +- src/tika_client/__about__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c3f1950..58bd84e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,7 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [Unreleased] +## [0.8.0] - 2024-12-17 ### Breaking Change diff --git a/src/tika_client/__about__.py b/src/tika_client/__about__.py index 7404394..9c90f17 100644 --- a/src/tika_client/__about__.py +++ b/src/tika_client/__about__.py @@ -1,4 +1,4 @@ # SPDX-FileCopyrightText: 2023-present Trenton H # # SPDX-License-Identifier: MPL-2.0 -__version__ = "0.7.0" +__version__ = "0.8.0"