From 8f8c4d9b1d56f619b86548eb23a3ccb5b23c501a Mon Sep 17 00:00:00 2001 From: Brent Westbrook Date: Wed, 26 Jun 2024 15:19:04 -0400 Subject: [PATCH 01/31] try storing qcportal cache on clients --- openff/qcsubmit/results/results.py | 71 +++++++++++++++++++++++++----- openff/qcsubmit/utils/utils.py | 15 +++++++ 2 files changed, 76 insertions(+), 10 deletions(-) diff --git a/openff/qcsubmit/results/results.py b/openff/qcsubmit/results/results.py index cf2afc97..83dac71d 100644 --- a/openff/qcsubmit/results/results.py +++ b/openff/qcsubmit/results/results.py @@ -27,6 +27,7 @@ from openff.toolkit.typing.engines.smirnoff import ForceField from openff.units import unit from qcportal import PortalClient +from qcportal.cache import get_records_with_cache from qcportal.dataset_models import BaseDataset as QCPDataset from qcportal.optimization import OptimizationDataset, OptimizationRecord from qcportal.record_models import BaseRecord, RecordStatusEnum @@ -43,6 +44,7 @@ from openff.qcsubmit.datasets import BasicDataset from openff.qcsubmit.exceptions import RecordTypeError from openff.qcsubmit.utils.smirnoff import smirnoff_coverage, smirnoff_torsion_coverage +from openff.qcsubmit.utils.utils import client_record_cache if TYPE_CHECKING: from openff.qcsubmit.results.filters import ResultFilter @@ -382,10 +384,22 @@ def to_records(self) -> List[Tuple[SinglepointRecord, Molecule]]: for client_address, records in self.entries.items(): client = PortalClient(client_address) + use_cache = client_record_cache(client) # TODO - batching/chunking (maybe in portal?) for record in records: - rec = client.get_singlepoints(record.record_id, include=["molecule"]) + if use_cache: + rec = get_records_with_cache( + client, + client.record_cache, + SinglepointRecord, + [record.record_id], + include=["molecule"], + )[0] + else: + rec = client.get_singlepoints( + record.record_id, include=["molecule"] + ) # OpenFF molecule try: @@ -534,12 +548,23 @@ def to_records(self) -> List[Tuple[OptimizationRecord, Molecule]]: for client_address, results in self.entries.items(): client = PortalClient(client_address) + use_cache = client_record_cache(client) - rec_ids = [result.record_id for result in results] # Do one big request to save time - opt_records = client.get_optimizations( - rec_ids, include=["initial_molecule", "final_molecule"] - ) + rec_ids = [result.record_id for result in results] + + if use_cache: + opt_records = get_records_with_cache( + client, + client.record_cache, + OptimizationRecord, + rec_ids, + include=["initial_molecule", "final_molecule"], + ) + else: + opt_records = client.get_optimizations( + rec_ids, include=["initial_molecule", "final_molecule"] + ) # Sort out which records from the request line up with which results opt_rec_id_to_result = dict() for result in results: @@ -803,13 +828,23 @@ def to_records(self) -> List[Tuple[TorsiondriveRecord, Molecule]]: for client_address, records in self.entries.items(): client = PortalClient(client_address) + use_cache = client_record_cache(client) # retrieve all torsiondrives at once, including their # minimum_optimizations record_ids = [r.record_id for r in records] - torsion_drive_records = client.get_torsiondrives( - record_ids, include=["minimum_optimizations"] - ) + if use_cache: + torsion_drive_records = get_records_with_cache( + client, + client.record_cache, + TorsiondriveRecord, + record_ids, + include=["minimum_optimizations"], + ) + else: + torsion_drive_records = client.get_torsiondrives( + record_ids, include=["minimum_optimizations"] + ) for record, rec in zip(records, torsion_drive_records): # OpenFF molecule try: @@ -825,10 +860,26 @@ def to_records(self) -> List[Tuple[TorsiondriveRecord, Molecule]]: # retrieve all minimum_optimizations at once opt_ids = [v.id for v in rec.minimum_optimizations.values()] - opt_records = client.get_optimizations(opt_ids) + if use_cache: + opt_records = get_records_with_cache( + client, + client.record_cache, + OptimizationRecord, + opt_ids, + ) + else: + opt_records = client.get_optimizations(opt_ids) # retrieve all final_molecules at once opt_final_molecule_ids = [r.final_molecule_id for r in opt_records] - opt_final_molecules = client.get_molecules(opt_final_molecule_ids) + if use_cache: + opt_final_molecules = get_records_with_cache( + client, + client.record_cache, + qcportal.molecules.Molecule, # not sure this is right + opt_final_molecule_ids, + ) + else: + opt_final_molecules = client.get_molecules(opt_final_molecule_ids) # Map of torsion drive keys to minimum optimization qc_grid_molecules = list( zip(rec.minimum_optimizations.keys(), opt_final_molecules) diff --git a/openff/qcsubmit/utils/utils.py b/openff/qcsubmit/utils/utils.py index 08797a07..62fe785e 100644 --- a/openff/qcsubmit/utils/utils.py +++ b/openff/qcsubmit/utils/utils.py @@ -5,6 +5,21 @@ RDKitToolkitWrapper, UndefinedStereochemistryError, ) +from qcportal import PortalClient +from qcportal.cache import RecordCache + + +def client_record_cache(client: PortalClient) -> bool: + """Initialize a RecordCache stored on `client` if the client already has a + cache_dir. Returns whether or not this was the case (whether caching should + be used) + + """ + if client.cache and client.cache.cache_dir: + if not hasattr(client, "record_cache"): + client.record_cache = RecordCache(client.cache.cache_dir, read_only=False) + return True + return False def get_data(relative_path): From 4c649b290005f2fada3b9608ccbccb126cfcce9e Mon Sep 17 00:00:00 2001 From: Brent Westbrook Date: Wed, 26 Jun 2024 16:35:31 -0400 Subject: [PATCH 02/31] generate a filename not just directory --- openff/qcsubmit/utils/utils.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/openff/qcsubmit/utils/utils.py b/openff/qcsubmit/utils/utils.py index 62fe785e..bbe53852 100644 --- a/openff/qcsubmit/utils/utils.py +++ b/openff/qcsubmit/utils/utils.py @@ -1,3 +1,4 @@ +import os from typing import Dict, Generator, List, Tuple from openff.toolkit import topology as off @@ -17,7 +18,9 @@ def client_record_cache(client: PortalClient) -> bool: """ if client.cache and client.cache.cache_dir: if not hasattr(client, "record_cache"): - client.record_cache = RecordCache(client.cache.cache_dir, read_only=False) + client.record_cache = RecordCache( + os.path.join(client.cache.cache_dir, "cache.sqlite"), read_only=False + ) return True return False From 6d44b856aadfcec818f7c2cf3bc216154b793340 Mon Sep 17 00:00:00 2001 From: Brent Westbrook Date: Tue, 9 Jul 2024 10:20:37 -0400 Subject: [PATCH 03/31] Revert "generate a filename not just directory" This reverts commit 4c649b290005f2fada3b9608ccbccb126cfcce9e. --- openff/qcsubmit/utils/utils.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/openff/qcsubmit/utils/utils.py b/openff/qcsubmit/utils/utils.py index bbe53852..62fe785e 100644 --- a/openff/qcsubmit/utils/utils.py +++ b/openff/qcsubmit/utils/utils.py @@ -1,4 +1,3 @@ -import os from typing import Dict, Generator, List, Tuple from openff.toolkit import topology as off @@ -18,9 +17,7 @@ def client_record_cache(client: PortalClient) -> bool: """ if client.cache and client.cache.cache_dir: if not hasattr(client, "record_cache"): - client.record_cache = RecordCache( - os.path.join(client.cache.cache_dir, "cache.sqlite"), read_only=False - ) + client.record_cache = RecordCache(client.cache.cache_dir, read_only=False) return True return False From 6f3db657bf12bf003c7d85d410701826b5657679 Mon Sep 17 00:00:00 2001 From: Brent Westbrook Date: Tue, 9 Jul 2024 10:20:46 -0400 Subject: [PATCH 04/31] Revert "try storing qcportal cache on clients" This reverts commit 8f8c4d9b1d56f619b86548eb23a3ccb5b23c501a. --- openff/qcsubmit/results/results.py | 71 +++++------------------------- openff/qcsubmit/utils/utils.py | 15 ------- 2 files changed, 10 insertions(+), 76 deletions(-) diff --git a/openff/qcsubmit/results/results.py b/openff/qcsubmit/results/results.py index 83dac71d..cf2afc97 100644 --- a/openff/qcsubmit/results/results.py +++ b/openff/qcsubmit/results/results.py @@ -27,7 +27,6 @@ from openff.toolkit.typing.engines.smirnoff import ForceField from openff.units import unit from qcportal import PortalClient -from qcportal.cache import get_records_with_cache from qcportal.dataset_models import BaseDataset as QCPDataset from qcportal.optimization import OptimizationDataset, OptimizationRecord from qcportal.record_models import BaseRecord, RecordStatusEnum @@ -44,7 +43,6 @@ from openff.qcsubmit.datasets import BasicDataset from openff.qcsubmit.exceptions import RecordTypeError from openff.qcsubmit.utils.smirnoff import smirnoff_coverage, smirnoff_torsion_coverage -from openff.qcsubmit.utils.utils import client_record_cache if TYPE_CHECKING: from openff.qcsubmit.results.filters import ResultFilter @@ -384,22 +382,10 @@ def to_records(self) -> List[Tuple[SinglepointRecord, Molecule]]: for client_address, records in self.entries.items(): client = PortalClient(client_address) - use_cache = client_record_cache(client) # TODO - batching/chunking (maybe in portal?) for record in records: - if use_cache: - rec = get_records_with_cache( - client, - client.record_cache, - SinglepointRecord, - [record.record_id], - include=["molecule"], - )[0] - else: - rec = client.get_singlepoints( - record.record_id, include=["molecule"] - ) + rec = client.get_singlepoints(record.record_id, include=["molecule"]) # OpenFF molecule try: @@ -548,23 +534,12 @@ def to_records(self) -> List[Tuple[OptimizationRecord, Molecule]]: for client_address, results in self.entries.items(): client = PortalClient(client_address) - use_cache = client_record_cache(client) - # Do one big request to save time rec_ids = [result.record_id for result in results] - - if use_cache: - opt_records = get_records_with_cache( - client, - client.record_cache, - OptimizationRecord, - rec_ids, - include=["initial_molecule", "final_molecule"], - ) - else: - opt_records = client.get_optimizations( - rec_ids, include=["initial_molecule", "final_molecule"] - ) + # Do one big request to save time + opt_records = client.get_optimizations( + rec_ids, include=["initial_molecule", "final_molecule"] + ) # Sort out which records from the request line up with which results opt_rec_id_to_result = dict() for result in results: @@ -828,23 +803,13 @@ def to_records(self) -> List[Tuple[TorsiondriveRecord, Molecule]]: for client_address, records in self.entries.items(): client = PortalClient(client_address) - use_cache = client_record_cache(client) # retrieve all torsiondrives at once, including their # minimum_optimizations record_ids = [r.record_id for r in records] - if use_cache: - torsion_drive_records = get_records_with_cache( - client, - client.record_cache, - TorsiondriveRecord, - record_ids, - include=["minimum_optimizations"], - ) - else: - torsion_drive_records = client.get_torsiondrives( - record_ids, include=["minimum_optimizations"] - ) + torsion_drive_records = client.get_torsiondrives( + record_ids, include=["minimum_optimizations"] + ) for record, rec in zip(records, torsion_drive_records): # OpenFF molecule try: @@ -860,26 +825,10 @@ def to_records(self) -> List[Tuple[TorsiondriveRecord, Molecule]]: # retrieve all minimum_optimizations at once opt_ids = [v.id for v in rec.minimum_optimizations.values()] - if use_cache: - opt_records = get_records_with_cache( - client, - client.record_cache, - OptimizationRecord, - opt_ids, - ) - else: - opt_records = client.get_optimizations(opt_ids) + opt_records = client.get_optimizations(opt_ids) # retrieve all final_molecules at once opt_final_molecule_ids = [r.final_molecule_id for r in opt_records] - if use_cache: - opt_final_molecules = get_records_with_cache( - client, - client.record_cache, - qcportal.molecules.Molecule, # not sure this is right - opt_final_molecule_ids, - ) - else: - opt_final_molecules = client.get_molecules(opt_final_molecule_ids) + opt_final_molecules = client.get_molecules(opt_final_molecule_ids) # Map of torsion drive keys to minimum optimization qc_grid_molecules = list( zip(rec.minimum_optimizations.keys(), opt_final_molecules) diff --git a/openff/qcsubmit/utils/utils.py b/openff/qcsubmit/utils/utils.py index 62fe785e..08797a07 100644 --- a/openff/qcsubmit/utils/utils.py +++ b/openff/qcsubmit/utils/utils.py @@ -5,21 +5,6 @@ RDKitToolkitWrapper, UndefinedStereochemistryError, ) -from qcportal import PortalClient -from qcportal.cache import RecordCache - - -def client_record_cache(client: PortalClient) -> bool: - """Initialize a RecordCache stored on `client` if the client already has a - cache_dir. Returns whether or not this was the case (whether caching should - be used) - - """ - if client.cache and client.cache.cache_dir: - if not hasattr(client, "record_cache"): - client.record_cache = RecordCache(client.cache.cache_dir, read_only=False) - return True - return False def get_data(relative_path): From da52d5e479847767cf1b4bac9e25ec448ef84c2e Mon Sep 17 00:00:00 2001 From: Brent Westbrook Date: Tue, 9 Jul 2024 10:31:39 -0400 Subject: [PATCH 05/31] add outline of CachedPortalClient with signatures from qcportal --- openff/qcsubmit/utils/utils.py | 78 +++++++++++++++++++++++++++++++++- 1 file changed, 77 insertions(+), 1 deletion(-) diff --git a/openff/qcsubmit/utils/utils.py b/openff/qcsubmit/utils/utils.py index 4cf219a1..d1af9d44 100644 --- a/openff/qcsubmit/utils/utils.py +++ b/openff/qcsubmit/utils/utils.py @@ -1,5 +1,17 @@ +import logging +import os from contextlib import contextmanager -from typing import Callable, Dict, Generator, List, Tuple +from typing import ( + Callable, + Dict, + Generator, + Iterable, + List, + Optional, + Sequence, + Tuple, + Union, +) from openff.toolkit import topology as off from openff.toolkit.utils.toolkits import ( @@ -7,6 +19,70 @@ UndefinedStereochemistryError, ) from qcportal import PortalClient +from qcportal.cache import RecordCache, get_records_with_cache +from qcportal.molecules import Molecule +from qcportal.optimization.record_models import OptimizationRecord +from qcportal.singlepoint.record_models import SinglepointRecord +from qcportal.torsiondrive.record_models import TorsiondriveRecord + +logger = logging.getLogger(__name__) + + +class CachedPortalClient(PortalClient): + def __init__(self, addr, cache_dir, **client_kwargs): + super().__init__(addr, cache_dir=cache_dir, **client_kwargs) + self.record_cache = RecordCache( + os.path.join(self.cache.cache_dir, "cache.sqlite"), read_only=False + ) + + def get_optimizations( + self, + record_ids: Union[int, Sequence[int]], + missing_ok: bool = False, + *, + include: Optional[Iterable[str]] = None, + ) -> Union[Optional[OptimizationRecord], List[Optional[OptimizationRecord]]]: + if missing_ok: + logger.warning("missing_ok provided but unused by CachedPortalClient") + return get_records_with_cache( + client=self, + record_cache=self.record_cache, + record_type=OptimizationRecord, + record_ids=record_ids, + include=include, + force_fetch=False, + ) + + def get_singlepoints( + self, + record_ids: Union[int, Sequence[int]], + missing_ok: bool = False, + *, + include: Optional[Iterable[str]] = None, + ) -> Union[Optional[SinglepointRecord], List[Optional[SinglepointRecord]]]: + if missing_ok: + logger.warning("missing_ok provided but unused by CachedPortalClient") + raise NotImplementedError() + + def get_torsiondrives( + self, + record_ids: Union[int, Sequence[int]], + missing_ok: bool = False, + *, + include: Optional[Iterable[str]] = None, + ) -> Union[Optional[TorsiondriveRecord], List[Optional[TorsiondriveRecord]]]: + if missing_ok: + logger.warning("missing_ok provided but unused by CachedPortalClient") + raise NotImplementedError() + + def get_molecules( + self, + molecule_ids: Union[int, Sequence[int]], + missing_ok: bool = False, + ) -> Union[Optional[Molecule], List[Optional[Molecule]]]: + if missing_ok: + logger.warning("missing_ok provided but unused by CachedPortalClient") + raise NotImplementedError() def _default_portal_client(client_address) -> PortalClient: From 6555c20c09a7d3bb301fb9756bd6a7bda2eb4eed Mon Sep 17 00:00:00 2001 From: Brent Westbrook Date: Tue, 9 Jul 2024 10:39:07 -0400 Subject: [PATCH 06/31] default to using the cache hoping to trigger test failures --- openff/qcsubmit/utils/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openff/qcsubmit/utils/utils.py b/openff/qcsubmit/utils/utils.py index d1af9d44..f35ff6b6 100644 --- a/openff/qcsubmit/utils/utils.py +++ b/openff/qcsubmit/utils/utils.py @@ -86,7 +86,7 @@ def get_molecules( def _default_portal_client(client_address) -> PortalClient: - return PortalClient(client_address) + return CachedPortalClient(client_address, cache_dir="./qcsubmit_qcportal_cache") @contextmanager From 2a9ec5b57747cebbe693d94128cae129095fbcbd Mon Sep 17 00:00:00 2001 From: Brent Westbrook Date: Tue, 9 Jul 2024 10:57:02 -0400 Subject: [PATCH 07/31] fill in todos, ensure record_ids are sequences --- openff/qcsubmit/utils/utils.py | 35 +++++++++++++++++++++++++++++++--- 1 file changed, 32 insertions(+), 3 deletions(-) diff --git a/openff/qcsubmit/utils/utils.py b/openff/qcsubmit/utils/utils.py index f35ff6b6..d70d6ada 100644 --- a/openff/qcsubmit/utils/utils.py +++ b/openff/qcsubmit/utils/utils.py @@ -44,6 +44,8 @@ def get_optimizations( ) -> Union[Optional[OptimizationRecord], List[Optional[OptimizationRecord]]]: if missing_ok: logger.warning("missing_ok provided but unused by CachedPortalClient") + if not isinstance(record_ids, Sequence): + record_ids = [record_ids] return get_records_with_cache( client=self, record_cache=self.record_cache, @@ -62,7 +64,16 @@ def get_singlepoints( ) -> Union[Optional[SinglepointRecord], List[Optional[SinglepointRecord]]]: if missing_ok: logger.warning("missing_ok provided but unused by CachedPortalClient") - raise NotImplementedError() + if not isinstance(record_ids, Sequence): + record_ids = [record_ids] + return get_records_with_cache( + client=self, + record_cache=self.record_cache, + record_type=SinglepointRecord, + record_ids=record_ids, + include=include, + force_fetch=False, + ) def get_torsiondrives( self, @@ -73,7 +84,16 @@ def get_torsiondrives( ) -> Union[Optional[TorsiondriveRecord], List[Optional[TorsiondriveRecord]]]: if missing_ok: logger.warning("missing_ok provided but unused by CachedPortalClient") - raise NotImplementedError() + if not isinstance(record_ids, Sequence): + record_ids = [record_ids] + return get_records_with_cache( + client=self, + record_cache=self.record_cache, + record_type=TorsiondriveRecord, + record_ids=record_ids, + include=include, + force_fetch=False, + ) def get_molecules( self, @@ -82,7 +102,16 @@ def get_molecules( ) -> Union[Optional[Molecule], List[Optional[Molecule]]]: if missing_ok: logger.warning("missing_ok provided but unused by CachedPortalClient") - raise NotImplementedError() + if not isinstance(molecule_ids, Sequence): + molecule_ids = [molecule_ids] + return get_records_with_cache( + client=self, + record_cache=self.record_cache, + record_type=Molecule, + record_ids=molecule_ids, + include=None, + force_fetch=False, + ) def _default_portal_client(client_address) -> PortalClient: From 05e1d9b377c7885d414eb49e26bbc281af2dbaf8 Mon Sep 17 00:00:00 2001 From: Brent Westbrook Date: Tue, 9 Jul 2024 11:13:12 -0400 Subject: [PATCH 08/31] if given a single id, these methods have to return a single record --- openff/qcsubmit/utils/utils.py | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/openff/qcsubmit/utils/utils.py b/openff/qcsubmit/utils/utils.py index d70d6ada..c3cf7f5f 100644 --- a/openff/qcsubmit/utils/utils.py +++ b/openff/qcsubmit/utils/utils.py @@ -45,8 +45,9 @@ def get_optimizations( if missing_ok: logger.warning("missing_ok provided but unused by CachedPortalClient") if not isinstance(record_ids, Sequence): + unpack = True record_ids = [record_ids] - return get_records_with_cache( + res = get_records_with_cache( client=self, record_cache=self.record_cache, record_type=OptimizationRecord, @@ -54,6 +55,10 @@ def get_optimizations( include=include, force_fetch=False, ) + if unpack: + return res[0] + else: + return res def get_singlepoints( self, @@ -65,8 +70,9 @@ def get_singlepoints( if missing_ok: logger.warning("missing_ok provided but unused by CachedPortalClient") if not isinstance(record_ids, Sequence): + unpack = True record_ids = [record_ids] - return get_records_with_cache( + res = get_records_with_cache( client=self, record_cache=self.record_cache, record_type=SinglepointRecord, @@ -74,6 +80,10 @@ def get_singlepoints( include=include, force_fetch=False, ) + if unpack: + return res[0] + else: + return res def get_torsiondrives( self, @@ -85,8 +95,9 @@ def get_torsiondrives( if missing_ok: logger.warning("missing_ok provided but unused by CachedPortalClient") if not isinstance(record_ids, Sequence): + unpack = True record_ids = [record_ids] - return get_records_with_cache( + res = get_records_with_cache( client=self, record_cache=self.record_cache, record_type=TorsiondriveRecord, @@ -94,6 +105,10 @@ def get_torsiondrives( include=include, force_fetch=False, ) + if unpack: + return res[0] + else: + return res def get_molecules( self, @@ -103,8 +118,9 @@ def get_molecules( if missing_ok: logger.warning("missing_ok provided but unused by CachedPortalClient") if not isinstance(molecule_ids, Sequence): + unpack = True molecule_ids = [molecule_ids] - return get_records_with_cache( + res = get_records_with_cache( client=self, record_cache=self.record_cache, record_type=Molecule, @@ -112,6 +128,10 @@ def get_molecules( include=None, force_fetch=False, ) + if unpack: + return res[0] + else: + return res def _default_portal_client(client_address) -> PortalClient: From fccfb9c45b56f04f01ab5fdb055bfe0845279b87 Mon Sep 17 00:00:00 2001 From: Brent Westbrook Date: Tue, 9 Jul 2024 11:27:56 -0400 Subject: [PATCH 09/31] comment out non-functional get_molecules, add note about root issue --- openff/qcsubmit/utils/utils.py | 52 +++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 23 deletions(-) diff --git a/openff/qcsubmit/utils/utils.py b/openff/qcsubmit/utils/utils.py index c3cf7f5f..e621eb6e 100644 --- a/openff/qcsubmit/utils/utils.py +++ b/openff/qcsubmit/utils/utils.py @@ -20,7 +20,6 @@ ) from qcportal import PortalClient from qcportal.cache import RecordCache, get_records_with_cache -from qcportal.molecules import Molecule from qcportal.optimization.record_models import OptimizationRecord from qcportal.singlepoint.record_models import SinglepointRecord from qcportal.torsiondrive.record_models import TorsiondriveRecord @@ -110,28 +109,35 @@ def get_torsiondrives( else: return res - def get_molecules( - self, - molecule_ids: Union[int, Sequence[int]], - missing_ok: bool = False, - ) -> Union[Optional[Molecule], List[Optional[Molecule]]]: - if missing_ok: - logger.warning("missing_ok provided but unused by CachedPortalClient") - if not isinstance(molecule_ids, Sequence): - unpack = True - molecule_ids = [molecule_ids] - res = get_records_with_cache( - client=self, - record_cache=self.record_cache, - record_type=Molecule, - record_ids=molecule_ids, - include=None, - force_fetch=False, - ) - if unpack: - return res[0] - else: - return res + # Molecule is not a true record type (it's just re-exported from + # qcelemental), so it doesn't play well with the get_records_with_cache + # approach. get_records_with_cache calls client._fetch_records, which peeks + # at record_type.__fields__["record_type"] that Molecules don't have. + # PortalClient.get_molecules does all the work itself instead of delegating + # to _get_records_by_type and _fetch_records + + # def get_molecules( + # self, + # molecule_ids: Union[int, Sequence[int]], + # missing_ok: bool = False, + # ) -> Union[Optional[Molecule], List[Optional[Molecule]]]: + # if missing_ok: + # logger.warning("missing_ok provided but unused by CachedPortalClient") + # if not isinstance(molecule_ids, Sequence): + # unpack = True + # molecule_ids = [molecule_ids] + # res = get_records_with_cache( + # client=self, + # record_cache=self.record_cache, + # record_type=Molecule, + # record_ids=molecule_ids, + # include=None, + # force_fetch=False, + # ) + # if unpack: + # return res[0] + # else: + # return res def _default_portal_client(client_address) -> PortalClient: From ce64b0f59e85acf678b28c762d799a5c122d5c67 Mon Sep 17 00:00:00 2001 From: Brent Westbrook Date: Tue, 9 Jul 2024 11:44:52 -0400 Subject: [PATCH 10/31] ensure unpack has a value --- openff/qcsubmit/utils/utils.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/openff/qcsubmit/utils/utils.py b/openff/qcsubmit/utils/utils.py index e621eb6e..5abf0a8d 100644 --- a/openff/qcsubmit/utils/utils.py +++ b/openff/qcsubmit/utils/utils.py @@ -43,8 +43,7 @@ def get_optimizations( ) -> Union[Optional[OptimizationRecord], List[Optional[OptimizationRecord]]]: if missing_ok: logger.warning("missing_ok provided but unused by CachedPortalClient") - if not isinstance(record_ids, Sequence): - unpack = True + if unpack := not isinstance(record_ids, Sequence): record_ids = [record_ids] res = get_records_with_cache( client=self, @@ -68,8 +67,7 @@ def get_singlepoints( ) -> Union[Optional[SinglepointRecord], List[Optional[SinglepointRecord]]]: if missing_ok: logger.warning("missing_ok provided but unused by CachedPortalClient") - if not isinstance(record_ids, Sequence): - unpack = True + if unpack := not isinstance(record_ids, Sequence): record_ids = [record_ids] res = get_records_with_cache( client=self, @@ -93,8 +91,7 @@ def get_torsiondrives( ) -> Union[Optional[TorsiondriveRecord], List[Optional[TorsiondriveRecord]]]: if missing_ok: logger.warning("missing_ok provided but unused by CachedPortalClient") - if not isinstance(record_ids, Sequence): - unpack = True + if unpack := not isinstance(record_ids, Sequence): record_ids = [record_ids] res = get_records_with_cache( client=self, @@ -123,8 +120,7 @@ def get_torsiondrives( # ) -> Union[Optional[Molecule], List[Optional[Molecule]]]: # if missing_ok: # logger.warning("missing_ok provided but unused by CachedPortalClient") - # if not isinstance(molecule_ids, Sequence): - # unpack = True + # if unpack := not isinstance(molecule_ids, Sequence): # molecule_ids = [molecule_ids] # res = get_records_with_cache( # client=self, From 859ec44def5b5c47cb5093093b2f150b34853b69 Mon Sep 17 00:00:00 2001 From: Brent Westbrook Date: Tue, 9 Jul 2024 15:21:59 -0400 Subject: [PATCH 11/31] delete get_molecules after ben's confirmation at the meeting --- openff/qcsubmit/utils/utils.py | 29 ----------------------------- 1 file changed, 29 deletions(-) diff --git a/openff/qcsubmit/utils/utils.py b/openff/qcsubmit/utils/utils.py index 5abf0a8d..30bf3046 100644 --- a/openff/qcsubmit/utils/utils.py +++ b/openff/qcsubmit/utils/utils.py @@ -106,35 +106,6 @@ def get_torsiondrives( else: return res - # Molecule is not a true record type (it's just re-exported from - # qcelemental), so it doesn't play well with the get_records_with_cache - # approach. get_records_with_cache calls client._fetch_records, which peeks - # at record_type.__fields__["record_type"] that Molecules don't have. - # PortalClient.get_molecules does all the work itself instead of delegating - # to _get_records_by_type and _fetch_records - - # def get_molecules( - # self, - # molecule_ids: Union[int, Sequence[int]], - # missing_ok: bool = False, - # ) -> Union[Optional[Molecule], List[Optional[Molecule]]]: - # if missing_ok: - # logger.warning("missing_ok provided but unused by CachedPortalClient") - # if unpack := not isinstance(molecule_ids, Sequence): - # molecule_ids = [molecule_ids] - # res = get_records_with_cache( - # client=self, - # record_cache=self.record_cache, - # record_type=Molecule, - # record_ids=molecule_ids, - # include=None, - # force_fetch=False, - # ) - # if unpack: - # return res[0] - # else: - # return res - def _default_portal_client(client_address) -> PortalClient: return CachedPortalClient(client_address, cache_dir="./qcsubmit_qcportal_cache") From 09f76c37daad105f6919c0c1c44d5356b1e6e3c0 Mon Sep 17 00:00:00 2001 From: Brent Westbrook Date: Tue, 9 Jul 2024 15:29:18 -0400 Subject: [PATCH 12/31] factor out default cache dir --- openff/qcsubmit/utils/utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/openff/qcsubmit/utils/utils.py b/openff/qcsubmit/utils/utils.py index 30bf3046..c296634f 100644 --- a/openff/qcsubmit/utils/utils.py +++ b/openff/qcsubmit/utils/utils.py @@ -26,6 +26,8 @@ logger = logging.getLogger(__name__) +_DEFAULT_CACHE_DIR = "./qcsubmit_qcportal_cache" + class CachedPortalClient(PortalClient): def __init__(self, addr, cache_dir, **client_kwargs): @@ -108,7 +110,7 @@ def get_torsiondrives( def _default_portal_client(client_address) -> PortalClient: - return CachedPortalClient(client_address, cache_dir="./qcsubmit_qcportal_cache") + return CachedPortalClient(client_address, cache_dir=_DEFAULT_CACHE_DIR) @contextmanager From f8ae094ca039cdf6b35df74b78073d22c4637551 Mon Sep 17 00:00:00 2001 From: Brent Westbrook Date: Thu, 11 Jul 2024 13:08:42 -0400 Subject: [PATCH 13/31] break the tests to find where updates are needed --- openff/qcsubmit/_tests/results/test_filters.py | 5 ++++- openff/qcsubmit/_tests/results/test_results.py | 12 ++++++++---- openff/qcsubmit/_tests/test_submissions.py | 10 +++++++--- openff/qcsubmit/utils/utils.py | 3 +++ 4 files changed, 22 insertions(+), 8 deletions(-) diff --git a/openff/qcsubmit/_tests/results/test_filters.py b/openff/qcsubmit/_tests/results/test_filters.py index 486e961b..ce27ecdc 100644 --- a/openff/qcsubmit/_tests/results/test_filters.py +++ b/openff/qcsubmit/_tests/results/test_filters.py @@ -6,6 +6,7 @@ from openff.toolkit.topology import Molecule from openff.units import unit from qcelemental.models import DriverEnum +from qcportal import PortalClient from qcportal.singlepoint import QCSpecification from openff.qcsubmit._pydantic import ValidationError @@ -31,6 +32,7 @@ SMILESFilter, UnperceivableStereoFilter, ) +from openff.qcsubmit.utils import portal_client_manager from . import RecordStatusEnum, SinglepointRecord @@ -545,5 +547,6 @@ def test_unperceivable_stereo_filter(toolkits, n_expected, public_client): ) assert collection.n_results == 1 - filtered = collection.filter(UnperceivableStereoFilter(toolkits=toolkits)) + with portal_client_manager(PortalClient): + filtered = collection.filter(UnperceivableStereoFilter(toolkits=toolkits)) assert filtered.n_results == n_expected diff --git a/openff/qcsubmit/_tests/results/test_results.py b/openff/qcsubmit/_tests/results/test_results.py index 2308ff3c..9540b488 100644 --- a/openff/qcsubmit/_tests/results/test_results.py +++ b/openff/qcsubmit/_tests/results/test_results.py @@ -8,6 +8,7 @@ from openff.toolkit.topology import Molecule from openff.toolkit.typing.engines.smirnoff import ForceField from qcelemental.models import DriverEnum +from qcportal import PortalClient from qcportal.torsiondrive import ( TorsiondriveKeywords, TorsiondriveRecord, @@ -26,6 +27,7 @@ ) from openff.qcsubmit.results.filters import ResultFilter from openff.qcsubmit.results.results import TorsionDriveResult, _BaseResultCollection +from openff.qcsubmit.utils import portal_client_manager from . import ( OptimizationRecord, @@ -315,7 +317,8 @@ def test_to_records( public_client, collection_name, spec_name=spec_name ) assert collection.n_molecules == expected_n_mols - records_and_molecules = collection.to_records() + with portal_client_manager(PortalClient): + records_and_molecules = collection.to_records() assert len(records_and_molecules) == expected_n_recs record, molecule = records_and_molecules[0] @@ -351,9 +354,10 @@ def test_optimization_to_basic_result_collection(public_client): optimization_result_collection = OptimizationResultCollection.from_server( public_client, ["OpenFF Gen 2 Opt Set 3 Pfizer Discrepancy"] ) - basic_collection = optimization_result_collection.to_basic_result_collection( - "hessian" - ) + with portal_client_manager(PortalClient): + basic_collection = optimization_result_collection.to_basic_result_collection( + "hessian" + ) assert basic_collection.n_results == 197 assert basic_collection.n_molecules == 49 diff --git a/openff/qcsubmit/_tests/test_submissions.py b/openff/qcsubmit/_tests/test_submissions.py index c47e1f38..2f9fdaba 100644 --- a/openff/qcsubmit/_tests/test_submissions.py +++ b/openff/qcsubmit/_tests/test_submissions.py @@ -37,7 +37,7 @@ OptimizationResultCollection, TorsionDriveResultCollection, ) -from openff.qcsubmit.utils import get_data +from openff.qcsubmit.utils import get_data, portal_client_manager def await_results(client, timeout=120, check_fn=PortalClient.get_singlepoints, ids=[1]): @@ -1408,7 +1408,8 @@ def test_invalid_cmiles(fulltest_client, factory_type, result_collection_type): assert ds.specifications.keys() == {"default"} results = result_collection_type.from_datasets(datasets=ds) assert results.n_molecules == 1 - records = results.to_records() + with portal_client_manager(PortalClient): + records = results.to_records() assert len(records) == 1 # Single points and optimizations look here fulltest_client.modify_molecule( @@ -1427,6 +1428,9 @@ def test_invalid_cmiles(fulltest_client, factory_type, result_collection_type): ds._cache_data.update_entries(entries) results = result_collection_type.from_datasets(datasets=ds) assert results.n_molecules == 1 - with pytest.warns(UserWarning, match="invalid CMILES"): + with ( + pytest.warns(UserWarning, match="invalid CMILES"), + portal_client_manager(PortalClient), + ): records = results.to_records() assert len(records) == 0 diff --git a/openff/qcsubmit/utils/utils.py b/openff/qcsubmit/utils/utils.py index c296634f..8ff655ec 100644 --- a/openff/qcsubmit/utils/utils.py +++ b/openff/qcsubmit/utils/utils.py @@ -43,6 +43,7 @@ def get_optimizations( *, include: Optional[Iterable[str]] = None, ) -> Union[Optional[OptimizationRecord], List[Optional[OptimizationRecord]]]: + raise NotImplementedError() if missing_ok: logger.warning("missing_ok provided but unused by CachedPortalClient") if unpack := not isinstance(record_ids, Sequence): @@ -67,6 +68,7 @@ def get_singlepoints( *, include: Optional[Iterable[str]] = None, ) -> Union[Optional[SinglepointRecord], List[Optional[SinglepointRecord]]]: + raise NotImplementedError() if missing_ok: logger.warning("missing_ok provided but unused by CachedPortalClient") if unpack := not isinstance(record_ids, Sequence): @@ -91,6 +93,7 @@ def get_torsiondrives( *, include: Optional[Iterable[str]] = None, ) -> Union[Optional[TorsiondriveRecord], List[Optional[TorsiondriveRecord]]]: + raise NotImplementedError() if missing_ok: logger.warning("missing_ok provided but unused by CachedPortalClient") if unpack := not isinstance(record_ids, Sequence): From e408f6d3a98a5ce187b2cee34895f409db1dfb22 Mon Sep 17 00:00:00 2001 From: Brent Westbrook Date: Thu, 11 Jul 2024 13:14:20 -0400 Subject: [PATCH 14/31] re-export CachedPortalClient --- openff/qcsubmit/utils/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/openff/qcsubmit/utils/__init__.py b/openff/qcsubmit/utils/__init__.py index 2aea616d..22b1a91f 100644 --- a/openff/qcsubmit/utils/__init__.py +++ b/openff/qcsubmit/utils/__init__.py @@ -1,4 +1,5 @@ from openff.qcsubmit.utils.utils import ( + CachedPortalClient, check_missing_stereo, chunk_generator, clean_strings, @@ -22,4 +23,5 @@ "get_symmetry_classes", "get_symmetry_group", "portal_client_manager", + "CachedPortalClient", ] From b119fcd84554cee946cfa632bd4cfc9c60b74706 Mon Sep 17 00:00:00 2001 From: Brent Westbrook Date: Thu, 11 Jul 2024 13:21:34 -0400 Subject: [PATCH 15/31] fix get_optimizations and filter test --- openff/qcsubmit/_tests/results/test_filters.py | 9 ++++++--- openff/qcsubmit/utils/utils.py | 1 - 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/openff/qcsubmit/_tests/results/test_filters.py b/openff/qcsubmit/_tests/results/test_filters.py index ce27ecdc..64057289 100644 --- a/openff/qcsubmit/_tests/results/test_filters.py +++ b/openff/qcsubmit/_tests/results/test_filters.py @@ -1,12 +1,12 @@ import datetime import logging +from tempfile import TemporaryDirectory import numpy import pytest from openff.toolkit.topology import Molecule from openff.units import unit from qcelemental.models import DriverEnum -from qcportal import PortalClient from qcportal.singlepoint import QCSpecification from openff.qcsubmit._pydantic import ValidationError @@ -32,7 +32,7 @@ SMILESFilter, UnperceivableStereoFilter, ) -from openff.qcsubmit.utils import portal_client_manager +from openff.qcsubmit.utils import CachedPortalClient, portal_client_manager from . import RecordStatusEnum, SinglepointRecord @@ -547,6 +547,9 @@ def test_unperceivable_stereo_filter(toolkits, n_expected, public_client): ) assert collection.n_results == 1 - with portal_client_manager(PortalClient): + with ( + TemporaryDirectory() as d, + portal_client_manager(lambda a: CachedPortalClient(a, cache_dir=d)), + ): filtered = collection.filter(UnperceivableStereoFilter(toolkits=toolkits)) assert filtered.n_results == n_expected diff --git a/openff/qcsubmit/utils/utils.py b/openff/qcsubmit/utils/utils.py index 8ff655ec..3961362f 100644 --- a/openff/qcsubmit/utils/utils.py +++ b/openff/qcsubmit/utils/utils.py @@ -43,7 +43,6 @@ def get_optimizations( *, include: Optional[Iterable[str]] = None, ) -> Union[Optional[OptimizationRecord], List[Optional[OptimizationRecord]]]: - raise NotImplementedError() if missing_ok: logger.warning("missing_ok provided but unused by CachedPortalClient") if unpack := not isinstance(record_ids, Sequence): From 3041e137d5d4527e7ce2003cecf2ed85bf1dcb1d Mon Sep 17 00:00:00 2001 From: Brent Westbrook Date: Thu, 11 Jul 2024 14:15:47 -0400 Subject: [PATCH 16/31] tests passing again --- openff/qcsubmit/_tests/results/test_results.py | 13 ++++++++++--- openff/qcsubmit/_tests/test_submissions.py | 12 +++++++++--- openff/qcsubmit/utils/utils.py | 2 -- 3 files changed, 19 insertions(+), 8 deletions(-) diff --git a/openff/qcsubmit/_tests/results/test_results.py b/openff/qcsubmit/_tests/results/test_results.py index 9540b488..10c7812a 100644 --- a/openff/qcsubmit/_tests/results/test_results.py +++ b/openff/qcsubmit/_tests/results/test_results.py @@ -3,6 +3,7 @@ """ import datetime +from tempfile import TemporaryDirectory import pytest from openff.toolkit.topology import Molecule @@ -27,7 +28,7 @@ ) from openff.qcsubmit.results.filters import ResultFilter from openff.qcsubmit.results.results import TorsionDriveResult, _BaseResultCollection -from openff.qcsubmit.utils import portal_client_manager +from openff.qcsubmit.utils import CachedPortalClient, portal_client_manager from . import ( OptimizationRecord, @@ -317,7 +318,10 @@ def test_to_records( public_client, collection_name, spec_name=spec_name ) assert collection.n_molecules == expected_n_mols - with portal_client_manager(PortalClient): + with ( + TemporaryDirectory() as d, + portal_client_manager(lambda a: CachedPortalClient(a, d)), + ): records_and_molecules = collection.to_records() assert len(records_and_molecules) == expected_n_recs record, molecule = records_and_molecules[0] @@ -354,7 +358,10 @@ def test_optimization_to_basic_result_collection(public_client): optimization_result_collection = OptimizationResultCollection.from_server( public_client, ["OpenFF Gen 2 Opt Set 3 Pfizer Discrepancy"] ) - with portal_client_manager(PortalClient): + with ( + TemporaryDirectory() as d, + portal_client_manager(lambda a: CachedPortalClient(a, d)), + ): basic_collection = optimization_result_collection.to_basic_result_collection( "hessian" ) diff --git a/openff/qcsubmit/_tests/test_submissions.py b/openff/qcsubmit/_tests/test_submissions.py index 2f9fdaba..84332d65 100644 --- a/openff/qcsubmit/_tests/test_submissions.py +++ b/openff/qcsubmit/_tests/test_submissions.py @@ -4,6 +4,8 @@ Here we use the qcfractal snowflake fixture to set up the database. """ +from tempfile import TemporaryDirectory + import pytest from openff.toolkit.topology import Molecule from qcelemental.models.procedures import OptimizationProtocols @@ -37,7 +39,7 @@ OptimizationResultCollection, TorsionDriveResultCollection, ) -from openff.qcsubmit.utils import get_data, portal_client_manager +from openff.qcsubmit.utils import CachedPortalClient, get_data, portal_client_manager def await_results(client, timeout=120, check_fn=PortalClient.get_singlepoints, ids=[1]): @@ -1408,7 +1410,10 @@ def test_invalid_cmiles(fulltest_client, factory_type, result_collection_type): assert ds.specifications.keys() == {"default"} results = result_collection_type.from_datasets(datasets=ds) assert results.n_molecules == 1 - with portal_client_manager(PortalClient): + with ( + TemporaryDirectory() as d, + portal_client_manager(lambda a: CachedPortalClient(a, d)), + ): records = results.to_records() assert len(records) == 1 # Single points and optimizations look here @@ -1430,7 +1435,8 @@ def test_invalid_cmiles(fulltest_client, factory_type, result_collection_type): assert results.n_molecules == 1 with ( pytest.warns(UserWarning, match="invalid CMILES"), - portal_client_manager(PortalClient), + TemporaryDirectory() as d, + portal_client_manager(lambda a: CachedPortalClient(a, d)), ): records = results.to_records() assert len(records) == 0 diff --git a/openff/qcsubmit/utils/utils.py b/openff/qcsubmit/utils/utils.py index 3961362f..c296634f 100644 --- a/openff/qcsubmit/utils/utils.py +++ b/openff/qcsubmit/utils/utils.py @@ -67,7 +67,6 @@ def get_singlepoints( *, include: Optional[Iterable[str]] = None, ) -> Union[Optional[SinglepointRecord], List[Optional[SinglepointRecord]]]: - raise NotImplementedError() if missing_ok: logger.warning("missing_ok provided but unused by CachedPortalClient") if unpack := not isinstance(record_ids, Sequence): @@ -92,7 +91,6 @@ def get_torsiondrives( *, include: Optional[Iterable[str]] = None, ) -> Union[Optional[TorsiondriveRecord], List[Optional[TorsiondriveRecord]]]: - raise NotImplementedError() if missing_ok: logger.warning("missing_ok provided but unused by CachedPortalClient") if unpack := not isinstance(record_ids, Sequence): From af7b6f776221850a01c1504e1ce4974e6d4d0eba Mon Sep 17 00:00:00 2001 From: Brent Westbrook Date: Thu, 11 Jul 2024 14:16:15 -0400 Subject: [PATCH 17/31] unused import --- openff/qcsubmit/_tests/results/test_results.py | 1 - 1 file changed, 1 deletion(-) diff --git a/openff/qcsubmit/_tests/results/test_results.py b/openff/qcsubmit/_tests/results/test_results.py index 10c7812a..70841fb4 100644 --- a/openff/qcsubmit/_tests/results/test_results.py +++ b/openff/qcsubmit/_tests/results/test_results.py @@ -9,7 +9,6 @@ from openff.toolkit.topology import Molecule from openff.toolkit.typing.engines.smirnoff import ForceField from qcelemental.models import DriverEnum -from qcportal import PortalClient from qcportal.torsiondrive import ( TorsiondriveKeywords, TorsiondriveRecord, From c4507c6b3d47cc6a73ed193b670d0b608cd669ba Mon Sep 17 00:00:00 2001 From: Brent Westbrook Date: Thu, 11 Jul 2024 17:03:50 -0400 Subject: [PATCH 18/31] add private _no_session manager for CachedPortalClient --- openff/qcsubmit/utils/utils.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/openff/qcsubmit/utils/utils.py b/openff/qcsubmit/utils/utils.py index c296634f..6c63d3b2 100644 --- a/openff/qcsubmit/utils/utils.py +++ b/openff/qcsubmit/utils/utils.py @@ -108,6 +108,23 @@ def get_torsiondrives( else: return res + @contextmanager + def _no_session(self): + """This is a supplemental context manager to the ``no_internet`` + manager in _tests/utils/test_manager.py. ``PortalClient`` creates a + ``requests.Session`` on initialization that can be reused without + accessing ``socket.socket`` again. Combining ``no_internet`` and + ``client._no_session`` should completely ensure that the local cache is + used rather than re-fetching data from QCArchive. + + """ + tmp = self._req_session + self._req_session = None + try: + yield + finally: + self._req_session = tmp + def _default_portal_client(client_address) -> PortalClient: return CachedPortalClient(client_address, cache_dir=_DEFAULT_CACHE_DIR) From be7babe5c43cbf9d02b44170354726e4717164b6 Mon Sep 17 00:00:00 2001 From: Brent Westbrook Date: Thu, 11 Jul 2024 17:53:49 -0400 Subject: [PATCH 19/31] check that the cache works for optimizations and singlepoints --- .../qcsubmit/_tests/results/test_results.py | 21 ++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/openff/qcsubmit/_tests/results/test_results.py b/openff/qcsubmit/_tests/results/test_results.py index 70841fb4..59f19f40 100644 --- a/openff/qcsubmit/_tests/results/test_results.py +++ b/openff/qcsubmit/_tests/results/test_results.py @@ -317,11 +317,22 @@ def test_to_records( public_client, collection_name, spec_name=spec_name ) assert collection.n_molecules == expected_n_mols - with ( - TemporaryDirectory() as d, - portal_client_manager(lambda a: CachedPortalClient(a, d)), - ): - records_and_molecules = collection.to_records() + + with TemporaryDirectory() as d: + client = CachedPortalClient(public_client.address, d) + with portal_client_manager(lambda _: client): + with ( + client._no_session(), + pytest.raises(Exception, match="no attribute 'prepare_request'"), + ): + collection.to_records() + records_and_molecules = collection.to_records() + # TorsionDriveResultCollection.to_records requires fetching + # molecules, which cannot currently be cached + if collection_type is not TorsionDriveResultCollection: + with client._no_session(): + assert len(collection.to_records()) == len(records_and_molecules) + assert len(records_and_molecules) == expected_n_recs record, molecule = records_and_molecules[0] From a740add670385f879f430a89f38ee4c56530dfd9 Mon Sep 17 00:00:00 2001 From: Brent Westbrook Date: Fri, 12 Jul 2024 12:09:40 -0400 Subject: [PATCH 20/31] start CachedPortalClient docs, copy init signature from qcportal --- openff/qcsubmit/utils/utils.py | 50 ++++++++++++++++++++++++++++++++-- 1 file changed, 48 insertions(+), 2 deletions(-) diff --git a/openff/qcsubmit/utils/utils.py b/openff/qcsubmit/utils/utils.py index 6c63d3b2..a17bee37 100644 --- a/openff/qcsubmit/utils/utils.py +++ b/openff/qcsubmit/utils/utils.py @@ -30,8 +30,54 @@ class CachedPortalClient(PortalClient): - def __init__(self, addr, cache_dir, **client_kwargs): - super().__init__(addr, cache_dir=cache_dir, **client_kwargs) + """A cached version of a `qcportal.PortalClient + `. + """ + + def __init__( + self, + address: str, + cache_dir: str, + username: Optional[str] = None, + password: Optional[str] = None, + verify: bool = True, + show_motd: bool = True, + *, + cache_max_size: int = 0, + memory_cache_key: Optional[str] = None, + ): + """Parameters + ---------- + address + The host or IP address of the FractalServer instance, including + protocol and port if necessary ("https://ml.qcarchive.molssi.org", + "http://192.168.1.10:8888") + cache_dir + Directory to store an internal cache of records and other data. + Unlike a normal ``PortalClient``, this argument is required. + username + The username to authenticate with. + password + The password to authenticate with. + verify + Verifies the SSL connection with a third party server. This may be + False if a FractalServer was not provided an SSL certificate and + defaults back to self-signed SSL keys. + show_motd + If a Message-of-the-Day is available, display it + cache_max_size + Maximum size of the cache directory + """ + super().__init__( + address, + username=username, + password=password, + verify=verify, + show_motd=show_motd, + cache_dir=cache_dir, + cache_max_size=cache_max_size, + memory_cache_key=memory_cache_key, + ) self.record_cache = RecordCache( os.path.join(self.cache.cache_dir, "cache.sqlite"), read_only=False ) From 89ba82f213398d6f5660409476439bac71d4e250 Mon Sep 17 00:00:00 2001 From: Brent Westbrook Date: Fri, 12 Jul 2024 12:11:30 -0400 Subject: [PATCH 21/31] override __repr__ --- openff/qcsubmit/utils/utils.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/openff/qcsubmit/utils/utils.py b/openff/qcsubmit/utils/utils.py index a17bee37..c8628a5d 100644 --- a/openff/qcsubmit/utils/utils.py +++ b/openff/qcsubmit/utils/utils.py @@ -82,6 +82,19 @@ def __init__( os.path.join(self.cache.cache_dir, "cache.sqlite"), read_only=False ) + def __repr__(self) -> str: + """A short representation of the current PortalClient. + + Returns + ------- + str + The desired representation. + """ + ret = "CachedPortalClient(server_name='{}', address='{}', username='{}', cache_dir='{}')".format( + self.server_name, self.address, self.username, self.cache.cache_dir + ) + return ret + def get_optimizations( self, record_ids: Union[int, Sequence[int]], From 9850fa31d27e160a320913b193e14e9d5be7b758 Mon Sep 17 00:00:00 2001 From: Brent Westbrook Date: Fri, 12 Jul 2024 12:18:32 -0400 Subject: [PATCH 22/31] copy over and update docs from qcportal --- openff/qcsubmit/utils/utils.py | 66 +++++++++++++++++++++++++++++++++- 1 file changed, 65 insertions(+), 1 deletion(-) diff --git a/openff/qcsubmit/utils/utils.py b/openff/qcsubmit/utils/utils.py index c8628a5d..41d06f3b 100644 --- a/openff/qcsubmit/utils/utils.py +++ b/openff/qcsubmit/utils/utils.py @@ -102,6 +102,27 @@ def get_optimizations( *, include: Optional[Iterable[str]] = None, ) -> Union[Optional[OptimizationRecord], List[Optional[OptimizationRecord]]]: + """Obtain optimization records with the specified IDs. + + Records will be returned in the same order as the record ids. + + Parameters + ---------- + record_ids + Single ID or sequence/list of records to obtain + missing_ok + Unlike a ``PortalClient``, this argument is ignored. If set to + True, a warning will be printed. Any missing records will cause a + ``RuntimeError`` to be raised. + include + Additional fields to include in the returned record + + Returns + ------- + : + If a single ID was specified, returns just that record. Otherwise, + returns a list of records. + """ if missing_ok: logger.warning("missing_ok provided but unused by CachedPortalClient") if unpack := not isinstance(record_ids, Sequence): @@ -126,6 +147,28 @@ def get_singlepoints( *, include: Optional[Iterable[str]] = None, ) -> Union[Optional[SinglepointRecord], List[Optional[SinglepointRecord]]]: + """ + Obtain singlepoint records with the specified IDs. + + Records will be returned in the same order as the record ids. + + Parameters + ---------- + record_ids + Single ID or sequence/list of records to obtain + missing_ok + Unlike a ``PortalClient``, this argument is ignored. If set to + True, a warning will be printed. Any missing records will cause a + ``RuntimeError`` to be raised. + include + Additional fields to include in the returned record + + Returns + ------- + : + If a single ID was specified, returns just that record. Otherwise, + returns a list of records. + """ if missing_ok: logger.warning("missing_ok provided but unused by CachedPortalClient") if unpack := not isinstance(record_ids, Sequence): @@ -150,6 +193,28 @@ def get_torsiondrives( *, include: Optional[Iterable[str]] = None, ) -> Union[Optional[TorsiondriveRecord], List[Optional[TorsiondriveRecord]]]: + """ + Obtain torsiondrive records with the specified IDs. + + Records will be returned in the same order as the record ids. + + Parameters + ---------- + record_ids + Single ID or sequence/list of records to obtain + missing_ok + Unlike a ``PortalClient``, this argument is ignored. If set to + True, a warning will be printed. Any missing records will cause a + ``RuntimeError`` to be raised. + include + Additional fields to include in the returned record + + Returns + ------- + : + If a single ID was specified, returns just that record. Otherwise, + returns a list of records. + """ if missing_ok: logger.warning("missing_ok provided but unused by CachedPortalClient") if unpack := not isinstance(record_ids, Sequence): @@ -175,7 +240,6 @@ def _no_session(self): accessing ``socket.socket`` again. Combining ``no_internet`` and ``client._no_session`` should completely ensure that the local cache is used rather than re-fetching data from QCArchive. - """ tmp = self._req_session self._req_session = None From 0f7c65e67ecedd353ad578e3cdb6683a41d84081 Mon Sep 17 00:00:00 2001 From: Brent Westbrook Date: Fri, 12 Jul 2024 12:46:19 -0400 Subject: [PATCH 23/31] actually add CachedPortalClient to the docs page --- docs/api.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/api.rst b/docs/api.rst index f1400334..ece36b76 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -230,6 +230,7 @@ Utilities :nosignatures: :toctree: api/generated/ + CachedPortalClient portal_client_manager Exceptions From 136912be33e04476f3b4d2c8b8200b194f75a152 Mon Sep 17 00:00:00 2001 From: Brent Westbrook Date: Fri, 12 Jul 2024 12:56:44 -0400 Subject: [PATCH 24/31] fix link (I hope) --- openff/qcsubmit/utils/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openff/qcsubmit/utils/utils.py b/openff/qcsubmit/utils/utils.py index 41d06f3b..fa059925 100644 --- a/openff/qcsubmit/utils/utils.py +++ b/openff/qcsubmit/utils/utils.py @@ -31,7 +31,7 @@ class CachedPortalClient(PortalClient): """A cached version of a `qcportal.PortalClient - `. + `_. """ def __init__( From 9503c21020162c51b3896f54e0beb20b58cf2069 Mon Sep 17 00:00:00 2001 From: Brent Westbrook Date: Tue, 16 Jul 2024 14:20:44 -0400 Subject: [PATCH 25/31] test that different clients can share a cache_dir --- openff/qcsubmit/_tests/results/test_results.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/openff/qcsubmit/_tests/results/test_results.py b/openff/qcsubmit/_tests/results/test_results.py index 59f19f40..5be42443 100644 --- a/openff/qcsubmit/_tests/results/test_results.py +++ b/openff/qcsubmit/_tests/results/test_results.py @@ -318,6 +318,11 @@ def test_to_records( ) assert collection.n_molecules == expected_n_mols + def disconnected_client(addr, cache_dir): + ret = CachedPortalClient(addr, cache_dir) + ret._req_session = None + return ret + with TemporaryDirectory() as d: client = CachedPortalClient(public_client.address, d) with portal_client_manager(lambda _: client): @@ -332,6 +337,12 @@ def test_to_records( if collection_type is not TorsionDriveResultCollection: with client._no_session(): assert len(collection.to_records()) == len(records_and_molecules) + # the previous checks show that the *same* client can access + # its cache without making new requests. disconnected_client + # instead shows that a newly-constructed client pointing at the + # same cache_dir can still access the cache + with portal_client_manager(lambda addr: disconnected_client(addr, d)): + assert len(collection.to_records()) == len(records_and_molecules) assert len(records_and_molecules) == expected_n_recs record, molecule = records_and_molecules[0] From fb57acb69598c7196e93bc2b736401e4b3078e73 Mon Sep 17 00:00:00 2001 From: Brent Westbrook Date: Tue, 16 Jul 2024 14:22:02 -0400 Subject: [PATCH 26/31] restore PortalClient to default, remove now-unused default cache dir --- openff/qcsubmit/utils/utils.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/openff/qcsubmit/utils/utils.py b/openff/qcsubmit/utils/utils.py index fa059925..398bcc18 100644 --- a/openff/qcsubmit/utils/utils.py +++ b/openff/qcsubmit/utils/utils.py @@ -26,8 +26,6 @@ logger = logging.getLogger(__name__) -_DEFAULT_CACHE_DIR = "./qcsubmit_qcportal_cache" - class CachedPortalClient(PortalClient): """A cached version of a `qcportal.PortalClient @@ -250,7 +248,7 @@ def _no_session(self): def _default_portal_client(client_address) -> PortalClient: - return CachedPortalClient(client_address, cache_dir=_DEFAULT_CACHE_DIR) + return PortalClient(client_address) @contextmanager From bfa7e956bb72dfa1f1924f5fb9db11bcbb135041 Mon Sep 17 00:00:00 2001 From: Brent Westbrook Date: Tue, 16 Jul 2024 14:24:20 -0400 Subject: [PATCH 27/31] make CachedPortalClient private --- docs/api.rst | 4 ++-- openff/qcsubmit/_tests/results/test_filters.py | 4 ++-- openff/qcsubmit/_tests/results/test_results.py | 8 ++++---- openff/qcsubmit/_tests/test_submissions.py | 6 +++--- openff/qcsubmit/utils/__init__.py | 2 +- openff/qcsubmit/utils/utils.py | 2 +- 6 files changed, 13 insertions(+), 13 deletions(-) diff --git a/docs/api.rst b/docs/api.rst index ece36b76..c285a1c3 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -230,7 +230,7 @@ Utilities :nosignatures: :toctree: api/generated/ - CachedPortalClient + _CachedPortalClient portal_client_manager Exceptions @@ -263,4 +263,4 @@ Exceptions PCMSettingError InvalidDatasetError DatasetRegisterError - RecordTypeError \ No newline at end of file + RecordTypeError diff --git a/openff/qcsubmit/_tests/results/test_filters.py b/openff/qcsubmit/_tests/results/test_filters.py index 64057289..c9fac248 100644 --- a/openff/qcsubmit/_tests/results/test_filters.py +++ b/openff/qcsubmit/_tests/results/test_filters.py @@ -32,7 +32,7 @@ SMILESFilter, UnperceivableStereoFilter, ) -from openff.qcsubmit.utils import CachedPortalClient, portal_client_manager +from openff.qcsubmit.utils import _CachedPortalClient, portal_client_manager from . import RecordStatusEnum, SinglepointRecord @@ -549,7 +549,7 @@ def test_unperceivable_stereo_filter(toolkits, n_expected, public_client): with ( TemporaryDirectory() as d, - portal_client_manager(lambda a: CachedPortalClient(a, cache_dir=d)), + portal_client_manager(lambda a: _CachedPortalClient(a, cache_dir=d)), ): filtered = collection.filter(UnperceivableStereoFilter(toolkits=toolkits)) assert filtered.n_results == n_expected diff --git a/openff/qcsubmit/_tests/results/test_results.py b/openff/qcsubmit/_tests/results/test_results.py index 5be42443..61903862 100644 --- a/openff/qcsubmit/_tests/results/test_results.py +++ b/openff/qcsubmit/_tests/results/test_results.py @@ -27,7 +27,7 @@ ) from openff.qcsubmit.results.filters import ResultFilter from openff.qcsubmit.results.results import TorsionDriveResult, _BaseResultCollection -from openff.qcsubmit.utils import CachedPortalClient, portal_client_manager +from openff.qcsubmit.utils import _CachedPortalClient, portal_client_manager from . import ( OptimizationRecord, @@ -319,12 +319,12 @@ def test_to_records( assert collection.n_molecules == expected_n_mols def disconnected_client(addr, cache_dir): - ret = CachedPortalClient(addr, cache_dir) + ret = _CachedPortalClient(addr, cache_dir) ret._req_session = None return ret with TemporaryDirectory() as d: - client = CachedPortalClient(public_client.address, d) + client = _CachedPortalClient(public_client.address, d) with portal_client_manager(lambda _: client): with ( client._no_session(), @@ -381,7 +381,7 @@ def test_optimization_to_basic_result_collection(public_client): ) with ( TemporaryDirectory() as d, - portal_client_manager(lambda a: CachedPortalClient(a, d)), + portal_client_manager(lambda a: _CachedPortalClient(a, d)), ): basic_collection = optimization_result_collection.to_basic_result_collection( "hessian" diff --git a/openff/qcsubmit/_tests/test_submissions.py b/openff/qcsubmit/_tests/test_submissions.py index 84332d65..6e5823e1 100644 --- a/openff/qcsubmit/_tests/test_submissions.py +++ b/openff/qcsubmit/_tests/test_submissions.py @@ -39,7 +39,7 @@ OptimizationResultCollection, TorsionDriveResultCollection, ) -from openff.qcsubmit.utils import CachedPortalClient, get_data, portal_client_manager +from openff.qcsubmit.utils import _CachedPortalClient, get_data, portal_client_manager def await_results(client, timeout=120, check_fn=PortalClient.get_singlepoints, ids=[1]): @@ -1412,7 +1412,7 @@ def test_invalid_cmiles(fulltest_client, factory_type, result_collection_type): assert results.n_molecules == 1 with ( TemporaryDirectory() as d, - portal_client_manager(lambda a: CachedPortalClient(a, d)), + portal_client_manager(lambda a: _CachedPortalClient(a, d)), ): records = results.to_records() assert len(records) == 1 @@ -1436,7 +1436,7 @@ def test_invalid_cmiles(fulltest_client, factory_type, result_collection_type): with ( pytest.warns(UserWarning, match="invalid CMILES"), TemporaryDirectory() as d, - portal_client_manager(lambda a: CachedPortalClient(a, d)), + portal_client_manager(lambda a: _CachedPortalClient(a, d)), ): records = results.to_records() assert len(records) == 0 diff --git a/openff/qcsubmit/utils/__init__.py b/openff/qcsubmit/utils/__init__.py index 22b1a91f..4e3cd9ad 100644 --- a/openff/qcsubmit/utils/__init__.py +++ b/openff/qcsubmit/utils/__init__.py @@ -1,5 +1,5 @@ from openff.qcsubmit.utils.utils import ( - CachedPortalClient, + _CachedPortalClient, check_missing_stereo, chunk_generator, clean_strings, diff --git a/openff/qcsubmit/utils/utils.py b/openff/qcsubmit/utils/utils.py index 398bcc18..2b318586 100644 --- a/openff/qcsubmit/utils/utils.py +++ b/openff/qcsubmit/utils/utils.py @@ -27,7 +27,7 @@ logger = logging.getLogger(__name__) -class CachedPortalClient(PortalClient): +class _CachedPortalClient(PortalClient): """A cached version of a `qcportal.PortalClient `_. """ From 938bba98d2703db9a173c0bc1d8c5a2bdb37589e Mon Sep 17 00:00:00 2001 From: Brent Westbrook Date: Tue, 16 Jul 2024 14:27:11 -0400 Subject: [PATCH 28/31] update warning message to be more explicit --- openff/qcsubmit/utils/utils.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/openff/qcsubmit/utils/utils.py b/openff/qcsubmit/utils/utils.py index 2b318586..c41b26ab 100644 --- a/openff/qcsubmit/utils/utils.py +++ b/openff/qcsubmit/utils/utils.py @@ -122,7 +122,10 @@ def get_optimizations( returns a list of records. """ if missing_ok: - logger.warning("missing_ok provided but unused by CachedPortalClient") + logger.warning( + "missing_ok was set to True, but CachedPortalClient" + " doesn't actually support this so it's being set to False" + ) if unpack := not isinstance(record_ids, Sequence): record_ids = [record_ids] res = get_records_with_cache( @@ -168,7 +171,10 @@ def get_singlepoints( returns a list of records. """ if missing_ok: - logger.warning("missing_ok provided but unused by CachedPortalClient") + logger.warning( + "missing_ok was set to True, but CachedPortalClient" + " doesn't actually support this so it's being set to False" + ) if unpack := not isinstance(record_ids, Sequence): record_ids = [record_ids] res = get_records_with_cache( @@ -214,7 +220,10 @@ def get_torsiondrives( returns a list of records. """ if missing_ok: - logger.warning("missing_ok provided but unused by CachedPortalClient") + logger.warning( + "missing_ok was set to True, but CachedPortalClient" + " doesn't actually support this so it's being set to False" + ) if unpack := not isinstance(record_ids, Sequence): record_ids = [record_ids] res = get_records_with_cache( From 5f3ff4e1f591724fb3e9aa206a8e6973a6b23a67 Mon Sep 17 00:00:00 2001 From: Brent Westbrook Date: Tue, 16 Jul 2024 14:31:49 -0400 Subject: [PATCH 29/31] fix unused --- openff/qcsubmit/utils/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openff/qcsubmit/utils/__init__.py b/openff/qcsubmit/utils/__init__.py index 4e3cd9ad..cd5bcaf0 100644 --- a/openff/qcsubmit/utils/__init__.py +++ b/openff/qcsubmit/utils/__init__.py @@ -23,5 +23,5 @@ "get_symmetry_classes", "get_symmetry_group", "portal_client_manager", - "CachedPortalClient", + "_CachedPortalClient", ] From c63daabdd813c0157744180ff592a3aa7318746a Mon Sep 17 00:00:00 2001 From: Brent Westbrook Date: Wed, 17 Jul 2024 10:55:56 -0400 Subject: [PATCH 30/31] add a warning to portal_client_manager --- openff/qcsubmit/utils/utils.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/openff/qcsubmit/utils/utils.py b/openff/qcsubmit/utils/utils.py index c41b26ab..31da0ed5 100644 --- a/openff/qcsubmit/utils/utils.py +++ b/openff/qcsubmit/utils/utils.py @@ -288,6 +288,9 @@ def portal_client_manager(portal_client_fn: Callable[[str], PortalClient]): >>> with portal_client_manager(my_portal_client): >>> records_and_molecules = ds.to_records() + .. warning:: + It is not safe to share the same client across threads or to construct + multiple clients accessing the same cache database. """ global _default_portal_client original_client_fn = _default_portal_client From 9ab6ed6bf18f45cb86e8ae74abdb2634b1b464b8 Mon Sep 17 00:00:00 2001 From: Brent Westbrook Date: Wed, 17 Jul 2024 11:35:14 -0400 Subject: [PATCH 31/31] move warning up --- openff/qcsubmit/utils/utils.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/openff/qcsubmit/utils/utils.py b/openff/qcsubmit/utils/utils.py index 31da0ed5..fa61cecd 100644 --- a/openff/qcsubmit/utils/utils.py +++ b/openff/qcsubmit/utils/utils.py @@ -269,6 +269,10 @@ def portal_client_manager(portal_client_fn: Callable[[str], PortalClient]): keyword arguments to the ``PortalClient``, such as ``verify=False`` or a ``cache_dir``. + .. warning:: + It is not safe to share the same client across threads or to construct + multiple clients accessing the same cache database. + Parameters ---------- portal_client_fn: @@ -287,10 +291,6 @@ def portal_client_manager(portal_client_fn: Callable[[str], PortalClient]): >>> return PortalClient(client_address, cache_dir=".") >>> with portal_client_manager(my_portal_client): >>> records_and_molecules = ds.to_records() - - .. warning:: - It is not safe to share the same client across threads or to construct - multiple clients accessing the same cache database. """ global _default_portal_client original_client_fn = _default_portal_client