-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
7d8d077
commit 0af498f
Showing
9 changed files
with
2,867 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
# clean-links | ||
|
||
[![Release](https://img.shields.io/github/v/release/stringertheory/clean-links)](https://img.shields.io/github/v/release/stringertheory/clean-links) | ||
[![Build status](https://img.shields.io/github/actions/workflow/status/stringertheory/clean-links/main.yml?branch=main)](https://github.com/stringertheory/clean-links/actions/workflows/main.yml?query=branch%3Amain) | ||
[![codecov](https://codecov.io/gh/stringertheory/clean-links/branch/main/graph/badge.svg)](https://codecov.io/gh/stringertheory/clean-links) | ||
[![Commit activity](https://img.shields.io/github/commit-activity/m/stringertheory/clean-links)](https://img.shields.io/github/commit-activity/m/stringertheory/clean-links) | ||
[![License](https://img.shields.io/github/license/stringertheory/clean-links)](https://img.shields.io/github/license/stringertheory/clean-links) | ||
|
||
Tools for cleaning up links | ||
|
||
- **Github repository**: <https://github.com/stringertheory/clean-links/> | ||
- **Documentation**: <https://stringertheory.github.io/clean-links/> | ||
|
||
## Getting started with your project | ||
|
||
First, create a repository on GitHub with the same name as this project, and then run the following commands: | ||
|
||
```bash | ||
git init -b main | ||
git add . | ||
git commit -m "init commit" | ||
git remote add origin git@github.com:stringertheory/clean-links.git | ||
git push -u origin main | ||
``` | ||
|
||
Finally, install the environment and the pre-commit hooks with | ||
|
||
```bash | ||
make install | ||
``` | ||
|
||
You are now ready to start development on your project! | ||
The CI/CD pipeline will be triggered when you open a pull request, merge to main, or when you create a new release. | ||
|
||
To finalize the set-up for publishing to PyPI or Artifactory, see [here](https://fpgmaas.github.io/cookiecutter-poetry/features/publishing/#set-up-for-pypi). | ||
For activating the automatic documentation with MkDocs, see [here](https://fpgmaas.github.io/cookiecutter-poetry/features/mkdocs/#enabling-the-documentation-on-github). | ||
To enable the code coverage reports, see [here](https://fpgmaas.github.io/cookiecutter-poetry/features/codecov/). | ||
|
||
## Releasing a new version | ||
|
||
- Create an API Token on [PyPI](https://pypi.org/). | ||
- Add the API Token to your project's secrets with the name `PYPI_TOKEN` by visiting [this page](https://github.com/stringertheory/clean-links/settings/secrets/actions/new). | ||
- Create a [new release](https://github.com/stringertheory/clean-links/releases/new) on Github. | ||
- Create a new tag in the form `*.*.*`. | ||
|
||
For more details, see [here](https://fpgmaas.github.io/cookiecutter-poetry/features/cicd/#how-to-trigger-a-release). | ||
|
||
--- | ||
|
||
Repository initiated with [fpgmaas/cookiecutter-poetry](https://github.com/fpgmaas/cookiecutter-poetry). |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
import logging | ||
import re | ||
from urllib.parse import parse_qs, urlencode, urlsplit | ||
|
||
from clean_links.config import read_config | ||
from clean_links.unshorten import unshorten_url | ||
|
||
# Rule set loaded once at import time via read_config(); clear_url() iterates
# over its "providers" mapping.
clear_urls_rules = read_config()
|
||
|
||
def query_string(url: str, rules: list) -> str:
    """Return the path and query of *url* with unwanted parameters removed.

    Each entry of *rules* is a regular expression that is matched,
    case-insensitively, against the complete parameter name. Parameters
    whose name matches any rule, and parameters with an empty name, are
    dropped. The remaining parameters are re-encoded with ``urlencode``.
    Only the path and query are returned; the scheme, host and fragment of
    *url* are not part of the result.

    Note that ``parse_qs`` is used with its defaults, so parameters with a
    blank value (``?a=``) do not survive the round trip.

    Args:
        url: URL (or bare path) whose query string should be filtered.
        rules: Regex patterns for the parameter names to remove.

    Returns:
        ``path?query`` when parameters remain, otherwise just ``path``.

    Raises:
        re.error: If the query has parameters and a rule is not a valid
            regular expression.
    """
    split = urlsplit(url)
    params = parse_qs(split.query)

    # Compile once per rule instead of once per (rule, key) pair, but only
    # when there is something to match: rules are left untouched for URLs
    # without a query, so a bad pattern cannot fail a no-op call.
    patterns = (
        [re.compile(rule, flags=re.IGNORECASE) for rule in rules]
        if params
        else []
    )

    # fullmatch rather than "^" + rule + "$": with a top-level alternation
    # such as "a|b" the concatenated form parses as "(^a)|(b$)" and would
    # also drop every key that merely starts with "a".
    kept = {
        key: values
        for key, values in params.items()
        if key and not any(pattern.fullmatch(key) for pattern in patterns)
    }

    params_string = urlencode(kept, doseq=True)
    if params_string:
        return f"{split.path}?{params_string}"
    return split.path
|
||
|
||
def match_provider(provider: str, url: str, rules: dict) -> bool:
    """Tell whether the rules of *provider* apply to *url*.

    The provider applies when ``rules["urlPattern"]`` matches at the start
    of *url* and none of the patterns in ``rules["exceptions"]`` does.

    Args:
        provider: Provider name, used only in the log message.
        url: URL to test.
        rules: Provider entry with the keys ``"urlPattern"`` and
            ``"exceptions"``.

    Returns:
        ``True`` if the URL pattern matches and no exception matches.
    """
    url_matches = re.match(rules["urlPattern"], url) is not None

    excepted = False
    for pattern in rules["exceptions"]:
        try:
            excepted = re.match(pattern, url) is not None
        except Exception:
            # A broken exception pattern is reported and skipped so that
            # one bad rule does not disable the whole provider.
            logging.exception(
                f"something's wrong with regex {pattern!r} "
                f"for provider {provider!r}."
            )
        if excepted:
            break

    return url_matches and not excepted
|
||
|
||
def clear_url(
    url: str, keep_query: bool = True, keep_fragment: bool = True
) -> str:
    """Clean *url* with every provider in ``clear_urls_rules`` that matches.

    Providers are visited in the order of ``clear_urls_rules["providers"]``.
    For each provider that matches the current URL, its ``"rawRules"`` are
    removed from the URL text (case-insensitively), then the query string
    and the fragment are filtered with its ``"rules"`` and the URL is
    rebuilt. Later providers see the result of earlier ones.

    Args:
        url: URL to clean.
        keep_query: Keep the filtered query string; when ``False`` the
            whole query is discarded for matching providers.
        keep_fragment: Keep the filtered fragment; when ``False`` the
            fragment is discarded for matching providers.

    Returns:
        The cleaned URL, or *url* unchanged when it is empty or no provider
        matches.
    """
    if not url:
        # Nothing to clean. Without this guard a provider whose pattern
        # matches the empty string would rebuild "" as "://".
        return url

    for provider_name, rules in clear_urls_rules["providers"].items():
        if not match_provider(provider_name, url, rules):
            continue

        for raw_rule in rules["rawRules"]:
            url = re.sub(raw_rule, "", url, flags=re.IGNORECASE)

        split = urlsplit(url)
        if keep_query:
            relative = query_string(url, rules["rules"])
        else:
            relative = split.path

        if keep_fragment:
            # The fragment is filtered like a URL of its own, so tracking
            # parameters placed after "#...?" are removed as well.
            fragment = query_string(split.fragment, rules["rules"])
            if fragment:
                relative += "#" + fragment

        url = f"{split.scheme}://{split.netloc}{relative}"

    return url
|
||
|
||
def main() -> None:
    """Manual demo: resolve one sample short link and clean the result.

    Prints the sample URL, the URL it resolves to and the cleaned URL, with
    a blank line between them.
    """
    # Other inputs that have been used for manual experiments:
    # url = "https://www.amazon.com/Kobo-Glare-Free-Touchscreen-ComfortLight-Adjustable/dp/B0BCXLQNCC/ref=pd_ci_mcx_mh_mcx_views_0?pd_rd_w=Dx5dF&content-id=amzn1.sym.225b4624-972d-4629-9040-f1bf9923dd95%3Aamzn1.symc.40e6a10e-cbc4-4fa5-81e3-4435ff64d03b&pf_rd_p=225b4624-972d-4629-9040-f1bf9923dd95&pf_rd_r=A7JSDJGYR33BN5GRCV7V&pd_rd_wg=xW6Yf&pd_rd_r=4b8a3532-9e28-4857-a929-5e572d2c765f&pd_rd_i=B0BCXLQNCC"
    # url = "https://tinyurl.com/yc2ft9m5"
    # url = "https://bit.ly/3C4WXQ9"
    # url = 'https://tinyurl.com/NewwAlemAndKibrom'
    # url = "https://hubs.la/Q01HRjhm0"
    # url = "https://buff.ly/3Omwkwd"
    # url = "https://bit.ly/48RtRlw"
    # url = "https://srv.buysellads.com/ads/long/x/TCHU7KSHTTTTTTH6NPRNPTTTTTTFNZMBKWTTTTTTA4RZC7VTTTTTTBZI5HINWLB6G3DIEMS4PABU5AIEQQY6BADG2HUT"
    # url = "https://buff.ly/2RjYjMt"
    url = "https://trib.al/5m7fAg3"

    print(url)
    print()

    # An empty string is used when the result has no "resolved" entry.
    resolved = unshorten_url(url).get("resolved", "")
    print(resolved)
    print()

    # Pass keep_query=False and/or keep_fragment=False to drop the query
    # string or fragment entirely instead of filtering them.
    clear = clear_url(resolved)
    print(clear)
|
||
|
||
# Run the manual demo only when this file is executed as a script, not when
# it is imported.
if __name__ == "__main__":
    main()
Oops, something went wrong.