Skip to content

Commit

Permalink
Merge pull request #45 from bamr87:master
Browse files Browse the repository at this point in the history
Clean up
  • Loading branch information
bamr87 authored Nov 29, 2023
2 parents a2ad65a + 5e9244e commit 9a7a097
Show file tree
Hide file tree
Showing 34 changed files with 17,275 additions and 134 deletions.
1 change: 1 addition & 0 deletions .frontmatter/database/pinnedItemsDb.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{}
1 change: 1 addition & 0 deletions .frontmatter/database/taxonomyDb.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"taxonomy":{"categories":[" ","-","H","Nany","Posts","T","a","cheetsheet","d","e","github","guides","home","j","k","l","machine-setup","n","n00b","notes","o","p","posts","quest","quests","quickstart","s","search","t","u","w","y"]}}
6 changes: 5 additions & 1 deletion .vscode/it-journey.code-workspace
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,11 @@
"editor.wordWrap": "wordWrapColumn",
"editor.wordWrapColumn": 64,
"editor.lineNumbers": "off",
"editor.quickSuggestions": false,
"editor.quickSuggestions": {
"comments": "off",
"strings": "off",
"other": "off"
},
"editor.minimap.enabled": false
},
"githubPullRequests.ignoredPullRequestBranches": [
Expand Down
17 changes: 10 additions & 7 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -10,25 +10,28 @@ source "https://rubygems.org"
# This will help ensure the proper Jekyll version is running.
# Happy Jekylling!
gem 'jekyll' , '3.9.2'

# This is the default theme for new Jekyll sites. You may change this to anything you like.
# If you want to use GitHub Pages, remove the "gem "jekyll"" above and
# uncomment the line below. To upgrade, run `bundle update github-pages`.

gem "github-pages"
# dependencies: https://pages.github.com/versions/
# , group: :jekyll_plugins

# If you have any jekyll plugins, put them here!
group :jekyll_plugins do
gem 'jekyll-feed', "~> 0.12"
gem 'jekyll-assets', "~> 1.0.0"
gem 'jekyll-sitemap' , "~> 1.4.0"
gem 'jekyll-seo-tag', "~> 2.8.0"
gem "github-pages"
gem 'jekyll-algolia'
gem 'jekyll-assets', "~> 1.0.0"
gem 'jekyll-mermaid'
# gem 'jekyll-spaceship'

# these are all part of the github-pages gem
# gem 'jekyll-feed', "~> 0.12"
# gem 'jekyll-sitemap' , "~> 1.4.0"
# gem 'jekyll-seo-tag', "~> 2.8.0"
# gem 'jekyll-redirect-from'
# gem 'jekyll-paginate'
# gem 'jekyll-mermaid'
# gem 'jekyll-spaceship'
# gem 'jemoji' # Doesn't work for some reason
end

Expand Down
2 changes: 1 addition & 1 deletion LICENSE
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
MIT License

Copyright (c) 2021 bamr87
Copyright (c) 2023 bamr87.github.io

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
Expand Down
18 changes: 13 additions & 5 deletions _config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -150,20 +150,28 @@ powered_by:
# All plugins are enabled by default for GitHub Pages. https://pages.github.com/versions/

plugins:
# - github-pages
- github-pages
- jekyll-feed
- jekyll-sitemap
- jekyll-assets
- jekyll-algolia
- jekyll-mermaid # https://mermaidjs.github.io/

# - jekyll-spaceship
# - jemoji # Doesn't work for some reason
# - jekyll-postcss
# - jekyll-redirect-from


# https://github.blog/2022-02-14-include-diagrams-markdown-files-mermaid/

mermaid:
src: 'mermaid/src/mermaid.js'

## Conversion
# Markdown Options https://jekyllrb.com/docs/configuration/markdown/
# https://github.com/github/jekyll-commonmark-ghpages

markdown: kramdown
highlighter: rouge
# highlighter: rouge
# lsi: false
# excerpt_separator: "\n\n"
# incremental: false
Expand Down Expand Up @@ -303,7 +311,7 @@ defaults:
path: pages/_docs
values:
layout: *pages
category: docs
# category: docs
show_date: true
author_profile: true
read_time: true
Expand Down
32 changes: 32 additions & 0 deletions _data/bookstore/book-data-clean.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import csv
from collections import defaultdict

# Input: the combined CSV with one row per extracted mention.
# Output: a cleaned CSV with one row per source JSON file.
combined_csv_file = 'combined_output.csv'
cleaned_csv_file = 'cleaned_output.csv'

# Maps each source JSON file name to the {"title": ..., "author": ...}
# values collected for it. An entry is created only when a title or author
# mention is actually seen for that file.
book_data = defaultdict(dict)

# Mention types of interest and the output column each one feeds.
column_for_type = {"book_title": "title", "book_author": "author"}

# Pass 1: collect the title/author mention text per JSON file. A later
# mention of the same type for the same file replaces the earlier one.
with open(combined_csv_file, 'r', newline='', encoding='utf-8') as source:
    for record in csv.DictReader(source):
        # Read all three columns up front so a missing column fails on the
        # first row regardless of its mention type.
        origin, kind, text = record["JSON_file"], record["type"], record["mentionText"]
        column = column_for_type.get(kind)
        if column is not None:
            book_data[origin][column] = text

# Pass 2: emit one row per JSON file, leaving a column blank when no mention
# of that type was found.
with open(cleaned_csv_file, 'w', newline='', encoding='utf-8') as target:
    out = csv.DictWriter(target, fieldnames=["JSON_file", "title", "author"])
    out.writeheader()
    out.writerows(
        {
            "JSON_file": origin,
            "title": found.get("title", ""),
            "author": found.get("author", ""),
        }
        for origin, found in book_data.items()
    )

print("Cleaned CSV file creation completed.")
76 changes: 76 additions & 0 deletions _data/bookstore/book-data-verify.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
import csv
import requests
from difflib import get_close_matches

# Input: the cleaned CSV (one row per source JSON file with title/author).
cleaned_csv_file = "cleaned_output.csv"
# Output: the same rows plus verified title, verified author and ISBN.
verified_csv_file = "verified_output.csv"

def verify_with_open_library(title, author, timeout=10):
    """Look up a book via the Open Library search endpoint.

    The title and author are sent as query parameters so that spaces,
    ampersands and other reserved characters are URL-encoded instead of
    corrupting the query string.

    Args:
        title: Book title to search for.
        author: Author name to search for.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A ``(title, author)`` tuple taken from the first search result. The
        input title/author is used for whichever field the result lacks.
        ``(None, None)`` is returned when the request fails, the response is
        not HTTP 200, the body is not valid JSON, or no result is reported.
    """
    try:
        response = requests.get(
            'http://openlibrary.org/search.json',
            params={'title': title, 'author': author},
            timeout=timeout,
        )
    except requests.RequestException:
        # Treat transport failures like any other unsuccessful lookup so one
        # bad request does not abort a whole batch run.
        return None, None

    if response.status_code != 200:
        return None, None

    try:
        data = response.json()
    except ValueError:
        return None, None

    docs = data.get('docs') or []
    if data.get('num_found', 0) <= 0 or not docs:
        return None, None

    # Use the details of the first matching book.
    first_book = docs[0]
    corrected_title = first_book.get('title', title)
    # Fall back to the input author when the key is missing or the list is empty.
    author_names = first_book.get('author_name') or [author]
    return corrected_title, author_names[0]

def get_isbn(title, timeout=10):
    """Return an ISBN for ``title`` from the Open Library search endpoint.

    The title is sent as a query parameter so that reserved characters are
    URL-encoded rather than breaking the query string.

    Args:
        title: Book title to search for.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The first entry of the first search result's ``isbn`` list, or the
        string ``"ISBN Not Found"`` when the request fails, the response is
        not HTTP 200, the body is not valid JSON, there is no result, or the
        result carries no ISBN.
    """
    not_found = "ISBN Not Found"

    try:
        response = requests.get(
            'http://openlibrary.org/search.json',
            params={'title': title},
            timeout=timeout,
        )
    except requests.RequestException:
        # A failed lookup is reported the same way as a missing ISBN.
        return not_found

    if response.status_code != 200:
        return not_found

    try:
        data = response.json()
    except ValueError:
        return not_found

    docs = data.get('docs') or []
    if data.get('num_found', 0) <= 0 or not docs:
        return not_found

    # Use the details of the first matching book.
    identifiers = docs[0].get('isbn') or []
    return identifiers[0] if identifiers else not_found

# Read the cleaned CSV file, verify each title/author against Open Library,
# and write the original values next to the verified ones.
with open(cleaned_csv_file, 'r', newline='', encoding='utf-8') as csv_file:
    reader = csv.DictReader(csv_file)

    # Open the verified CSV file for writing
    with open(verified_csv_file, 'w', newline='', encoding='utf-8') as verified_csv:
        field_names = ["JSON_file", "title", "author", "title_verified", "author_verified", "isbn"]
        writer = csv.DictWriter(verified_csv, fieldnames=field_names)
        writer.writeheader()

        for row in reader:
            json_file = row["JSON_file"]
            title = row["title"]
            author = row["author"]

            # Verify the title and author using the Open Library API
            corrected_title, corrected_author = verify_with_open_library(title, author)

            # If no match is found, fall back to difflib.
            # NOTE(review): the candidates are the comma-separated pieces of
            # the same field, so this can only yield the raw value or one of
            # its parts; no external list of titles/authors is consulted.
            title_resolved = True
            if corrected_title is None:
                title_parts = [part.strip() for part in title.split(",")]
                closest_titles = get_close_matches(title, title_parts)
                if closest_titles:
                    corrected_title = closest_titles[0]
                else:
                    corrected_title = "Not Found"
                    title_resolved = False

            if corrected_author is None:
                author_parts = [part.strip() for part in author.split(",")]
                closest_authors = get_close_matches(author, author_parts)
                corrected_author = closest_authors[0] if closest_authors else "Not Found"

            # Only look up an ISBN when there is a real title to search for.
            # Searching for the "Not Found" placeholder (or an empty title)
            # could return the ISBN of an unrelated book.
            if title_resolved and corrected_title:
                isbn = get_isbn(corrected_title)
            else:
                isbn = "ISBN Not Found"

            # Write the verified data to the output CSV file
            writer.writerow({
                "JSON_file": json_file,
                "title": title,
                "author": author,
                "title_verified": corrected_title,
                "author_verified": corrected_author,
                "isbn": isbn
            })

print("Verified CSV file creation completed.")
Loading

0 comments on commit 9a7a097

Please sign in to comment.