Merge pull request #45 from bamr87:master

Clean up
bamr87 · Nov 29, 2023 · 9a7a097 · 9a7a097
2 parents a2ad65a + 5e9244e
commit 9a7a097
Show file tree

Hide file tree

Showing 34 changed files with 17,275 additions and 134 deletions.
diff --git a/.frontmatter/database/pinnedItemsDb.json b/.frontmatter/database/pinnedItemsDb.json
@@ -0,0 +1 @@
+{}
diff --git a/.frontmatter/database/taxonomyDb.json b/.frontmatter/database/taxonomyDb.json
@@ -0,0 +1 @@
+{"taxonomy":{"categories":[" ","-","H","Nany","Posts","T","a","cheetsheet","d","e","github","guides","home","j","k","l","machine-setup","n","n00b","notes","o","p","posts","quest","quests","quickstart","s","search","t","u","w","y"]}}
diff --git a/.vscode/it-journey.code-workspace b/.vscode/it-journey.code-workspace
@@ -17,7 +17,11 @@
 			"editor.wordWrap": "wordWrapColumn",
 			"editor.wordWrapColumn": 64,
 			"editor.lineNumbers": "off",
-			"editor.quickSuggestions": false,
+			"editor.quickSuggestions": {
+				"comments": "off",
+				"strings": "off",
+				"other": "off"
+			},
 			"editor.minimap.enabled": false
 		},
 		"githubPullRequests.ignoredPullRequestBranches": [

diff --git a/Gemfile b/Gemfile
@@ -10,25 +10,28 @@ source "https://rubygems.org"
 # This will help ensure the proper Jekyll version is running.
 # Happy Jekylling!
 gem 'jekyll' , '3.9.2'
+
 # This is the default theme for new Jekyll sites. You may change this to anything you like.
 # If you want to use GitHub Pages, remove the "gem "jekyll"" above and
 # uncomment the line below. To upgrade, run `bundle update github-pages`.
 
-gem "github-pages"
 # dependencies https://pages.github.com/versions/
 # , group: :jekyll_plugins
 
 # If you have any jekyll plugins, put them here!
 group :jekyll_plugins do
-  gem 'jekyll-feed', "~> 0.12"
-  gem 'jekyll-assets', "~> 1.0.0"
-  gem 'jekyll-sitemap' , "~> 1.4.0"
-  gem 'jekyll-seo-tag', "~> 2.8.0"
+  gem "github-pages"
   gem 'jekyll-algolia' 
+  gem 'jekyll-assets', "~> 1.0.0"
+  gem 'jekyll-mermaid'
+  # gem 'jekyll-spaceship'
+
+# these are all part of the github-pages gem
+  # gem 'jekyll-feed', "~> 0.12"
+  # gem 'jekyll-sitemap' , "~> 1.4.0"
+  # gem 'jekyll-seo-tag', "~> 2.8.0"
   # gem 'jekyll-redirect-from'
   # gem 'jekyll-paginate'
-  # gem 'jekyll-mermaid'
-  # gem 'jekyll-spaceship'
   # gem 'jemoji' # Doesn't work for some reason
 end
 

diff --git a/LICENSE b/LICENSE
@@ -1,6 +1,6 @@
 MIT License
 
-Copyright (c) 2021 bamr87
+Copyright (c) 2023 bamr87.github.io
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal

diff --git a/_config.yml b/_config.yml
@@ -150,20 +150,28 @@ powered_by:
 # All plugins are enabled by default for GitHub Pages. https://pages.github.com/versions/
 
 plugins:
-  # - github-pages
+  - github-pages
   - jekyll-feed
   - jekyll-sitemap
   - jekyll-assets
   - jekyll-algolia
+  - jekyll-mermaid # https://mermaidjs.github.io/
+
   # - jekyll-spaceship
   # - jemoji # Doesn't work for some reason
   # - jekyll-postcss
-  # - jekyll-redirect-from
-
+
+# https://github.blog/2022-02-14-include-diagrams-markdown-files-mermaid/
+
+mermaid:
+  src: 'mermaid/src/mermaid.js'
+
 ## Conversion
+# Markdown Options https://jekyllrb.com/docs/configuration/markdown/
+# https://github.com/github/jekyll-commonmark-ghpages
 
 markdown: kramdown
-highlighter: rouge
+# highlighter: rouge
 # lsi: false
 # excerpt_separator: "\n\n"
 # incremental: false
@@ -303,7 +311,7 @@ defaults:
       path: pages/_docs
     values:
       layout: *pages
-      category: docs
+      # category: docs
       show_date: true
       author_profile: true
       read_time: true

diff --git a/_data/bookstore/book-data-clean.py b/_data/bookstore/book-data-clean.py
@@ -0,0 +1,32 @@
+import csv
+from collections import defaultdict
+
+# Input: the combined mentions CSV. Output: one cleaned row per source JSON file.
+combined_csv_file = 'combined_output.csv'
+cleaned_csv_file = 'cleaned_output.csv'
+
+# Maps a mention type from the input to the output column that stores its text.
+column_for_type = {"book_title": "title", "book_author": "author"}
+
+# JSON file name -> {"title": ..., "author": ...}; inner dicts appear on first write.
+book_data = defaultdict(dict)
+
+# Collect the title/author mention text for every JSON file in the combined CSV.
+with open(combined_csv_file, 'r', newline='', encoding='utf-8') as source:
+    for record in csv.DictReader(source):
+        json_file = record["JSON_file"]
+        mention_type = record["type"]
+        mention_text = record["mentionText"]
+        column = column_for_type.get(mention_type)
+        if column is not None:
+            # A later mention of the same type overwrites an earlier one.
+            book_data[json_file][column] = mention_text
+
+# Emit one row per JSON file; a missing title or author becomes an empty cell.
+with open(cleaned_csv_file, 'w', newline='', encoding='utf-8') as target:
+    writer = csv.DictWriter(target, fieldnames=["JSON_file", "title", "author"])
+    writer.writeheader()
+    writer.writerows(
+        {"JSON_file": name, "title": fields.get("title", ""), "author": fields.get("author", "")}
+        for name, fields in book_data.items()
+    )
+
+print("Cleaned CSV file creation completed.")
diff --git a/_data/bookstore/book-data-verify.py b/_data/bookstore/book-data-verify.py
@@ -0,0 +1,76 @@
+import csv
+import requests
+from difflib import get_close_matches
+
+# Specify the cleaned CSV file and output verified CSV file
+cleaned_csv_file = 'cleaned_output.csv'
+verified_csv_file = 'verified_output.csv'
+
+# Function to verify book title and author using the Open Library API
+def verify_with_open_library(title, author):
+    """Look up a book on Open Library and return its listed title and author.
+
+    Args:
+        title: Book title as read from the cleaned CSV.
+        author: Author name as read from the cleaned CSV.
+
+    Returns:
+        A ``(title, author)`` tuple taken from the first search result, or
+        ``(None, None)`` when the request fails, the reply is not a successful
+        JSON response, or the search finds nothing.
+    """
+    try:
+        # Passing the values through ``params`` lets requests percent-encode
+        # them, so "&", "#", "+" or spaces in a title cannot corrupt the query.
+        response = requests.get(
+            'https://openlibrary.org/search.json',
+            params={'title': title, 'author': author},
+            timeout=10,  # one stalled request must not hang the whole run
+        )
+        if response.status_code != 200:
+            return None, None
+        data = response.json()
+    except (requests.RequestException, ValueError):
+        # Transport failure or a non-JSON body: report it as "no match", the
+        # same way a non-200 status is reported.
+        return None, None
+
+    docs = data.get('docs') or []
+    if data.get('num_found', 0) <= 0 or not docs:
+        return None, None
+
+    # Use the details of the first matching book.
+    first_book = docs[0]
+    corrected_title = first_book.get('title', title)
+    # ``author_name`` can be absent or an empty list; fall back to the input.
+    author_names = first_book.get('author_name') or [author]
+    return corrected_title, author_names[0]
+
+# Function to get ISBN based on verified title
+def get_isbn(title):
+    """Return the first ISBN Open Library lists for *title*.
+
+    Args:
+        title: Book title to search for.
+
+    Returns:
+        The first ISBN of the first search result, or the string
+        ``"ISBN Not Found"`` when the request fails, the reply is not a
+        successful JSON response, or no result carries an ISBN.
+    """
+    not_found = "ISBN Not Found"
+    try:
+        # ``params`` makes requests percent-encode the title, so characters
+        # such as "&" or "#" cannot truncate or alter the query.
+        response = requests.get(
+            'https://openlibrary.org/search.json',
+            params={'title': title},
+            timeout=10,  # one stalled request must not hang the whole run
+        )
+        if response.status_code != 200:
+            return not_found
+        data = response.json()
+    except (requests.RequestException, ValueError):
+        # Transport failure or a non-JSON body: same result as a non-200 status.
+        return not_found
+
+    docs = data.get('docs') or []
+    if data.get('num_found', 0) <= 0 or not docs:
+        return not_found
+
+    # Use the details of the first matching book.
+    identifiers = docs[0].get('isbn') or []
+    return identifiers[0] if identifiers else not_found
+
+# Read the cleaned CSV file, verify every row against Open Library, and write
+# the original values next to the verified ones.
+with open(cleaned_csv_file, 'r', newline='', encoding='utf-8') as csv_file, \
+        open(verified_csv_file, 'w', newline='', encoding='utf-8') as verified_csv:
+    reader = csv.DictReader(csv_file)
+    field_names = ["JSON_file", "title", "author", "title_verified", "author_verified", "isbn"]
+    writer = csv.DictWriter(verified_csv, fieldnames=field_names)
+    writer.writeheader()
+
+    for row in reader:
+        json_file = row["JSON_file"]
+        title = row["title"]
+        author = row["author"]
+
+        # Verify the title and author using the Open Library API
+        corrected_title, corrected_author = verify_with_open_library(title, author)
+
+        # Fallback when the API has no match: choose the comma-separated piece
+        # of the original value that is closest to the whole value. For a value
+        # without commas this is simply the stripped original.
+        title_found = True
+        if corrected_title is None:
+            title_parts = [part.strip() for part in title.split(",")]
+            closest_titles = get_close_matches(title, title_parts)
+            if closest_titles:
+                corrected_title = closest_titles[0]
+            else:
+                corrected_title = "Not Found"
+                title_found = False
+
+        if corrected_author is None:
+            author_parts = [part.strip() for part in author.split(",")]
+            closest_authors = get_close_matches(author, author_parts)
+            corrected_author = closest_authors[0] if closest_authors else "Not Found"
+
+        # Only look up an ISBN for a real title. Searching the API for the
+        # "Not Found" placeholder would attach an unrelated book's ISBN.
+        isbn = get_isbn(corrected_title) if title_found else "ISBN Not Found"
+
+        # Write the verified data to the output CSV file
+        writer.writerow({
+            "JSON_file": json_file,
+            "title": title,
+            "author": author,
+            "title_verified": corrected_title,
+            "author_verified": corrected_author,
+            "isbn": isbn
+        })
+
+print("Verified CSV file creation completed.")
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		{"taxonomy":{"categories":[" ","-","H","Nany","Posts","T","a","cheetsheet","d","e","github","guides","home","j","k","l","machine-setup","n","n00b","notes","o","p","posts","quest","quests","quickstart","s","search","t","u","w","y"]}}