From 2b7f60abc89df7ef3216eb32a93a497a581a7535 Mon Sep 17 00:00:00 2001
From: Maruan <alshedivat@users.noreply.github.com>
Date: Sat, 18 Jan 2025 22:30:01 -0500
Subject: [PATCH 1/6] [unbundle] Extract citation count plugins into a separate
 gem (#2967)

Removes `_plugins/google-scholar-citations.rb` and
`_plugins/inspirehep-citations.rb` and switches to using `al_citations`
gem instead.
---
 Gemfile                              |  5 ++
 Gemfile.lock                         | 10 ++++
 _config.yml                          |  3 +
 _plugins/google-scholar-citations.rb | 85 ----------------------------
 _plugins/inspirehep-citations.rb     | 57 -------------------
 5 files changed, 18 insertions(+), 142 deletions(-)
 delete mode 100644 _plugins/google-scholar-citations.rb
 delete mode 100644 _plugins/inspirehep-citations.rb

diff --git a/Gemfile b/Gemfile
index c9a27061d1dc..126b3ceeff96 100644
--- a/Gemfile
+++ b/Gemfile
@@ -36,3 +36,8 @@ group :other_plugins do
     # gem 'unicode_utils' -- should be already installed by jekyll
     # gem 'webrick' -- should be already installed by jekyll
 end
+
+# Gems for al-folio plugins
+group :al_folio_plugins do
+    gem 'al_citations', :git => 'https://github.com/al-org-dev/al-citations.git'
+end
diff --git a/Gemfile.lock b/Gemfile.lock
index 98c191d96378..c8e3567d2f0f 100644
--- a/Gemfile.lock
+++ b/Gemfile.lock
@@ -6,6 +6,15 @@ GIT
       jekyll (>= 0.10.0)
       terser (>= 1.0.0)
 
+GIT
+  remote: https://github.com/al-org-dev/al-citations.git
+  revision: d6dd73152f8ceafe475f0f0ad5c846167c006e4f
+  specs:
+    al_citations (0.1.0)
+      activesupport
+      jekyll (>= 3.0)
+      nokogiri
+
 GEM
   remote: https://rubygems.org/
   specs:
@@ -255,6 +264,7 @@ PLATFORMS
   x86_64-linux-musl
 
 DEPENDENCIES
+  al_citations!
   classifier-reborn
   css_parser
   feedjira
diff --git a/_config.yml b/_config.yml
index 923db92af259..6b8b838fed64 100644
--- a/_config.yml
+++ b/_config.yml
@@ -226,6 +226,9 @@ plugins:
   - jekyll-twitter-plugin
   - jemoji
 
+  # al-folio plugins
+  - al_citations
+
 # Sitemap settings
 defaults:
   - scope:
diff --git a/_plugins/google-scholar-citations.rb b/_plugins/google-scholar-citations.rb
deleted file mode 100644
index 7fde002626d4..000000000000
--- a/_plugins/google-scholar-citations.rb
+++ /dev/null
@@ -1,85 +0,0 @@
-require "active_support/all"
-require 'nokogiri'
-require 'open-uri'
-
-module Helpers
-  extend ActiveSupport::NumberHelper
-end
-
-module Jekyll
-  class GoogleScholarCitationsTag < Liquid::Tag
-    Citations = { }
-
-    def initialize(tag_name, params, tokens)
-      super
-      splitted = params.split(" ").map(&:strip)
-      @scholar_id = splitted[0]
-      @article_id = splitted[1]
-
-      if @scholar_id.nil? || @scholar_id.empty?
-        puts "Invalid scholar_id provided"
-      end
-
-      if @article_id.nil? || @article_id.empty?
-        puts "Invalid article_id provided"
-      end
-    end
-
-    def render(context)
-      article_id = context[@article_id.strip]
-      scholar_id = context[@scholar_id.strip]
-      article_url = "https://scholar.google.com/citations?view_op=view_citation&hl=en&user=#{scholar_id}&citation_for_view=#{scholar_id}:#{article_id}"
-
-      begin
-          # If the citation count has already been fetched, return it
-          if GoogleScholarCitationsTag::Citations[article_id]
-            return GoogleScholarCitationsTag::Citations[article_id]
-          end
-
-          # Sleep for a random amount of time to avoid being blocked
-          sleep(rand(1.5..3.5))
-
-          # Fetch the article page
-          doc = Nokogiri::HTML(URI.open(article_url, "User-Agent" => "Ruby/#{RUBY_VERSION}"))
-
-          # Attempt to extract the "Cited by n" string from the meta tags
-          citation_count = 0
-
-          # Look for meta tags with "name" attribute set to "description"
-          description_meta = doc.css('meta[name="description"]')
-          og_description_meta = doc.css('meta[property="og:description"]')
-
-          if !description_meta.empty?
-            cited_by_text = description_meta[0]['content']
-            matches = cited_by_text.match(/Cited by (\d+[,\d]*)/)
-
-            if matches
-              citation_count = matches[1].sub(",", "").to_i
-            end
-
-          elsif !og_description_meta.empty?
-            cited_by_text = og_description_meta[0]['content']
-            matches = cited_by_text.match(/Cited by (\d+[,\d]*)/)
-
-            if matches
-              citation_count = matches[1].sub(",", "").to_i
-            end
-          end
-
-        citation_count = Helpers.number_to_human(citation_count, :format => '%n%u', :precision => 2, :units => { :thousand => 'K', :million => 'M', :billion => 'B' })
-
-      rescue Exception => e
-        # Handle any errors that may occur during fetching
-        citation_count = "N/A"
-
-        # Print the error message including the exception class and message
-        puts "Error fetching citation count for #{article_id} in #{article_url}: #{e.class} - #{e.message}"
-      end
-
-      GoogleScholarCitationsTag::Citations[article_id] = citation_count
-      return "#{citation_count}"
-    end
-  end
-end
-
-Liquid::Template.register_tag('google_scholar_citations', Jekyll::GoogleScholarCitationsTag)
diff --git a/_plugins/inspirehep-citations.rb b/_plugins/inspirehep-citations.rb
deleted file mode 100644
index 63f59279f6d9..000000000000
--- a/_plugins/inspirehep-citations.rb
+++ /dev/null
@@ -1,57 +0,0 @@
-require "active_support/all"
-require 'net/http'
-require 'json'
-require 'uri'
-
-module Helpers
-  extend ActiveSupport::NumberHelper
-end
-
-module Jekyll
-  class InspireHEPCitationsTag < Liquid::Tag
-    Citations = { }
-
-    def initialize(tag_name, params, tokens)
-      super
-      @recid = params.strip
-    end
-
-    def render(context)
-      recid = context[@recid.strip]
-      api_url = "https://inspirehep.net/api/literature/?fields=citation_count&q=recid:#{recid}"
-
-      begin
-        # If the citation count has already been fetched, return it
-        if InspireHEPCitationsTag::Citations[recid]
-          return InspireHEPCitationsTag::Citations[recid]
-        end
-
-        # Fetch the citation count from the API
-        uri = URI(api_url)
-        response = Net::HTTP.get(uri)
-        data = JSON.parse(response)
-
-        # # Log the response for debugging
-        # puts "API Response: #{data.inspect}"
-
-        # Extract citation count from the JSON data
-        citation_count = data["hits"]["hits"][0]["metadata"]["citation_count"].to_i
-
-        # Format the citation count for readability
-        citation_count = Helpers.number_to_human(citation_count, format: '%n%u', precision: 2, units: { thousand: 'K', million: 'M', billion: 'B' })
-
-      rescue Exception => e
-        # Handle any errors that may occur during fetching
-        citation_count = "N/A"
-
-        # Print the error message including the exception class and message
-        puts "Error fetching citation count for #{recid}: #{e.class} - #{e.message}"
-      end
-
-      InspireHEPCitationsTag::Citations[recid] = citation_count
-      return "#{citation_count}"
-    end
-  end
-end
-
-Liquid::Template.register_tag('inspirehep_citations', Jekyll::InspireHEPCitationsTag)

From f116356a5b34a4da2fd13e4c36f9afcc1a036f73 Mon Sep 17 00:00:00 2001
From: Maruan <alshedivat@users.noreply.github.com>
Date: Sat, 18 Jan 2025 22:33:56 -0500
Subject: [PATCH 2/6] [unbundle] Extract external posts plugin into a separate
 gem (#2966)

Removes `_plugins/external-posts.rb` and switches to using the
`al_ext_posts` gem.
---
 Gemfile                    |   3 +-
 Gemfile.lock               |  29 ++++++++--
 _config.yml                |   2 +
 _plugins/external-posts.rb | 105 -------------------------------------
 4 files changed, 29 insertions(+), 110 deletions(-)
 delete mode 100644 _plugins/external-posts.rb

diff --git a/Gemfile b/Gemfile
index 126b3ceeff96..51ec1c4878d2 100644
--- a/Gemfile
+++ b/Gemfile
@@ -28,8 +28,6 @@ end
 # Gems for development or external data fetching (outside :jekyll_plugins)
 group :other_plugins do
     gem 'css_parser'
-    gem 'feedjira'
-    gem 'httparty'
     gem 'observer'       # used by jekyll-scholar
     gem 'ostruct'        # used by jekyll-twitter-plugin
     gem 'terser'         # used by jekyll-terser
@@ -40,4 +38,5 @@ end
 # Gems for al-folio plugins
 group :al_folio_plugins do
     gem 'al_citations', :git => 'https://github.com/al-org-dev/al-citations.git'
+    gem 'al_ext_posts', :git => 'https://github.com/al-org-dev/al-ext-posts.git'
 end
diff --git a/Gemfile.lock b/Gemfile.lock
index c8e3567d2f0f..4f86d4de3d71 100644
--- a/Gemfile.lock
+++ b/Gemfile.lock
@@ -15,6 +15,16 @@ GIT
       jekyll (>= 3.0)
       nokogiri
 
+GIT
+  remote: https://github.com/al-org-dev/al-ext-posts.git
+  revision: d23754e940328ae2581ad25b83c9d45c35ba4040
+  specs:
+    al_ext_posts (0.1.0)
+      feedjira
+      httparty
+      jekyll (>= 3.0)
+      nokogiri
+
 GEM
   remote: https://rubygems.org/
   specs:
@@ -39,8 +49,13 @@ GEM
       latex-decode (~> 0.0)
       racc (~> 1.7)
     bigdecimal (3.1.9)
-    citeproc (1.0.10)
+    citeproc (1.1.0)
+      date
+      forwardable
+      json
       namae (~> 1.0)
+      observer (< 1.0)
+      open-uri (< 1.0)
     citeproc-ruby (1.1.14)
       citeproc (~> 1.0, >= 1.0.9)
       csl (~> 1.6)
@@ -60,6 +75,7 @@ GEM
       addressable
     cssminify2 (2.0.1)
     csv (3.3.2)
+    date (3.4.1)
     deep_merge (1.2.2)
     drb (2.2.1)
     em-websocket (0.5.3)
@@ -79,6 +95,7 @@ GEM
     ffi (1.17.1-x86_64-darwin)
     ffi (1.17.1-x86_64-linux-gnu)
     ffi (1.17.1-x86_64-linux-musl)
+    forwardable (1.3.3)
     forwardable-extended (2.6.0)
     gemoji (4.1.0)
     google-protobuf (4.29.3)
@@ -207,6 +224,10 @@ GEM
     nokogiri (1.18.1-x86_64-linux-musl)
       racc (~> 1.4)
     observer (0.1.2)
+    open-uri (0.5.0)
+      stringio
+      time
+      uri
     ostruct (0.6.1)
     pathutil (0.16.2)
       forwardable-extended (~> 2.6)
@@ -237,10 +258,13 @@ GEM
       google-protobuf (~> 4.29)
     sax-machine (1.3.2)
     securerandom (0.4.1)
+    stringio (3.1.2)
     terminal-table (3.0.2)
       unicode-display_width (>= 1.1.1, < 3)
     terser (1.2.4)
       execjs (>= 0.3.0, < 3)
+    time (0.4.1)
+      date
     tzinfo (2.0.6)
       concurrent-ruby (~> 1.0)
     uglifier (4.2.1)
@@ -265,10 +289,9 @@ PLATFORMS
 
 DEPENDENCIES
   al_citations!
+  al_ext_posts!
   classifier-reborn
   css_parser
-  feedjira
-  httparty
   jekyll
   jekyll-archives
   jekyll-email-protect
diff --git a/_config.yml b/_config.yml
index 6b8b838fed64..241a1190ab32 100644
--- a/_config.yml
+++ b/_config.yml
@@ -208,6 +208,7 @@ keep_files:
 
 # Plug-ins
 plugins:
+  # Jekyll plugins
   - jekyll-archives
   - jekyll-email-protect
   - jekyll-feed
@@ -228,6 +229,7 @@ plugins:
 
   # al-folio plugins
   - al_citations
+  - al_ext_posts
 
 # Sitemap settings
 defaults:
diff --git a/_plugins/external-posts.rb b/_plugins/external-posts.rb
deleted file mode 100644
index 41a6c4360657..000000000000
--- a/_plugins/external-posts.rb
+++ /dev/null
@@ -1,105 +0,0 @@
-require 'feedjira'
-require 'httparty'
-require 'jekyll'
-require 'nokogiri'
-require 'time'
-
-module ExternalPosts
-  class ExternalPostsGenerator < Jekyll::Generator
-    safe true
-    priority :high
-
-    def generate(site)
-      if site.config['external_sources'] != nil
-        site.config['external_sources'].each do |src|
-          puts "Fetching external posts from #{src['name']}:"
-          if src['rss_url']
-            fetch_from_rss(site, src)
-          elsif src['posts']
-            fetch_from_urls(site, src)
-          end
-        end
-      end
-    end
-
-    def fetch_from_rss(site, src)
-      xml = HTTParty.get(src['rss_url']).body
-      return if xml.nil?
-      feed = Feedjira.parse(xml)
-      process_entries(site, src, feed.entries)
-    end
-
-    def process_entries(site, src, entries)
-      entries.each do |e|
-        puts "...fetching #{e.url}"
-        create_document(site, src['name'], e.url, {
-          title: e.title,
-          content: e.content,
-          summary: e.summary,
-          published: e.published
-        })
-      end
-    end
-
-    def create_document(site, source_name, url, content)
-      # check if title is composed only of whitespace or foreign characters
-      if content[:title].gsub(/[^\w]/, '').strip.empty?
-        # use the source name and last url segment as fallback
-        slug = "#{source_name.downcase.strip.gsub(' ', '-').gsub(/[^\w-]/, '')}-#{url.split('/').last}"
-      else
-        # parse title from the post or use the source name and last url segment as fallback
-        slug = content[:title].downcase.strip.gsub(' ', '-').gsub(/[^\w-]/, '')
-        slug = "#{source_name.downcase.strip.gsub(' ', '-').gsub(/[^\w-]/, '')}-#{url.split('/').last}" if slug.empty?
-      end
-
-      path = site.in_source_dir("_posts/#{slug}.md")
-      doc = Jekyll::Document.new(
-        path, { :site => site, :collection => site.collections['posts'] }
-      )
-      doc.data['external_source'] = source_name
-      doc.data['title'] = content[:title]
-      doc.data['feed_content'] = content[:content]
-      doc.data['description'] = content[:summary]
-      doc.data['date'] = content[:published]
-      doc.data['redirect'] = url
-      site.collections['posts'].docs << doc
-    end
-
-    def fetch_from_urls(site, src)
-      src['posts'].each do |post|
-        puts "...fetching #{post['url']}"
-        content = fetch_content_from_url(post['url'])
-        content[:published] = parse_published_date(post['published_date'])
-        create_document(site, src['name'], post['url'], content)
-      end
-    end
-
-    def parse_published_date(published_date)
-      case published_date
-      when String
-        Time.parse(published_date).utc
-      when Date
-        published_date.to_time.utc
-      else
-        raise "Invalid date format for #{published_date}"
-      end
-    end
-
-    def fetch_content_from_url(url)
-      html = HTTParty.get(url).body
-      parsed_html = Nokogiri::HTML(html)
-
-      title = parsed_html.at('head title')&.text.strip || ''
-      description = parsed_html.at('head meta[name="description"]')&.attr('content') || ''
-      body_content = parsed_html.at('body')&.inner_html || ''
-
-      {
-        title: title,
-        content: body_content,
-        summary: description
-        # Note: The published date is now added in the fetch_from_urls method.
-      }
-    end
-
-  end
-end

From ae54684e4a69d0452dff71bb32965053026f7e94 Mon Sep 17 00:00:00 2001
From: Maruan <alshedivat@users.noreply.github.com>
Date: Mon, 20 Jan 2025 18:16:55 -0500
Subject: [PATCH 3/6] [unbundle] Extract analytics tools into a separate gem
 plugin (#2971)

---
 Gemfile                                 |  1 +
 Gemfile.lock                            |  9 +++++++++
 _config.yml                             | 18 ++++++++----------
 _includes/scripts.liquid                | 24 ++----------------------
 assets/js/cronitor-analytics-setup.js   |  6 ------
 assets/js/google-analytics-setup.js     |  6 ------
 assets/js/open-panel-analytics-setup.js | 11 -----------
 7 files changed, 20 insertions(+), 55 deletions(-)
 delete mode 100644 assets/js/cronitor-analytics-setup.js
 delete mode 100644 assets/js/google-analytics-setup.js
 delete mode 100644 assets/js/open-panel-analytics-setup.js

diff --git a/Gemfile b/Gemfile
index 51ec1c4878d2..2d8043c72ce7 100644
--- a/Gemfile
+++ b/Gemfile
@@ -37,6 +37,7 @@ end
 
 # Gems for al-folio plugins
 group :al_folio_plugins do
+    gem 'al_analytics', :git => 'https://github.com/al-org-dev/al-analytics.git'
     gem 'al_citations', :git => 'https://github.com/al-org-dev/al-citations.git'
     gem 'al_ext_posts', :git => 'https://github.com/al-org-dev/al-ext-posts.git'
 end
diff --git a/Gemfile.lock b/Gemfile.lock
index 4f86d4de3d71..1faa4e09204c 100644
--- a/Gemfile.lock
+++ b/Gemfile.lock
@@ -6,6 +6,14 @@ GIT
       jekyll (>= 0.10.0)
       terser (>= 1.0.0)
 
+GIT
+  remote: https://github.com/al-org-dev/al-analytics.git
+  revision: a9c7821b0a3639ed3f1cc8a8cdcaabfbb4f64f5d
+  specs:
+    al_analytics (0.1.0)
+      jekyll (>= 3.0)
+      liquid (>= 4.0)
+
 GIT
   remote: https://github.com/al-org-dev/al-citations.git
   revision: d6dd73152f8ceafe475f0f0ad5c846167c006e4f
@@ -288,6 +296,7 @@ PLATFORMS
   x86_64-linux-musl
 
 DEPENDENCIES
+  al_analytics!
   al_citations!
   al_ext_posts!
   classifier-reborn
diff --git a/_config.yml b/_config.yml
index 241a1190ab32..363a439ceb7a 100644
--- a/_config.yml
+++ b/_config.yml
@@ -72,12 +72,13 @@ og_image: # The site-wide (default for all links) Open Graph preview image
 # Analytics and search engine verification
 # -----------------------------------------------------------------------------
 
-# For Google Analytics, see https://support.google.com/analytics/answer/10447272?hl=en&ref_topic=14088998&sjid=5129943941510317771-SA#zippy=%2Cgoogle-sites
-# and follow the instructions for Google Sites. You will need to create a Google Analytics property and copy the Google tag ID.
-google_analytics: # your Google Analytics measurement ID (format: G-XXXXXXXXXX)
-cronitor_analytics: # cronitor RUM analytics site ID (format: XXXXXXXXX)
-pirsch_analytics: # your Pirsch analytics site ID (length 32 characters)
-openpanel_analytics: # your Openpanel analytics client ID (format: XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX)
+analytics:
+  # For Google Analytics, see https://support.google.com/analytics/answer/10447272?hl=en&ref_topic=14088998&sjid=5129943941510317771-SA#zippy=%2Cgoogle-sites
+  # and follow the instructions for Google Sites. You will need to create a Google Analytics property and copy the Google tag ID.
+  google: # your Google Analytics measurement ID (format: G-XXXXXXXXXX)
+  cronitor: # cronitor RUM analytics site ID (format: XXXXXXXXX)
+  pirsch: # your Pirsch analytics site ID (length 32 characters)
+  openpanel: # your Openpanel analytics client ID (format: XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX)
 
 # For Google Search Console, see https://support.google.com/webmasters/answer/9008080?hl=en#meta_tag_verification&zippy=%2Chtml-tag
 google_site_verification: # your google-site-verification ID (Google Search Console)
@@ -228,6 +229,7 @@ plugins:
   - jemoji
 
   # al-folio plugins
+  - al_analytics
   - al_citations
   - al_ext_posts
 
@@ -392,10 +394,6 @@ lazy_loading_images: true # enables lazy loading of images (recommended)
 # Optional Features
 # -----------------------------------------------------------------------------
 
-enable_google_analytics: false # enables google analytics
-enable_cronitor_analytics: false # enables cronitor RUM analytics
-enable_pirsch_analytics: false # enables Pirsch analytics (https://pirsch.io/)
-enable_openpanel_analytics: false # enables Openpanel analytics (https://openpanel.dev/)
 enable_google_verification: false # enables google site verification
 enable_bing_verification: false # enables bing site verification
 enable_masonry: true # enables automatic project cards arrangement
diff --git a/_includes/scripts.liquid b/_includes/scripts.liquid
index 7bbd07166933..67b7bd3e4e07 100644
--- a/_includes/scripts.liquid
+++ b/_includes/scripts.liquid
@@ -212,29 +212,9 @@
   {% endunless %}
 {% endif %}
 
-{% if site.enable_google_analytics %}
+{% if site.plugins contains 'al_analytics' %}
   <!-- Analytics -->
-  <!-- Global site tag (gtag.js) - Google Analytics -->
-  <script async src="https://www.googletagmanager.com/gtag/js?id={{ site.google_analytics }}"></script>
-  <script defer src="{{ '/assets/js/google-analytics-setup.js' | relative_url | bust_file_cache }}"></script>
-{% endif %}
-
-{% if site.enable_cronitor_analytics %}
-  <!-- Cronitor RUM -->
-  <script async src="https://rum.cronitor.io/script.js"></script>
-  <script defer src="{{ '/assets/js/cronitor-analytics-setup.js' | relative_url | bust_file_cache }}"></script>
-{% endif %}
-{% if site.enable_pirsch_analytics %}
-  <script
-    defer
-    src="https://api.pirsch.io/pa.js"
-    id="pianjs"
-    data-code="{{ site.pirsch_analytics }}"
-  ></script>
-{% endif %}
-{% if site.enable_openpanel_analytics %}
-  <script defer src="{{ '/assets/js/open-panel-analytics-setup.js' | relative_url | bust_file_cache }}"></script>
-  <script async defer src="https://openpanel.dev/op1.js"></script>
+  {% al_analytics_scripts %}
 {% endif %}
 
 {% if site.enable_progressbar %}
diff --git a/assets/js/cronitor-analytics-setup.js b/assets/js/cronitor-analytics-setup.js
deleted file mode 100644
index 537bc5b4cc72..000000000000
--- a/assets/js/cronitor-analytics-setup.js
+++ /dev/null
@@ -1,6 +0,0 @@
-window.cronitor =
-  window.cronitor ||
-  function () {
-    (window.cronitor.q = window.cronitor.q || []).push(arguments);
-  };
-cronitor("config", { clientKey: "{{ site.cronitor_analytics }}" });
diff --git a/assets/js/google-analytics-setup.js b/assets/js/google-analytics-setup.js
deleted file mode 100644
index c66e2010ee58..000000000000
--- a/assets/js/google-analytics-setup.js
+++ /dev/null
@@ -1,6 +0,0 @@
-window.dataLayer = window.dataLayer || [];
-function gtag() {
-  window.dataLayer.push(arguments);
-}
-gtag("js", new Date());
-gtag("config", "{{ site.google_analytics }}");
diff --git a/assets/js/open-panel-analytics-setup.js b/assets/js/open-panel-analytics-setup.js
deleted file mode 100644
index fbd4833d2f92..000000000000
--- a/assets/js/open-panel-analytics-setup.js
+++ /dev/null
@@ -1,11 +0,0 @@
-window.op =
-  window.op ||
-  function (...args) {
-    (window.op.q = window.op.q || []).push(args);
-  };
-window.op("init", {
-  clientId: "{{ site.openpanel_analytics }}",
-  trackScreenViews: true,
-  trackOutgoingLinks: true,
-  trackAttributes: true,
-});

From 3de45c52a3eb5834a21b1d8065e6bece2819c187 Mon Sep 17 00:00:00 2001
From: Maruan Al-Shedivat <maruan@genesistherapeutics.ai>
Date: Mon, 20 Jan 2025 18:19:11 -0500
Subject: [PATCH 4/6] Remove dead links

---
 CUSTOMIZE.md | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/CUSTOMIZE.md b/CUSTOMIZE.md
index 9b0f32cfa3cd..351c8505f176 100644
--- a/CUSTOMIZE.md
+++ b/CUSTOMIZE.md
@@ -185,7 +185,6 @@ You can also:
 - delete [\_layouts/archive-category.liquid](_layouts/archive-category.liquid)
 - delete [\_layouts/archive-tag.liquid](_layouts/archive-tag.liquid)
 - delete [\_layouts/archive-year.liquid](_layouts/archive-year.liquid)
-- delete [\_plugins/external-posts.rb](_plugins/external-posts.rb)
 - remove the `jekyll-archives` gem from the [Gemfile](Gemfile) and the `plugins` section in [\_config.yml](_config.yml)
 - remove the `classifier-reborn` gem from the [Gemfile](Gemfile)
 
@@ -227,9 +226,7 @@ You can also:
 - delete [\_includes/bib_search.liquid](_includes/bib_search.liquid)
 - delete [\_includes/citation.liquid](_includes/citation.liquid)
 - delete [\_includes/selected_papers.liquid](_includes/selected_papers.liquid)
-- delete [\_plugins/google-scholar-citations.rb](_plugins/google-scholar-citations.rb)
 - delete [\_plugins/hide-custom-bibtex.rb](_plugins/hide-custom-bibtex.rb)
-- delete [\_plugins/inspirehep-citations.rb](_plugins/inspirehep-citations.rb)
 - remove the `jekyll-scholar` gem from the [Gemfile](Gemfile) and the `plugins` section in [\_config.yml](_config.yml)
 
 ### Removing the repositories page

From f9e4b252b0b70c42b35f64b59868f0c5c5782dad Mon Sep 17 00:00:00 2001
From: George <31376482+george-gca@users.noreply.github.com>
Date: Sun, 26 Jan 2025 23:38:23 -0300
Subject: [PATCH 5/6] Changed to use `jekyll-cache-bust` plugin (#2979)

---
 Gemfile                |  1 +
 Gemfile.lock           |  3 +++
 _config.yml            |  1 +
 _plugins/cache-bust.rb | 51 ------------------------------------------
 4 files changed, 5 insertions(+), 51 deletions(-)
 delete mode 100644 _plugins/cache-bust.rb

diff --git a/Gemfile b/Gemfile
index 2d8043c72ce7..6f8ea42a0057 100644
--- a/Gemfile
+++ b/Gemfile
@@ -5,6 +5,7 @@ gem 'jekyll'
 # Core plugins that directly affect site building
 group :jekyll_plugins do
     gem 'jekyll-archives'
+    gem 'jekyll-cache-bust'
     gem 'jekyll-email-protect'
     gem 'jekyll-feed'
     gem 'jekyll-get-json'
diff --git a/Gemfile.lock b/Gemfile.lock
index 1faa4e09204c..12c06694c7e2 100644
--- a/Gemfile.lock
+++ b/Gemfile.lock
@@ -150,6 +150,8 @@ GEM
       webrick (~> 1.7)
     jekyll-archives (2.3.0)
       jekyll (>= 3.6, < 5.0)
+    jekyll-cache-bust (0.0.1)
+      jekyll (>= 3.6, < 5.0)
     jekyll-email-protect (1.1.0)
     jekyll-feed (0.17.0)
       jekyll (>= 3.7, < 5.0)
@@ -303,6 +305,7 @@ DEPENDENCIES
   css_parser
   jekyll
   jekyll-archives
+  jekyll-cache-bust
   jekyll-email-protect
   jekyll-feed
   jekyll-get-json
diff --git a/_config.yml b/_config.yml
index 363a439ceb7a..86832ca58c82 100644
--- a/_config.yml
+++ b/_config.yml
@@ -211,6 +211,7 @@ keep_files:
 plugins:
   # Jekyll plugins
   - jekyll-archives
+  - jekyll-cache-bust
   - jekyll-email-protect
   - jekyll-feed
   - jekyll-get-json
diff --git a/_plugins/cache-bust.rb b/_plugins/cache-bust.rb
deleted file mode 100644
index 94d82d20b0a3..000000000000
--- a/_plugins/cache-bust.rb
+++ /dev/null
@@ -1,51 +0,0 @@
-# based on https://distresssignal.org/busting-css-cache-with-jekyll-md5-hash
-# https://gist.github.com/BryanSchuetz/2ee8c115096d7dd98f294362f6a667db
-module Jekyll
-  module CacheBust
-    class CacheDigester
-      require 'digest/md5'
-      require 'pathname'
-
-      attr_accessor :file_name, :directory
-
-      def initialize(file_name:, directory: nil)
-        self.file_name = file_name
-        self.directory = directory
-      end
-
-      def digest!
-        [file_name, '?', Digest::MD5.hexdigest(file_contents)].join
-      end
-
-      private
-
-      def directory_files_content
-        target_path = File.join(directory, '**', '*')
-        Dir[target_path].map{|f| File.read(f) unless File.directory?(f) }.join
-      end
-
-      def file_content
-        local_file_name = file_name.slice((file_name.index('assets/')..-1))
-        File.read(local_file_name)
-      end
-
-      def file_contents
-        is_directory? ? file_content : directory_files_content
-      end
-
-      def is_directory?
-        directory.nil?
-      end
-    end
-
-    def bust_file_cache(file_name)
-      CacheDigester.new(file_name: file_name, directory: nil).digest!
-    end
-
-    def bust_css_cache(file_name)
-      CacheDigester.new(file_name: file_name, directory: 'assets/_sass').digest!
-    end
-  end
-end
-
-Liquid::Template.register_filter(Jekyll::CacheBust)
\ No newline at end of file

From d0b918b5b21d24848b4be151707031c6ae8c328d Mon Sep 17 00:00:00 2001
From: George <31376482+george-gca@users.noreply.github.com>
Date: Sun, 9 Feb 2025 11:34:39 -0300
Subject: [PATCH 6/6] [Unbundle] Changed to use
 `jekyll-3rd-party-libraries` plugin (#2978)

I agree that it is a good idea to unbundle some of the plugins from
`al-folio`, especially since they can be useful to more Jekyll users
overall. I have unbundled 2 plugins and created a gem for each one of
them, so they can be easily integrated in any Jekyll site. This is the
first one of them.
---
 Gemfile                        |   1 +
 Gemfile.lock                   |   5 +
 _config.yml                    |   2 +-
 _plugins/download-3rd-party.rb | 253 ---------------------------------
 4 files changed, 7 insertions(+), 254 deletions(-)
 delete mode 100644 _plugins/download-3rd-party.rb

diff --git a/Gemfile b/Gemfile
index 6f8ea42a0057..ef240cdeb05d 100644
--- a/Gemfile
+++ b/Gemfile
@@ -4,6 +4,7 @@ gem 'jekyll'
 
 # Core plugins that directly affect site building
 group :jekyll_plugins do
+    gem 'jekyll-3rd-party-libraries'
     gem 'jekyll-archives'
     gem 'jekyll-cache-bust'
     gem 'jekyll-email-protect'
diff --git a/Gemfile.lock b/Gemfile.lock
index 12c06694c7e2..9555e43c55ec 100644
--- a/Gemfile.lock
+++ b/Gemfile.lock
@@ -132,6 +132,10 @@ GEM
       multi_xml (>= 0.5.2)
     i18n (1.14.6)
       concurrent-ruby (~> 1.0)
+    jekyll-3rd-party-libraries (0.0.1)
+      css_parser (>= 1.6, < 2.0)
+      jekyll (>= 3.6, < 5.0)
+      nokogiri (>= 1.8, < 2.0)
     jekyll (4.3.4)
       addressable (~> 2.4)
       colorator (~> 1.0)
@@ -304,6 +308,7 @@ DEPENDENCIES
   classifier-reborn
   css_parser
   jekyll
+  jekyll-3rd-party-libraries
   jekyll-archives
   jekyll-cache-bust
   jekyll-email-protect
diff --git a/_config.yml b/_config.yml
index 86832ca58c82..ff91547aadb8 100644
--- a/_config.yml
+++ b/_config.yml
@@ -209,7 +209,7 @@ keep_files:
 
 # Plug-ins
 plugins:
-  # Jekyll plugins
+  - jekyll-3rd-party-libraries
   - jekyll-archives
   - jekyll-cache-bust
   - jekyll-email-protect
diff --git a/_plugins/download-3rd-party.rb b/_plugins/download-3rd-party.rb
deleted file mode 100644
index 59f809f4de6f..000000000000
--- a/_plugins/download-3rd-party.rb
+++ /dev/null
@@ -1,253 +0,0 @@
-Jekyll::Hooks.register :site, :after_init do |site|
-  require 'css_parser'
-  require 'digest'
-  require 'fileutils'
-  require 'nokogiri'
-  require 'open-uri'
-  require 'uri'
-
-  font_file_types = ['otf', 'ttf', 'woff', 'woff2']
-  image_file_types = ['.gif', '.jpg', '.jpeg', '.png', '.webp']
-
-  def download_and_change_rule_set_url(rule_set, rule, dest, dirname, config, file_types)
-    # check if the rule has a url
-    if rule_set[rule].include?('url(')
-      # get the file url
-      url = rule_set[rule].split('url(').last.split(')').first
-
-      # remove quotes from the url
-      if url.start_with?('"') || url.start_with?("'")
-        url = url[1..-2]
-      end
-
-      file_name = url.split('/').last.split('?').first
-
-      # verify if the file is of the correct type
-      if file_name.end_with?(*file_types)
-        # fix the url if it is not an absolute url
-        unless url.start_with?('https://')
-          url = URI.join(url, url).to_s
-        end
-
-        # download the file
-        download_file(url, File.join(dest, file_name))
-
-        # change the url to the local file, considering baseurl
-        previous_rule = rule_set[rule]
-        if config['baseurl']
-          # add rest of the src attribute if it exists
-          if rule_set[rule].split(' ').length > 1
-            rule_set[rule] = "url(#{File.join(config['baseurl'], 'assets', 'libs', dirname, file_name)}) #{rule_set[rule].split(' ').last}"
-          else
-            rule_set[rule] = "url(#{File.join(config['baseurl'], 'assets', 'libs', dirname, file_name)})"
-          end
-        else
-          # add rest of the src attribute if it exists
-          if rule_set[rule].split(' ').length > 1
-            rule_set[rule] = "url(#{File.join('/assets', 'libs', dirname, file_name)}) #{rule_set[rule].split(' ').last}"
-          else
-            rule_set[rule] = "url(#{File.join('/assets', 'libs', dirname, file_name)})"
-          end
-        end
-        puts "Changed #{previous_rule} to #{rule_set[rule]}"
-      end
-    end
-  end
-
-  def download_file(url, dest)
-    # only try to download the file if url doesn't start with | for security reasons
-    if url.start_with?('|')
-      return
-    end
-
-    # create the directory if it doesn't exist
-    dir = File.dirname(dest)
-    unless File.directory?(dir)
-      FileUtils.mkdir_p(dir)
-    end
-
-    # download the file if it doesn't exist
-    unless File.file?(dest)
-      puts "Downloading #{url} to #{dest}"
-      File.open(dest, "wb") do |saved_file|
-        URI(url).open("rb") do |read_file|
-          saved_file.write(read_file.read)
-        end
-      end
-
-      # check if the file was downloaded successfully
-      unless File.file?(dest)
-        raise "Failed to download #{url} to #{dest}"
-      end
-    end
-  end
-
-  def download_fonts(url, dest, file_types)
-    # only try to download the file if url doesn't start with | for security reasons
-    if url.start_with?('|')
-      return
-    end
-
-    # only download fonts if the directory doesn't exist or is empty
-    unless File.directory?(dest) && !Dir.empty?(dest)
-      puts "Downloading fonts from #{url} to #{dest}"
-      # get available fonts from the url
-      doc = Nokogiri::HTML(URI(url).open("User-Agent" => "Ruby/#{RUBY_VERSION}"))
-      doc.css('a').each do |link|
-        # get the file name from the url
-        file_name = link['href'].split('/').last.split('?').first
-
-        # verify if the file is a font file
-        if file_name.end_with?(*file_types)
-          # download the file and change the url to the local file
-          download_file(URI.join(url, link['href']).to_s, File.join(dest, file_name))
-        end
-      end
-    end
-  end
-
-  def download_images(url, dest, file_types)
-    # only try to download the file if url doesn't start with | for security reasons
-    if url.start_with?('|')
-      return
-    end
-
-    # only download images if the directory doesn't exist or is empty
-    unless File.directory?(dest) && !Dir.empty?(dest)
-      puts "Downloading images from #{url} to #{dest}"
-      # get available fonts from the url
-      doc = Nokogiri::HTML(URI(url).open("User-Agent" => "Ruby/#{RUBY_VERSION}"))
-      doc.xpath('/html/body/div/div[3]/table/tbody/tr/td[1]/a').each do |link|
-        # get the file name from the url
-        file_name = link['href'].split('/').last.split('?').first
-
-        # verify if the file is a font file
-        if file_name.end_with?(*file_types)
-          # download the file and change the url to the local file
-          download_file(URI.join(url, link['href']).to_s, File.join(dest, file_name))
-        end
-      end
-    end
-  end
-
-  def download_fonts_from_css(config, url, dest, lib_name, file_types)
-    # only try to download the file if url doesn't start with | for security reasons
-    if url.start_with?('|')
-      return
-    end
-
-    # get the file name from the url
-    file_name = url.split('/').last.split('?').first
-
-    if file_name == 'css'
-      file_name = 'google-fonts.css'
-    end
-
-    # only download the css file if it doesn't exist
-    unless File.file?(File.join(dest, file_name))
-      puts "Downloading fonts from #{url} to #{dest}"
-      # download the css file with a fake user agent to force downloading woff2 fonts instead of ttf
-      # user agent from https://www.whatismybrowser.com/guides/the-latest-user-agent/chrome
-      doc = Nokogiri::HTML(URI(url).open("User-Agent" => "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36"))
-      css = CssParser::Parser.new
-      css.load_string! doc.document.text
-
-      # get the font-face rules
-      css.each_rule_set do |rule_set|
-        # check if the rule set has a url
-        download_and_change_rule_set_url(rule_set, 'src', File.join(dest, 'fonts'), File.join(lib_name, 'fonts'), config, file_types)
-      end
-
-      # save the modified css file
-      puts "Saving modified css file to #{File.join(dest, file_name)}"
-      File.write(File.join(dest, file_name), css.to_s)
-    end
-
-    return file_name
-  end
-
-  # replace {{version}} with the version number in all 3rd party libraries urls
-  site.config['third_party_libraries'].each do |key, value|
-    if key != 'download'
-      value['url'].each do |type, url|
-        # check if url is a dictionary
-        if url.is_a?(Hash)
-          url.each do |type2, url2|
-            # replace {{version}} with the version number if it exists
-            if url2.include?('{{version}}')
-              site.config['third_party_libraries'][key]['url'][type][type2] = url2.gsub('{{version}}', site.config['third_party_libraries'][key]['version'])
-            end
-          end
-        else
-          # replace {{version}} with the version number if it exists
-          if url.include?('{{version}}')
-            site.config['third_party_libraries'][key]['url'][type] = url.gsub('{{version}}', site.config['third_party_libraries'][key]['version'])
-          end
-        end
-      end
-    end
-  end
-
-  # download 3rd party libraries if required
-  if site.config['third_party_libraries']['download']
-    site.config['third_party_libraries'].each do |key, value|
-      if key != 'download'
-        value['url'].each do |type, url|
-          # check if url is a dictionary
-          if url.is_a?(Hash)
-            url.each do |type2, url2|
-              # get the file name from the url
-              file_name = url2.split('/').last.split('?').first
-              # download the file and change the url to the local file
-              dest = File.join(site.source, 'assets', 'libs', key, file_name)
-              download_file(url2, dest)
-              # change the url to the local file, considering baseurl
-              if site.config['baseurl']
-                site.config['third_party_libraries'][key]['url'][type][type2] = File.join(site.config['baseurl'], 'assets', 'libs', key, file_name)
-              else
-                site.config['third_party_libraries'][key]['url'][type][type2] = File.join('/assets', 'libs', key, file_name)
-              end
-            end
-
-          else
-            if type == 'fonts'
-              # get the file name from the url
-              file_name = url.split('/').last.split('?').first
-
-              if file_name.end_with?('css')
-                # if the file is a css file, download the css file, the fonts from it, and change information on the css file
-                file_name = download_fonts_from_css(site.config, url, File.join(site.source, 'assets', 'libs', key), key, font_file_types)
-                # change the url to the local file, considering baseurl
-                if site.config['baseurl']
-                  site.config['third_party_libraries'][key]['url'][type] = File.join(site.config['baseurl'], 'assets', 'libs', key, file_name)
-                else
-                  site.config['third_party_libraries'][key]['url'][type] = File.join('/assets', 'libs', key, file_name)
-                end
-              else
-                # download the font files and change the url to the local file
-                download_fonts(url, File.join(site.source, 'assets', 'libs', key, site.config['third_party_libraries'][key]['local'][type]), font_file_types)
-              end
-
-            elsif type == 'images'
-              # download the font files and change the url to the local file
-              download_images(url, File.join(site.source, 'assets', 'libs', key, site.config['third_party_libraries'][key]['local'][type]), image_file_types)
-
-            else
-              # get the file name from the url
-              file_name = url.split('/').last.split('?').first
-              # download the file and change the url to the local file
-              dest = File.join(site.source, 'assets', 'libs', key, file_name)
-              download_file(url, dest)
-              # change the url to the local file, considering baseurl
-              if site.config['baseurl']
-                site.config['third_party_libraries'][key]['url'][type] = File.join(site.config['baseurl'], 'assets', 'libs', key, file_name)
-              else
-                site.config['third_party_libraries'][key]['url'][type] = File.join('/assets', 'libs', key, file_name)
-              end
-            end
-          end
-        end
-      end
-    end
-  end
-end