From 2b7f60abc89df7ef3216eb32a93a497a581a7535 Mon Sep 17 00:00:00 2001 From: Maruan Date: Sat, 18 Jan 2025 22:30:01 -0500 Subject: [PATCH 1/6] [unbundle] Extract citation count plugins into a separate gem (#2967) Removes `_plugins/google-scholar-citations.rb` and `_plugins/inspirehep-citations.rb` and switches to using `al_citations` gem instead. --- Gemfile | 5 ++ Gemfile.lock | 10 ++++ _config.yml | 3 + _plugins/google-scholar-citations.rb | 85 ---------------------------- _plugins/inspirehep-citations.rb | 57 ------------------- 5 files changed, 18 insertions(+), 142 deletions(-) delete mode 100644 _plugins/google-scholar-citations.rb delete mode 100644 _plugins/inspirehep-citations.rb diff --git a/Gemfile b/Gemfile index c9a27061d1dc..126b3ceeff96 100644 --- a/Gemfile +++ b/Gemfile @@ -36,3 +36,8 @@ group :other_plugins do # gem 'unicode_utils' -- should be already installed by jekyll # gem 'webrick' -- should be already installed by jekyll end + +# Gems for al-folio plugins +group :al_folio_plugins do + gem 'al_citations', :git => 'https://github.com/al-org-dev/al-citations.git' +end diff --git a/Gemfile.lock b/Gemfile.lock index 98c191d96378..c8e3567d2f0f 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -6,6 +6,15 @@ GIT jekyll (>= 0.10.0) terser (>= 1.0.0) +GIT + remote: https://github.com/al-org-dev/al-citations.git + revision: d6dd73152f8ceafe475f0f0ad5c846167c006e4f + specs: + al_citations (0.1.0) + activesupport + jekyll (>= 3.0) + nokogiri + GEM remote: https://rubygems.org/ specs: @@ -255,6 +264,7 @@ PLATFORMS x86_64-linux-musl DEPENDENCIES + al_citations! 
classifier-reborn css_parser feedjira diff --git a/_config.yml b/_config.yml index 923db92af259..6b8b838fed64 100644 --- a/_config.yml +++ b/_config.yml @@ -226,6 +226,9 @@ plugins: - jekyll-twitter-plugin - jemoji + # al-folio plugins + - al_citations + # Sitemap settings defaults: - scope: diff --git a/_plugins/google-scholar-citations.rb b/_plugins/google-scholar-citations.rb deleted file mode 100644 index 7fde002626d4..000000000000 --- a/_plugins/google-scholar-citations.rb +++ /dev/null @@ -1,85 +0,0 @@ -require "active_support/all" -require 'nokogiri' -require 'open-uri' - -module Helpers - extend ActiveSupport::NumberHelper -end - -module Jekyll - class GoogleScholarCitationsTag < Liquid::Tag - Citations = { } - - def initialize(tag_name, params, tokens) - super - splitted = params.split(" ").map(&:strip) - @scholar_id = splitted[0] - @article_id = splitted[1] - - if @scholar_id.nil? || @scholar_id.empty? - puts "Invalid scholar_id provided" - end - - if @article_id.nil? || @article_id.empty? 
- puts "Invalid article_id provided" - end - end - - def render(context) - article_id = context[@article_id.strip] - scholar_id = context[@scholar_id.strip] - article_url = "https://scholar.google.com/citations?view_op=view_citation&hl=en&user=#{scholar_id}&citation_for_view=#{scholar_id}:#{article_id}" - - begin - # If the citation count has already been fetched, return it - if GoogleScholarCitationsTag::Citations[article_id] - return GoogleScholarCitationsTag::Citations[article_id] - end - - # Sleep for a random amount of time to avoid being blocked - sleep(rand(1.5..3.5)) - - # Fetch the article page - doc = Nokogiri::HTML(URI.open(article_url, "User-Agent" => "Ruby/#{RUBY_VERSION}")) - - # Attempt to extract the "Cited by n" string from the meta tags - citation_count = 0 - - # Look for meta tags with "name" attribute set to "description" - description_meta = doc.css('meta[name="description"]') - og_description_meta = doc.css('meta[property="og:description"]') - - if !description_meta.empty? - cited_by_text = description_meta[0]['content'] - matches = cited_by_text.match(/Cited by (\d+[,\d]*)/) - - if matches - citation_count = matches[1].sub(",", "").to_i - end - - elsif !og_description_meta.empty? 
- cited_by_text = og_description_meta[0]['content'] - matches = cited_by_text.match(/Cited by (\d+[,\d]*)/) - - if matches - citation_count = matches[1].sub(",", "").to_i - end - end - - citation_count = Helpers.number_to_human(citation_count, :format => '%n%u', :precision => 2, :units => { :thousand => 'K', :million => 'M', :billion => 'B' }) - - rescue Exception => e - # Handle any errors that may occur during fetching - citation_count = "N/A" - - # Print the error message including the exception class and message - puts "Error fetching citation count for #{article_id} in #{article_url}: #{e.class} - #{e.message}" - end - - GoogleScholarCitationsTag::Citations[article_id] = citation_count - return "#{citation_count}" - end - end -end - -Liquid::Template.register_tag('google_scholar_citations', Jekyll::GoogleScholarCitationsTag) diff --git a/_plugins/inspirehep-citations.rb b/_plugins/inspirehep-citations.rb deleted file mode 100644 index 63f59279f6d9..000000000000 --- a/_plugins/inspirehep-citations.rb +++ /dev/null @@ -1,57 +0,0 @@ -require "active_support/all" -require 'net/http' -require 'json' -require 'uri' - -module Helpers - extend ActiveSupport::NumberHelper -end - -module Jekyll - class InspireHEPCitationsTag < Liquid::Tag - Citations = { } - - def initialize(tag_name, params, tokens) - super - @recid = params.strip - end - - def render(context) - recid = context[@recid.strip] - api_url = "https://inspirehep.net/api/literature/?fields=citation_count&q=recid:#{recid}" - - begin - # If the citation count has already been fetched, return it - if InspireHEPCitationsTag::Citations[recid] - return InspireHEPCitationsTag::Citations[recid] - end - - # Fetch the citation count from the API - uri = URI(api_url) - response = Net::HTTP.get(uri) - data = JSON.parse(response) - - # # Log the response for debugging - # puts "API Response: #{data.inspect}" - - # Extract citation count from the JSON data - citation_count = 
data["hits"]["hits"][0]["metadata"]["citation_count"].to_i - - # Format the citation count for readability - citation_count = Helpers.number_to_human(citation_count, format: '%n%u', precision: 2, units: { thousand: 'K', million: 'M', billion: 'B' }) - - rescue Exception => e - # Handle any errors that may occur during fetching - citation_count = "N/A" - - # Print the error message including the exception class and message - puts "Error fetching citation count for #{recid}: #{e.class} - #{e.message}" - end - - InspireHEPCitationsTag::Citations[recid] = citation_count - return "#{citation_count}" - end - end -end - -Liquid::Template.register_tag('inspirehep_citations', Jekyll::InspireHEPCitationsTag) From f116356a5b34a4da2fd13e4c36f9afcc1a036f73 Mon Sep 17 00:00:00 2001 From: Maruan Date: Sat, 18 Jan 2025 22:33:56 -0500 Subject: [PATCH 2/6] [unbundle] Extract external posts plugin into a separate gem (#2966) Removes `_plugins/external-posts.rb` and switches to using the `al_ext_posts` gem. --- Gemfile | 3 +- Gemfile.lock | 29 ++++++++-- _config.yml | 2 + _plugins/external-posts.rb | 105 ------------------------------------- 4 files changed, 29 insertions(+), 110 deletions(-) delete mode 100644 _plugins/external-posts.rb diff --git a/Gemfile b/Gemfile index 126b3ceeff96..51ec1c4878d2 100644 --- a/Gemfile +++ b/Gemfile @@ -28,8 +28,6 @@ end # Gems for development or external data fetching (outside :jekyll_plugins) group :other_plugins do gem 'css_parser' - gem 'feedjira' - gem 'httparty' gem 'observer' # used by jekyll-scholar gem 'ostruct' # used by jekyll-twitter-plugin gem 'terser' # used by jekyll-terser @@ -40,4 +38,5 @@ end # Gems for al-folio plugins group :al_folio_plugins do gem 'al_citations', :git => 'https://github.com/al-org-dev/al-citations.git' + gem 'al_ext_posts', :git => 'https://github.com/al-org-dev/al-ext-posts.git' end diff --git a/Gemfile.lock b/Gemfile.lock index c8e3567d2f0f..4f86d4de3d71 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -15,6 +15,16 
@@ GIT jekyll (>= 3.0) nokogiri +GIT + remote: https://github.com/al-org-dev/al-ext-posts.git + revision: d23754e940328ae2581ad25b83c9d45c35ba4040 + specs: + al_ext_posts (0.1.0) + feedjira + httparty + jekyll (>= 3.0) + nokogiri + GEM remote: https://rubygems.org/ specs: @@ -39,8 +49,13 @@ GEM latex-decode (~> 0.0) racc (~> 1.7) bigdecimal (3.1.9) - citeproc (1.0.10) + citeproc (1.1.0) + date + forwardable + json namae (~> 1.0) + observer (< 1.0) + open-uri (< 1.0) citeproc-ruby (1.1.14) citeproc (~> 1.0, >= 1.0.9) csl (~> 1.6) @@ -60,6 +75,7 @@ GEM addressable cssminify2 (2.0.1) csv (3.3.2) + date (3.4.1) deep_merge (1.2.2) drb (2.2.1) em-websocket (0.5.3) @@ -79,6 +95,7 @@ GEM ffi (1.17.1-x86_64-darwin) ffi (1.17.1-x86_64-linux-gnu) ffi (1.17.1-x86_64-linux-musl) + forwardable (1.3.3) forwardable-extended (2.6.0) gemoji (4.1.0) google-protobuf (4.29.3) @@ -207,6 +224,10 @@ GEM nokogiri (1.18.1-x86_64-linux-musl) racc (~> 1.4) observer (0.1.2) + open-uri (0.5.0) + stringio + time + uri ostruct (0.6.1) pathutil (0.16.2) forwardable-extended (~> 2.6) @@ -237,10 +258,13 @@ GEM google-protobuf (~> 4.29) sax-machine (1.3.2) securerandom (0.4.1) + stringio (3.1.2) terminal-table (3.0.2) unicode-display_width (>= 1.1.1, < 3) terser (1.2.4) execjs (>= 0.3.0, < 3) + time (0.4.1) + date tzinfo (2.0.6) concurrent-ruby (~> 1.0) uglifier (4.2.1) @@ -265,10 +289,9 @@ PLATFORMS DEPENDENCIES al_citations! + al_ext_posts! 
classifier-reborn css_parser - feedjira - httparty jekyll jekyll-archives jekyll-email-protect diff --git a/_config.yml b/_config.yml index 6b8b838fed64..241a1190ab32 100644 --- a/_config.yml +++ b/_config.yml @@ -208,6 +208,7 @@ keep_files: # Plug-ins plugins: + # Jekyll plugins - jekyll-archives - jekyll-email-protect - jekyll-feed @@ -228,6 +229,7 @@ plugins: # al-folio plugins - al_citations + - al_ext_posts # Sitemap settings defaults: diff --git a/_plugins/external-posts.rb b/_plugins/external-posts.rb deleted file mode 100644 index 41a6c4360657..000000000000 --- a/_plugins/external-posts.rb +++ /dev/null @@ -1,105 +0,0 @@ -require 'feedjira' -require 'httparty' -require 'jekyll' -require 'nokogiri' -require 'time' - -module ExternalPosts - class ExternalPostsGenerator < Jekyll::Generator - safe true - priority :high - - def generate(site) - if site.config['external_sources'] != nil - site.config['external_sources'].each do |src| - puts "Fetching external posts from #{src['name']}:" - if src['rss_url'] - fetch_from_rss(site, src) - elsif src['posts'] - fetch_from_urls(site, src) - end - end - end - end - - def fetch_from_rss(site, src) - xml = HTTParty.get(src['rss_url']).body - return if xml.nil? - feed = Feedjira.parse(xml) - process_entries(site, src, feed.entries) - end - - def process_entries(site, src, entries) - entries.each do |e| - puts "...fetching #{e.url}" - create_document(site, src['name'], e.url, { - title: e.title, - content: e.content, - summary: e.summary, - published: e.published - }) - end - end - - def create_document(site, source_name, url, content) - # check if title is composed only of whitespace or foreign characters - if content[:title].gsub(/[^\w]/, '').strip.empty? 
- # use the source name and last url segment as fallback - slug = "#{source_name.downcase.strip.gsub(' ', '-').gsub(/[^\w-]/, '')}-#{url.split('/').last}" - else - # parse title from the post or use the source name and last url segment as fallback - slug = content[:title].downcase.strip.gsub(' ', '-').gsub(/[^\w-]/, '') - slug = "#{source_name.downcase.strip.gsub(' ', '-').gsub(/[^\w-]/, '')}-#{url.split('/').last}" if slug.empty? - end - - path = site.in_source_dir("_posts/#{slug}.md") - doc = Jekyll::Document.new( - path, { :site => site, :collection => site.collections['posts'] } - ) - doc.data['external_source'] = source_name - doc.data['title'] = content[:title] - doc.data['feed_content'] = content[:content] - doc.data['description'] = content[:summary] - doc.data['date'] = content[:published] - doc.data['redirect'] = url - site.collections['posts'].docs << doc - end - - def fetch_from_urls(site, src) - src['posts'].each do |post| - puts "...fetching #{post['url']}" - content = fetch_content_from_url(post['url']) - content[:published] = parse_published_date(post['published_date']) - create_document(site, src['name'], post['url'], content) - end - end - - def parse_published_date(published_date) - case published_date - when String - Time.parse(published_date).utc - when Date - published_date.to_time.utc - else - raise "Invalid date format for #{published_date}" - end - end - - def fetch_content_from_url(url) - html = HTTParty.get(url).body - parsed_html = Nokogiri::HTML(html) - - title = parsed_html.at('head title')&.text.strip || '' - description = parsed_html.at('head meta[name="description"]')&.attr('content') || '' - body_content = parsed_html.at('body')&.inner_html || '' - - { - title: title, - content: body_content, - summary: description - # Note: The published date is now added in the fetch_from_urls method. 
- } - end - end From ae54684e4a69d0452dff71bb32965053026f7e94 Mon Sep 17 00:00:00 2001 From: Maruan Date: Mon, 20 Jan 2025 18:16:55 -0500 Subject: [PATCH 3/6] [unbundle] Extract analytics tools into a separate gem plugin (#2971) --- Gemfile | 1 + Gemfile.lock | 9 +++++++++ _config.yml | 18 ++++++++---------- _includes/scripts.liquid | 24 ++---------------------- assets/js/cronitor-analytics-setup.js | 6 ------ assets/js/google-analytics-setup.js | 6 ------ assets/js/open-panel-analytics-setup.js | 11 ----------- 7 files changed, 20 insertions(+), 55 deletions(-) delete mode 100644 assets/js/cronitor-analytics-setup.js delete mode 100644 assets/js/google-analytics-setup.js delete mode 100644 assets/js/open-panel-analytics-setup.js diff --git a/Gemfile b/Gemfile index 51ec1c4878d2..2d8043c72ce7 100644 --- a/Gemfile +++ b/Gemfile @@ -37,6 +37,7 @@ end # Gems for al-folio plugins group :al_folio_plugins do + gem 'al_analytics', :git => 'https://github.com/al-org-dev/al-analytics.git' gem 'al_citations', :git => 'https://github.com/al-org-dev/al-citations.git' gem 'al_ext_posts', :git => 'https://github.com/al-org-dev/al-ext-posts.git' end diff --git a/Gemfile.lock b/Gemfile.lock index 4f86d4de3d71..1faa4e09204c 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -6,6 +6,14 @@ GIT jekyll (>= 0.10.0) terser (>= 1.0.0) +GIT + remote: https://github.com/al-org-dev/al-analytics.git + revision: a9c7821b0a3639ed3f1cc8a8cdcaabfbb4f64f5d + specs: + al_analytics (0.1.0) + jekyll (>= 3.0) + liquid (>= 4.0) + GIT remote: https://github.com/al-org-dev/al-citations.git revision: d6dd73152f8ceafe475f0f0ad5c846167c006e4f @@ -288,6 +296,7 @@ PLATFORMS x86_64-linux-musl DEPENDENCIES + al_analytics! al_citations! al_ext_posts! 
classifier-reborn diff --git a/_config.yml b/_config.yml index 241a1190ab32..363a439ceb7a 100644 --- a/_config.yml +++ b/_config.yml @@ -72,12 +72,13 @@ og_image: # The site-wide (default for all links) Open Graph preview image # Analytics and search engine verification # ----------------------------------------------------------------------------- -# For Google Analytics, see https://support.google.com/analytics/answer/10447272?hl=en&ref_topic=14088998&sjid=5129943941510317771-SA#zippy=%2Cgoogle-sites -# and follow the instructions for Google Sites. You will need to create a Google Analytics property and copy the Google tag ID. -google_analytics: # your Google Analytics measurement ID (format: G-XXXXXXXXXX) -cronitor_analytics: # cronitor RUM analytics site ID (format: XXXXXXXXX) -pirsch_analytics: # your Pirsch analytics site ID (length 32 characters) -openpanel_analytics: # your Openpanel analytics client ID (format: XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX) +analytics: + # For Google Analytics, see https://support.google.com/analytics/answer/10447272?hl=en&ref_topic=14088998&sjid=5129943941510317771-SA#zippy=%2Cgoogle-sites + # and follow the instructions for Google Sites. You will need to create a Google Analytics property and copy the Google tag ID. 
+ google: # your Google Analytics measurement ID (format: G-XXXXXXXXXX) + cronitor: # cronitor RUM analytics site ID (format: XXXXXXXXX) + pirsch: # your Pirsch analytics site ID (length 32 characters) + openpanel: # your Openpanel analytics client ID (format: XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX) # For Google Search Console, see https://support.google.com/webmasters/answer/9008080?hl=en#meta_tag_verification&zippy=%2Chtml-tag google_site_verification: # your google-site-verification ID (Google Search Console) @@ -228,6 +229,7 @@ plugins: - jemoji # al-folio plugins + - al_analytics - al_citations - al_ext_posts @@ -392,10 +394,6 @@ lazy_loading_images: true # enables lazy loading of images (recommended) # Optional Features # ----------------------------------------------------------------------------- -enable_google_analytics: false # enables google analytics -enable_cronitor_analytics: false # enables cronitor RUM analytics -enable_pirsch_analytics: false # enables Pirsch analytics (https://pirsch.io/) -enable_openpanel_analytics: false # enables Openpanel analytics (https://openpanel.dev/) enable_google_verification: false # enables google site verification enable_bing_verification: false # enables bing site verification enable_masonry: true # enables automatic project cards arrangement diff --git a/_includes/scripts.liquid b/_includes/scripts.liquid index 7bbd07166933..67b7bd3e4e07 100644 --- a/_includes/scripts.liquid +++ b/_includes/scripts.liquid @@ -212,29 +212,9 @@ {% endunless %} {% endif %} -{% if site.enable_google_analytics %} +{% if site.plugins contains 'al_analytics' %} - - - -{% endif %} - -{% if site.enable_cronitor_analytics %} - - - -{% endif %} -{% if site.enable_pirsch_analytics %} - -{% endif %} -{% if site.enable_openpanel_analytics %} - - + {% al_analytics_scripts %} {% endif %} {% if site.enable_progressbar %} diff --git a/assets/js/cronitor-analytics-setup.js b/assets/js/cronitor-analytics-setup.js deleted file mode 100644 index 
537bc5b4cc72..000000000000 --- a/assets/js/cronitor-analytics-setup.js +++ /dev/null @@ -1,6 +0,0 @@ -window.cronitor = - window.cronitor || - function () { - (window.cronitor.q = window.cronitor.q || []).push(arguments); - }; -cronitor("config", { clientKey: "{{ site.cronitor_analytics }}" }); diff --git a/assets/js/google-analytics-setup.js b/assets/js/google-analytics-setup.js deleted file mode 100644 index c66e2010ee58..000000000000 --- a/assets/js/google-analytics-setup.js +++ /dev/null @@ -1,6 +0,0 @@ -window.dataLayer = window.dataLayer || []; -function gtag() { - window.dataLayer.push(arguments); -} -gtag("js", new Date()); -gtag("config", "{{ site.google_analytics }}"); diff --git a/assets/js/open-panel-analytics-setup.js b/assets/js/open-panel-analytics-setup.js deleted file mode 100644 index fbd4833d2f92..000000000000 --- a/assets/js/open-panel-analytics-setup.js +++ /dev/null @@ -1,11 +0,0 @@ -window.op = - window.op || - function (...args) { - (window.op.q = window.op.q || []).push(args); - }; -window.op("init", { - clientId: "{{ site.openpanel_analytics }}", - trackScreenViews: true, - trackOutgoingLinks: true, - trackAttributes: true, -}); From 3de45c52a3eb5834a21b1d8065e6bece2819c187 Mon Sep 17 00:00:00 2001 From: Maruan Al-Shedivat Date: Mon, 20 Jan 2025 18:19:11 -0500 Subject: [PATCH 4/6] Remove dead links --- CUSTOMIZE.md | 3 --- 1 file changed, 3 deletions(-) diff --git a/CUSTOMIZE.md b/CUSTOMIZE.md index 9b0f32cfa3cd..351c8505f176 100644 --- a/CUSTOMIZE.md +++ b/CUSTOMIZE.md @@ -185,7 +185,6 @@ You can also: - delete [\_layouts/archive-category.liquid](_layouts/archive-category.liquid) - delete [\_layouts/archive-tag.liquid](_layouts/archive-tag.liquid) - delete [\_layouts/archive-year.liquid](_layouts/archive-year.liquid) -- delete [\_plugins/external-posts.rb](_plugins/external-posts.rb) - remove the `jekyll-archives` gem from the [Gemfile](Gemfile) and the `plugins` section in [\_config.yml](_config.yml) - remove the `classifier-reborn` gem 
from the [Gemfile](Gemfile) @@ -227,9 +226,7 @@ You can also: - delete [\_includes/bib_search.liquid](_includes/bib_search.liquid) - delete [\_includes/citation.liquid](_includes/citation.liquid) - delete [\_includes/selected_papers.liquid](_includes/selected_papers.liquid) -- delete [\_plugins/google-scholar-citations.rb](_plugins/google-scholar-citations.rb) - delete [\_plugins/hide-custom-bibtex.rb](_plugins/hide-custom-bibtex.rb) -- delete [\_plugins/inspirehep-citations.rb](_plugins/inspirehep-citations.rb) - remove the `jekyll-scholar` gem from the [Gemfile](Gemfile) and the `plugins` section in [\_config.yml](_config.yml) ### Removing the repositories page From f9e4b252b0b70c42b35f64b59868f0c5c5782dad Mon Sep 17 00:00:00 2001 From: George <31376482+george-gca@users.noreply.github.com> Date: Sun, 26 Jan 2025 23:38:23 -0300 Subject: [PATCH 5/6] Changed to use the `jekyll-cache-bust` plugin (#2979) --- Gemfile | 1 + Gemfile.lock | 3 +++ _config.yml | 1 + _plugins/cache-bust.rb | 51 ------------------------------------------ 4 files changed, 5 insertions(+), 51 deletions(-) delete mode 100644 _plugins/cache-bust.rb diff --git a/Gemfile b/Gemfile index 2d8043c72ce7..6f8ea42a0057 100644 --- a/Gemfile +++ b/Gemfile @@ -5,6 +5,7 @@ gem 'jekyll' # Core plugins that directly affect site building group :jekyll_plugins do gem 'jekyll-archives' + gem 'jekyll-cache-bust' gem 'jekyll-email-protect' gem 'jekyll-feed' gem 'jekyll-get-json' diff --git a/Gemfile.lock b/Gemfile.lock index 1faa4e09204c..12c06694c7e2 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -150,6 +150,8 @@ GEM webrick (~> 1.7) jekyll-archives (2.3.0) jekyll (>= 3.6, < 5.0) + jekyll-cache-bust (0.0.1) + jekyll (>= 3.6, < 5.0) jekyll-email-protect (1.1.0) jekyll-feed (0.17.0) jekyll (>= 3.7, < 5.0) @@ -303,6 +305,7 @@ DEPENDENCIES css_parser jekyll jekyll-archives + jekyll-cache-bust jekyll-email-protect jekyll-feed jekyll-get-json diff --git a/_config.yml b/_config.yml index 363a439ceb7a..86832ca58c82 
100644 --- a/_config.yml +++ b/_config.yml @@ -211,6 +211,7 @@ keep_files: plugins: # Jekyll plugins - jekyll-archives + - jekyll-cache-bust - jekyll-email-protect - jekyll-feed - jekyll-get-json diff --git a/_plugins/cache-bust.rb b/_plugins/cache-bust.rb deleted file mode 100644 index 94d82d20b0a3..000000000000 --- a/_plugins/cache-bust.rb +++ /dev/null @@ -1,51 +0,0 @@ -# based on https://distresssignal.org/busting-css-cache-with-jekyll-md5-hash -# https://gist.github.com/BryanSchuetz/2ee8c115096d7dd98f294362f6a667db -module Jekyll - module CacheBust - class CacheDigester - require 'digest/md5' - require 'pathname' - - attr_accessor :file_name, :directory - - def initialize(file_name:, directory: nil) - self.file_name = file_name - self.directory = directory - end - - def digest! - [file_name, '?', Digest::MD5.hexdigest(file_contents)].join - end - - private - - def directory_files_content - target_path = File.join(directory, '**', '*') - Dir[target_path].map{|f| File.read(f) unless File.directory?(f) }.join - end - - def file_content - local_file_name = file_name.slice((file_name.index('assets/')..-1)) - File.read(local_file_name) - end - - def file_contents - is_directory? ? file_content : directory_files_content - end - - def is_directory? - directory.nil? - end - end - - def bust_file_cache(file_name) - CacheDigester.new(file_name: file_name, directory: nil).digest! - end - - def bust_css_cache(file_name) - CacheDigester.new(file_name: file_name, directory: 'assets/_sass').digest! 
- end - end -end - -Liquid::Template.register_filter(Jekyll::CacheBust) \ No newline at end of file From d0b918b5b21d24848b4be151707031c6ae8c328d Mon Sep 17 00:00:00 2001 From: George <31376482+george-gca@users.noreply.github.com> Date: Sun, 9 Feb 2025 11:34:39 -0300 Subject: [PATCH 6/6] [Unbundle] Changed to use the `jekyll-3rd-party-libraries` plugin (#2978) I agree that it is a good idea to unbundle some of the plugins from `al-folio`, especially since they can be useful to more jekyll users overall. I have unbundled 2 plugins and created a gem for each one of them, so they can be easily integrated into any jekyll site. This is the first one of them. --- Gemfile | 1 + Gemfile.lock | 5 + _config.yml | 2 +- _plugins/download-3rd-party.rb | 253 --------------------------------- 4 files changed, 7 insertions(+), 254 deletions(-) delete mode 100644 _plugins/download-3rd-party.rb diff --git a/Gemfile b/Gemfile index 6f8ea42a0057..ef240cdeb05d 100644 --- a/Gemfile +++ b/Gemfile @@ -4,6 +4,7 @@ gem 'jekyll' # Core plugins that directly affect site building group :jekyll_plugins do + gem 'jekyll-3rd-party-libraries' gem 'jekyll-archives' gem 'jekyll-cache-bust' gem 'jekyll-email-protect' diff --git a/Gemfile.lock b/Gemfile.lock index 12c06694c7e2..9555e43c55ec 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -132,6 +132,10 @@ GEM multi_xml (>= 0.5.2) i18n (1.14.6) concurrent-ruby (~> 1.0) + jekyll-3rd-party-libraries (0.0.1) + css_parser (>= 1.6, < 2.0) + jekyll (>= 3.6, < 5.0) + nokogiri (>= 1.8, < 2.0) jekyll (4.3.4) addressable (~> 2.4) colorator (~> 1.0) @@ -304,6 +308,7 @@ DEPENDENCIES classifier-reborn css_parser jekyll + jekyll-3rd-party-libraries jekyll-archives jekyll-cache-bust jekyll-email-protect diff --git a/_config.yml b/_config.yml index 86832ca58c82..ff91547aadb8 100644 --- a/_config.yml +++ b/_config.yml @@ -209,7 +209,7 @@ keep_files: # Plug-ins plugins: - # Jekyll plugins + - jekyll-3rd-party-libraries - jekyll-archives - jekyll-cache-bust - 
jekyll-email-protect diff --git a/_plugins/download-3rd-party.rb b/_plugins/download-3rd-party.rb deleted file mode 100644 index 59f809f4de6f..000000000000 --- a/_plugins/download-3rd-party.rb +++ /dev/null @@ -1,253 +0,0 @@ -Jekyll::Hooks.register :site, :after_init do |site| - require 'css_parser' - require 'digest' - require 'fileutils' - require 'nokogiri' - require 'open-uri' - require 'uri' - - font_file_types = ['otf', 'ttf', 'woff', 'woff2'] - image_file_types = ['.gif', '.jpg', '.jpeg', '.png', '.webp'] - - def download_and_change_rule_set_url(rule_set, rule, dest, dirname, config, file_types) - # check if the rule has a url - if rule_set[rule].include?('url(') - # get the file url - url = rule_set[rule].split('url(').last.split(')').first - - # remove quotes from the url - if url.start_with?('"') || url.start_with?("'") - url = url[1..-2] - end - - file_name = url.split('/').last.split('?').first - - # verify if the file is of the correct type - if file_name.end_with?(*file_types) - # fix the url if it is not an absolute url - unless url.start_with?('https://') - url = URI.join(url, url).to_s - end - - # download the file - download_file(url, File.join(dest, file_name)) - - # change the url to the local file, considering baseurl - previous_rule = rule_set[rule] - if config['baseurl'] - # add rest of the src attribute if it exists - if rule_set[rule].split(' ').length > 1 - rule_set[rule] = "url(#{File.join(config['baseurl'], 'assets', 'libs', dirname, file_name)}) #{rule_set[rule].split(' ').last}" - else - rule_set[rule] = "url(#{File.join(config['baseurl'], 'assets', 'libs', dirname, file_name)})" - end - else - # add rest of the src attribute if it exists - if rule_set[rule].split(' ').length > 1 - rule_set[rule] = "url(#{File.join('/assets', 'libs', dirname, file_name)}) #{rule_set[rule].split(' ').last}" - else - rule_set[rule] = "url(#{File.join('/assets', 'libs', dirname, file_name)})" - end - end - puts "Changed #{previous_rule} to 
#{rule_set[rule]}" - end - end - end - - def download_file(url, dest) - # only try to download the file if url doesn't start with | for security reasons - if url.start_with?('|') - return - end - - # create the directory if it doesn't exist - dir = File.dirname(dest) - unless File.directory?(dir) - FileUtils.mkdir_p(dir) - end - - # download the file if it doesn't exist - unless File.file?(dest) - puts "Downloading #{url} to #{dest}" - File.open(dest, "wb") do |saved_file| - URI(url).open("rb") do |read_file| - saved_file.write(read_file.read) - end - end - - # check if the file was downloaded successfully - unless File.file?(dest) - raise "Failed to download #{url} to #{dest}" - end - end - end - - def download_fonts(url, dest, file_types) - # only try to download the file if url doesn't start with | for security reasons - if url.start_with?('|') - return - end - - # only download fonts if the directory doesn't exist or is empty - unless File.directory?(dest) && !Dir.empty?(dest) - puts "Downloading fonts from #{url} to #{dest}" - # get available fonts from the url - doc = Nokogiri::HTML(URI(url).open("User-Agent" => "Ruby/#{RUBY_VERSION}")) - doc.css('a').each do |link| - # get the file name from the url - file_name = link['href'].split('/').last.split('?').first - - # verify if the file is a font file - if file_name.end_with?(*file_types) - # download the file and change the url to the local file - download_file(URI.join(url, link['href']).to_s, File.join(dest, file_name)) - end - end - end - end - - def download_images(url, dest, file_types) - # only try to download the file if url doesn't start with | for security reasons - if url.start_with?('|') - return - end - - # only download images if the directory doesn't exist or is empty - unless File.directory?(dest) && !Dir.empty?(dest) - puts "Downloading images from #{url} to #{dest}" - # get available fonts from the url - doc = Nokogiri::HTML(URI(url).open("User-Agent" => "Ruby/#{RUBY_VERSION}")) - 
doc.xpath('/html/body/div/div[3]/table/tbody/tr/td[1]/a').each do |link| - # get the file name from the url - file_name = link['href'].split('/').last.split('?').first - - # verify if the file is a font file - if file_name.end_with?(*file_types) - # download the file and change the url to the local file - download_file(URI.join(url, link['href']).to_s, File.join(dest, file_name)) - end - end - end - end - - def download_fonts_from_css(config, url, dest, lib_name, file_types) - # only try to download the file if url doesn't start with | for security reasons - if url.start_with?('|') - return - end - - # get the file name from the url - file_name = url.split('/').last.split('?').first - - if file_name == 'css' - file_name = 'google-fonts.css' - end - - # only download the css file if it doesn't exist - unless File.file?(File.join(dest, file_name)) - puts "Downloading fonts from #{url} to #{dest}" - # download the css file with a fake user agent to force downloading woff2 fonts instead of ttf - # user agent from https://www.whatismybrowser.com/guides/the-latest-user-agent/chrome - doc = Nokogiri::HTML(URI(url).open("User-Agent" => "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36")) - css = CssParser::Parser.new - css.load_string! 
doc.document.text - - # get the font-face rules - css.each_rule_set do |rule_set| - # check if the rule set has a url - download_and_change_rule_set_url(rule_set, 'src', File.join(dest, 'fonts'), File.join(lib_name, 'fonts'), config, file_types) - end - - # save the modified css file - puts "Saving modified css file to #{File.join(dest, file_name)}" - File.write(File.join(dest, file_name), css.to_s) - end - - return file_name - end - - # replace {{version}} with the version number in all 3rd party libraries urls - site.config['third_party_libraries'].each do |key, value| - if key != 'download' - value['url'].each do |type, url| - # check if url is a dictionary - if url.is_a?(Hash) - url.each do |type2, url2| - # replace {{version}} with the version number if it exists - if url2.include?('{{version}}') - site.config['third_party_libraries'][key]['url'][type][type2] = url2.gsub('{{version}}', site.config['third_party_libraries'][key]['version']) - end - end - else - # replace {{version}} with the version number if it exists - if url.include?('{{version}}') - site.config['third_party_libraries'][key]['url'][type] = url.gsub('{{version}}', site.config['third_party_libraries'][key]['version']) - end - end - end - end - end - - # download 3rd party libraries if required - if site.config['third_party_libraries']['download'] - site.config['third_party_libraries'].each do |key, value| - if key != 'download' - value['url'].each do |type, url| - # check if url is a dictionary - if url.is_a?(Hash) - url.each do |type2, url2| - # get the file name from the url - file_name = url2.split('/').last.split('?').first - # download the file and change the url to the local file - dest = File.join(site.source, 'assets', 'libs', key, file_name) - download_file(url2, dest) - # change the url to the local file, considering baseurl - if site.config['baseurl'] - site.config['third_party_libraries'][key]['url'][type][type2] = File.join(site.config['baseurl'], 'assets', 'libs', key, file_name) 
- else - site.config['third_party_libraries'][key]['url'][type][type2] = File.join('/assets', 'libs', key, file_name) - end - end - - else - if type == 'fonts' - # get the file name from the url - file_name = url.split('/').last.split('?').first - - if file_name.end_with?('css') - # if the file is a css file, download the css file, the fonts from it, and change information on the css file - file_name = download_fonts_from_css(site.config, url, File.join(site.source, 'assets', 'libs', key), key, font_file_types) - # change the url to the local file, considering baseurl - if site.config['baseurl'] - site.config['third_party_libraries'][key]['url'][type] = File.join(site.config['baseurl'], 'assets', 'libs', key, file_name) - else - site.config['third_party_libraries'][key]['url'][type] = File.join('/assets', 'libs', key, file_name) - end - else - # download the font files and change the url to the local file - download_fonts(url, File.join(site.source, 'assets', 'libs', key, site.config['third_party_libraries'][key]['local'][type]), font_file_types) - end - - elsif type == 'images' - # download the font files and change the url to the local file - download_images(url, File.join(site.source, 'assets', 'libs', key, site.config['third_party_libraries'][key]['local'][type]), image_file_types) - - else - # get the file name from the url - file_name = url.split('/').last.split('?').first - # download the file and change the url to the local file - dest = File.join(site.source, 'assets', 'libs', key, file_name) - download_file(url, dest) - # change the url to the local file, considering baseurl - if site.config['baseurl'] - site.config['third_party_libraries'][key]['url'][type] = File.join(site.config['baseurl'], 'assets', 'libs', key, file_name) - else - site.config['third_party_libraries'][key]['url'][type] = File.join('/assets', 'libs', key, file_name) - end - end - end - end - end - end - end -end