tools/collate_minutes.rb

#!/usr/bin/env ruby
$LOAD_PATH.unshift '/srv/whimsy/lib'

require 'whimsy/asf'
require 'builder'
require 'ostruct'
require 'nokogiri'
require 'net/https'
require 'fileutils'
require 'wunderbar'

# Switch to 'info' logging unless info-level logging is already enabled
# (try not to override CLI flags)
unless Wunderbar.logger.info?
  Wunderbar.log_level = 'info'
end

# Add datestamp to log messages (progname is not needed as each prog has its own logfile)
Wunderbar.logger.formatter = proc do |level, timestamp, _progname, message|
  "_#{level} #{timestamp} #{message}\n"
end

# for monitoring purposes: report any uncaught exception (other than a
# SystemExit) on stdout, and always log that the script has finished
at_exit do
  # Capture the pending exception once: inside a rescue clause $! refers to
  # the exception being rescued, not to the one that terminated the script
  error = $!
  if error && !error.instance_of?(SystemExit)
    msg =
      begin
        "#{error.backtrace.first} #{error.message}"
      rescue StandardError
        error # fall back to the original exception itself
      end
    puts "\n*** Exception #{error.class} : #{msg} ***"
  end
  Wunderbar.info "Finished #{__FILE__}"
end

Wunderbar.info "Starting #{__FILE__}"

# Destination directory for the generated pages: the configured
# :board_minutes value, else www/board/minutes resolved against this
# file's path
SITE_MINUTES = ASF::Config.get(:board_minutes) ||
  File.expand_path('../../www/board/minutes', __FILE__)

# SVN resources used as input
SVN_SITE_RECORDS_MINUTES = ASF::SVN['minutes']
BOARD = ASF::SVN['foundation_board']

# Command-line flags. Each flag is removed from ARGV when present, so that
# only the positional arguments are left for the ARGV.shift calls below.
KEEP          = ARGV.delete('--keep')          # keep obsolete files?
force         = ARGV.delete('--force')         # rerun regardless
NOSTAMP       = ARGV.delete('--nostamp')       # don't add dynamic timestamp to pages (for debug compares)
NOWARN_LAYOUT = ARGV.delete('--nowarn_layout') # don't add layout change warning to pages (for debug compares)
DUMP_AGENDA   = ARGV.delete('--dump_agenda')   # output agenda details to stdout
DUMP_PENDING  = ARGV.delete('--dump_pending')  # output pending report details to stdout

# Timestamp written into the generated pages (fixed at 1970 with --nostamp)
STAMP = (NOSTAMP ? Time.new(1970) : Time.now).strftime('%Y-%m-%d %H:%M')

# Positional arguments, in order
YYYYMMDD  = ARGV.shift || '20*'        # Allow override of minutes to process
TIME_DIFF = (ARGV.shift || '300').to_i # Allow override of seconds of time diff (WHIMSY-204) for testing

# File name pattern and full glob path of the minutes to process
MINUTES_NAME = "board_minutes_#{YYYYMMDD}.txt"
MINUTES_PATH = File.join(SVN_SITE_RECORDS_MINUTES, '*', MINUTES_NAME)

Wunderbar.info "Processing minutes matching #{MINUTES_NAME}"

INDEX_FILE = "#{SITE_MINUTES}/index.html"

# quick exit if everything is up to date
if File.exist? INDEX_FILE
  # most recent modification time of any input: the minutes files, this
  # script, and the ASF library
  input = Dir[MINUTES_PATH,
    "#{BOARD}/board_minutes_20*.txt"].
    map {|name| File.stat(name).mtime}.
    push(File.stat(__FILE__).mtime, ASF.library_mtime).
    max

  indexmtime = File.stat(INDEX_FILE).mtime
  diff = indexmtime - input
  Wunderbar.info "Most recent update: #{input}"
  Wunderbar.info "Index file update:  #{indexmtime} Diff: #{diff}"
  # WHIMSY-204: allow for update window
  # TODO: consider storing actual update check time
  if diff >= TIME_DIFF
    Wunderbar.info "All up to date! (#{TIME_DIFF})"
    unless force
      # Add stamp to index page.
      # The pattern must agree with the intro text that layout() writes to
      # the main index; the optional group accepts both the
      # "Last run: " and the "Last collate_minutes.rb run: " wording.
      page = File.read(INDEX_FILE)
      File.write(INDEX_FILE,
        page.sub(/(Last (?:collate_minutes\.rb )?run: )\d{4}-\d\d-\d\d \d\d:\d\d(\. The data is extracted from a list of)/, "\\1#{STAMP}\\2"))
      exit
    end
  end
end

Wunderbar.info 'Processing input files'

# mapping of committee names to canonical names (generally from ldap);
# a name with no entry maps to itself
canonical = Hash.new { |_hash, name| name }

# extract podling information, keyed by podling name
site = {}
ASF::Podling.list.each do |podling|
  lowered = podling.display_name.downcase
  canonical[lowered] = podling.name unless lowered == podling.name

  # leave out podlings that graduated more than 90 days ago
  graduated = podling.status == 'graduated' && podling.enddate
  next if graduated && Date.today - podling.enddate > 90

  site[podling.name] = {
    :name   => podling.display_name,
    :status => podling.status,
    :link   => "http://incubator.apache.org/projects/#{podling.name}.html",
    :text   => podling.description
  }
end

# get site information: use the local copy of committee-info.json when it
# exists and is less than an hour old, otherwise fetch the public copy
DATAURI = 'https://whimsy.apache.org/public/committee-info.json'
local_copy = File.expand_path('../../www/public/committee-info.json', __FILE__)
cinfo =
  if File.exist?(local_copy) && (Time.now - File.stat(local_copy).mtime < 3600)
    Wunderbar.info "Using #{local_copy}"
    JSON.parse(File.read(local_copy))
  else
    Wunderbar.info 'Fetching remote copy of committee-info.json'
    response = Net::HTTP.get_response(URI(DATAURI))
    response.value # Raises error if not OK
    JSON.parse(response.body)
  end

# record each committee, plus an alias for any display name that differs
# from the committee id
cinfo['committees'].each do |id, details|
  lowered = details['display_name'].downcase
  canonical[lowered] = id unless lowered == id
  site[id] = {name: details['display_name'], link: details['site'], text: details['description']}
end

# parse the calendar for layout info (note: hack for &raquo and &nbsp;)
CALENDAR = URI.parse 'https://www.apache.org/foundation/board/calendar.html'
http = Net::HTTP.new(CALENDAR.host, CALENDAR.port)
http.use_ssl = true
# Verify the server certificate: the fetched page becomes the template for
# every generated page, so it must not be open to tampering in transit
http.verify_mode = OpenSSL::SSL::VERIFY_PEER
get = Net::HTTP::Get.new CALENDAR.request_uri
calendar_response = http.request(get)
calendar_response.value # Raises error if not OK (as for the committee-info.json fetch)
$calendar = Nokogiri::HTML(calendar_response.body.gsub('&raquo', '&#187;').gsub('&nbsp;', '&#160;'))

# Link to headerlink css (used by the 'headerlink' permalink anchors)
link = Nokogiri::XML::Node.new 'link', $calendar
link.set_attribute('rel', 'stylesheet')
link.set_attribute('href', 'https://www.apache.org/css/headerlink.css')
$calendar.at('head').add_child(link)

# add some style for tables and for the <pre class="report"> blocks
style = Nokogiri::XML::Node.new 'style', $calendar
style.content = %{
  table {
    border: 1px solid #ccc;
    margin-bottom: 10px;
    width: 100%;
    border-collapse: collapse;
    border-spacing: 0;
  }

  tbody th, tbody td {
    border-bottom: 1px solid #ccc;
    border-top: 1px solid #ccc;
    padding: 0.2em 1em;
  }

  pre.report {
    color: black;
    font-family: Consolas,monospace
  }
}
$calendar.at('head').add_child(style)

# Make links absolute: resolve every href/src attribute of the listed
# elements against the calendar page's own URL
%w(a img link script).each do |tag|
  $calendar.search(tag).each do |node|
    %w(href src).each do |attribute|
      target = node[attribute]
      node[attribute] = (CALENDAR + target.strip).to_s if target
    end
  end
end

# handle project name changes
# see also www/board/minutes/.htaccess
# also see parse (Executive) Officer Reports ca. line 670
#
# Ordered list of [pattern, replacement] pairs. String patterns match
# literally, Regexp patterns as written. The order matters: later rules
# see the result of earlier ones.
NAME_CHANGE_RULES = [
  ['Ace', 'ACE'], # WHIMSY-31
  ['ADF Faces', 'MyFaces'], # via Trinidad
  ['Amber', 'Oltu'],
  ['Apache/TCL', 'Tcl'],
  ['Argus', 'Ranger'],
  ['ASF Rep. for W3C', 'W3C Relations'],
  ['Bean Validation', 'BVal'],
  ['BeanValidation', 'BVal'],
  ['Bluesky', 'BlueSky'],
  ['BRPC', 'brpc'],
  ['Callback', 'Cordova'],
  ['Conferences', 'Conference Planning'],
  ['Cxx Standard Library', 'C++ Standard Library'],
  ['Deft', 'AWF'],
  ['DLab', 'DataLab'],
  ['Distributed Release Audit Tool (DRAT)', 'DRAT'],
  ['Dolphin Scheduler', 'DolphinScheduler'], # board_minutes_2019_11_20.txt
  ['Easyant', 'EasyAnt'],
  ['Empire-DB', 'Empire-db'],
  ['Fleece', 'Johnzon'],
  ['Geroniomo', 'Geronimo'],
  ['iBatis', 'iBATIS'],
  ['infrastructure', 'Infrastructure'],
  ['ISIS', 'Causeway'],
  ['Isis', 'Causeway'],
  ['IVY', 'Ivy'],
  ['JackRabbit', 'Jackrabbit'],
  ['James', 'JAMES'],
  ['Java Community Process', 'JCP'],
  ['JSecurity', 'Shiro'],
  ['Juice', 'JuiCE'],
  ['log4php', 'Log4php'],
  ['Lucene.NET', 'Lucene.Net'],
  ['lucene4c', 'Lucene4c'],
  ['MesaTEE', 'Teaclave'],
  ['Ode', 'ODE'],
  ['ODFToolkit', 'ODF Toolkit'],
  ['Open for Business', 'OFBiz'],
  ['TomEE (OpenEJB)', 'TomEE'],
  ['OpenEJB', 'TomEE'],
  ['Openmeetings', 'OpenMeetings'],
  ['OpenOffice.org', 'OpenOffice'],
  ['Optiq', 'Calcite'],
  ['Orc', 'ORC'],
  ['Oscar', 'Felix'],
  ['PonyMail', 'Pony Mail'],
  ['PRC', 'Public Relations'],
  ['Public Relations Commitee', 'Public Relations'],
  ['Quarks', 'Edgent'],
  ['SensSoft', 'Flagon'],
  ['Servicecomb', 'ServiceComb'],
  ['Singa', 'SINGA'],
  ['Socialsite', 'SocialSite'],
  ['stdcxx', 'C++ Standard Library'],
  ['STDCXX', 'C++ Standard Library'],
  ['Steve', 'STeVe'],
  ['Stratosphere', 'Flink'],
  ['SystemML', 'SystemDS'],
  ['TCL', 'Tcl'],
  ['TubeMQ', 'InLong'],
  ['Web services', 'Web Services'],
  ['Zest', 'Polygene'],
  ["Infrastructure (President's)", 'Infrastructure'],
  [%r{\bKi\b}, 'Shiro'],
  [%r{^HTTPD?$}, 'HTTP Server'],
  [%r{^Infrastructure .*}, 'Infrastructure'],
  [%r{^Labs .*}, 'Labs'],
  [%r{^Logging$}, 'Logging Services'],
  [%r{APR$}, 'Portable Runtime (APR)'],
  [%r{CeltiX[Ff]ire}, 'CXF'],
  [%r{Fund[- ][rR]aising}, 'Fundraising'],
  [%r{Perl-Apache( PMC)?}, 'Perl'],
  [%r{Portable Runtime$}, 'Portable Runtime (APR)'],
  [%r{Public Relations Committee}, 'Public Relations'],
  [%r{Security$}, 'Security Team']
].freeze

# Rewrite +title+ in place, replacing the first occurrence of each old or
# variant project name with its current form.
#
# title:: String to update (mutated)
# Returns the result of the final substitution (nil if it did not match).
def name_changes(title)
  outcome = nil
  NAME_CHANGE_RULES.each do |pattern, replacement|
    outcome = title.sub!(pattern, replacement)
  end
  outcome
end

# reports collected from all meetings, keyed by report title
agenda = {}

# input files: minutes found under the 'minutes' SVN resource, and minutes
# found in the 'foundation_board' SVN resource
posted = Dir.glob(MINUTES_PATH).sort
unapproved = Dir.glob(File.join(BOARD, MINUTES_NAME)).sort

FileUtils.mkdir_p(SITE_MINUTES)

# meeting date => file already processed for that date
seen = {}

# Parse each minutes file in turn (posted files first, then unapproved).
# Each file is broken down into individual reports which are collected in
# +pending+ (keyed by attachment id or title) and then appended to +agenda+
# under the report title. Only the first file seen for a meeting date is
# processed.
(posted+unapproved).each do |txt|
  date = $1 if txt =~ /(\d\d\d\d_\d\d_\d\d)/
  next unless date
  if seen.has_key? date
    Wunderbar.warn "Already processed #{seen[date]}; skipping #{txt}"
    next
  end
  Wunderbar.info "Parsing input for #{date}"
  seen[date] = txt
  minutes = File.read(txt)
  pending = {}

  # parse Attachments (includes both Officer Reports and Committee Reports)
  minutes.scan(/
    -{41}\n                        # separator
    Attachment\s\s?(\w+):[ ](.+?)\n # Attachment, Title
    (.)(.*?)\n                     # separator, report
    (?=[-_]{41,}\n(?:End|Attach))     # separator
  /mx).each do |attach,title,cont,text|

    # We need to keep the start of the second line.
    # Otherwise leading spaces in the report body look like a continuation line
    if cont == ' ' # continuation line was not empty; check if it's a continuation
      # join multiline titles
      while text.start_with? '        '
        append, text = text.split("\n", 2)
        title += ' ' + append.strip
      end
    end

    # reduce the attachment title to a bare committee/officer name
    owners = nil
    if title =~ /^Report from the(?: VP of)? (.+)/i
      title = $1
      if title =~ /^(.+?) +\[([^\]]+)\]/
          title = $1
          owners = $2
      end
    end
    title.sub! /Special /, ''
    title.sub! /Requested /, ''
    title.sub! /(^| )Report To The Board( On)?( |$)/i, ''
    title.sub! /^Board Report for /, ''
    title.sub! /^Status [Rr]eport for (the )?/, ''
    title.sub! /^Report from the /i, ''
    title.sub! /^Status report for the /i, ''
    title.sub! /^Apache /, ''
    title.sub! /^\/ /, ''
    title.sub! /\s+\[.*\]\s*$/, ''
    title.sub! /\sTeam$/, ''
    title.sub! /\s[Cc]ommittee?\s*$/, ''
    title.sub! /\s[Pp]roject\s*$/, ''
    title.sub! /\sPMC$/, ''
    title.sub! 'Apache Software Foundation', 'ASF'

    name_changes(title)

    next if title.strip.empty?
    next if text.strip.empty? and title =~ /Intentionally (left )?Blank/i
    next if text.strip.empty? and title =~ /There is No/i

    report = pending[attach] ||= OpenStruct.new
    report.meeting = date
    report.attach = attach
    report.owners ||= owners if owners
    report.title = title.strip #.downcase
    report.text = text

    # file some reports under a fixed title, keeping the original as subtitle
    if title =~ /budget|spending/i
      report.subtitle = title
      report.title = 'Budget'
      report.attach = '@' + attach
    elsif title =~ /Contributor License Agreement/
      report.subtitle = title
      report.title = 'Legal Affairs'
      report.attach = '1' + attach
    elsif title =~ /P(rofit-and-|&)L(oss)? Report/
      report.subtitle = title
      report.title = 'Treasurer'
      report.attach = '1' + attach
    elsif title =~ /alleged JBoss IP infringement/
      report.subtitle = title
      report.title = 'Alleged JBoss IP Infringement'
      report.attach = '@' + attach
    elsif title =~ /Written Consent of the Directors/
      report.attach = '@' + attach
    end

    # the Incubator report embeds podling reports; split them out
    if title == 'Incubator' and text
      sections = text.split(/\nStatus [rR]eport (.*)\n=+\n/)
      # Some early 2012 minutes have a 'Detailed Reports' header before the first podling report
      # i.e. the podling reports follow the line
      # '-------------------- Detailed Reports --------------------'
      # instead of the following
      # '--------------------'
      # Some reports include trailing spaces after the ----
      # podling header may now be prefixed with ## (since June 2019)
      # Also there may be a blank line before the ##
      sections = text.split(/\n[-=][-=]+(?: Detailed Reports ---+)?\s*\n(?:\n?##)?\s*([a-zA-Z].*)\n\n/) if sections.length < 9
      sections = [''] if sections.include? 'FAILED TO REPORT'
      sections = text.split(/\n(\w+)\n-+\n\n/) if sections.length < 9
      sections = text.split(/\n=+\s+([\w.]+)\s+=+\n+/) if sections.length < 9

      prev = nil

      if sections.length > 1
        # first element is the text before the first podling header; the
        # remainder alternates between header and body
        report.text = sections.shift
        sections.each_slice(2) do |title, text|
          title.sub! /^regarding /, ''
          title.sub! /^for /, ''
          title.sub! /^from /, ''
          title.sub! /^the /, ''
          title.sub! /\sPPMC$/, ''

          if title =~ /Apache (.*) is a/
            text = title + "\n" + text
            title = $1
          end

          if title =~ /(.*) has been incubating/
            text = title + "\n" + text
            title = $1
          end

          if title =~ /(.*) -- (DID NOT REPORT)/
            text = $2 + "\n" + text
            title = $1
          end

          if title =~ /(.*?) - (.*)/
            text = $2 + "\n" + text
            title = $1
          end

          if title =~ /(.*? sponsored) incubation \((.*)\)/
            text = $2 + "\n" + text
            title = $1
          end

          next if title == 'April 2011 podling reports'

          name_changes(title)

          title.sub! /\s+\(.*\)$/, ''
          title.sub! /^Apache(: Project)?/, ''

          # these sections belong to the preceding podling report
          if %w(Mentors Committers).include? title
            prev.text += "\n== #{title}==\n\n#{text}" if prev
            next
          end

          report = OpenStruct.new
          report.meeting = date
          report.attach = '.' + title
          report.title = title.strip
          report.text = text
          pending[report.attach] = report

          prev = report
        end
      end
    end
  end

  # parse Officer and Committee Reports for owners and comments
  minutes.scan(/
    \[([^\n]+)\]\n\n                  # owners
    \s{7}See\sAttachment\s\s?(\w+)    # attach
    (.*?)\n                           # comments
    \s\s\s\s?\w                       # separator
  /mx).each do |owners,attach,comments|
    report = pending[attach] ||= OpenStruct.new
    report.meeting = date
    report.attach = attach
    report.owners = owners
    cs = comments.strip
    report.comments = cs if cs.length > 0
  end

  # fill in comments from missing reports
  # TODO: temporarily omit Additional Officer processing as it generates some incorrect ownership
  ['Committee', '_Additional Officer_'].each do |section|
    reports = minutes[/^ \d\. #{section} Reports(\s*(\n|  .*\n)+)/,1]
    next unless reports
    reports.split(/^    (\w+)\./)[1..-1].each_slice(2) do |attach, comments|
      next if attach.length > 2 # Why?
      next if comments.include? 'See Attachment' # handled above
      owners = comments[/\[([^\n]+)\]/,1]
      comments.sub!(/.*\s+\n/, '')
      next if comments.empty?
      # TODO: This does not work properly
      attach = ('A'..attach).count.to_s if section == 'Additional Officer'

      report = pending[attach] ||= OpenStruct.new
      report.meeting = date
      report.attach = attach
      report.owners = owners
      cs = comments.strip
      report.comments = cs if cs.length > 0
    end
  end

  # parse Action Items
  minutes.scan(/
    \n\s+(\w+)\.\s                    # attach
    Review\sOutstanding\s(Action\sItems)\n\n?
    (.*?)                             # text
    \n\s?\d                           # separator
  /mx).each do |attach, title, text|
    report = OpenStruct.new
    report.title ||= title #.downcase
    report.meeting = date
    report.attach = '+' + title
    text.gsub! /^\s?\d+\.\s.*\s*\Z/, ''
    report.text = text.gsub Regexp.new('^'+text.match(/^ */)[0]), '' if text
    pending[title] = report
  end

  # parse other agenda items
  establish='' # pick up misplaced PMC creates
  minutes.scan(/
    \n\s*(\w+)\.\s                    # attach
    (Discussion\sItems|Unfinished\sBusiness|New\sBusiness|Announcements)\n
    (.*?)                             # text
    (?=\n\s?\d)                       # separator
  /mx).each do |attach, title, text|
    next if text.strip.empty?
    next if text =~ /\A\s*none\.?\s*\z/i
    next if text =~ /\A\s*no unfinished business\.?\s*\z/i
    if text =~ /Establish the Apache \S+ Project/ # 2012_08_28
      establish += text
      next
    end

    if title !~ /Discussion/ or text !~ /\A\n*\s{3,5}[0-9A-Z]\.\s.*\n\n/
      # keep the section as a single report
      report = OpenStruct.new
      report.title ||= title #.downcase
      report.meeting = date
      report.attach = '+' + title
      report.text = text.strip
      pending[title] = report
    else
      # Discussion Items with lettered/numbered sub-items: one report each
      text.scan(/
        \s{3}[\s\d]([0-9A-Z])\. # agenda item
        \s+(.*?)\n              # title
        (.*?)                   # text
        (?=\n\s{3,5}\d?[0-9A-Z]\.\s|\z) # next section
      /mx).each do |attach,title,text|
        if title.include? "\n" and title.length > 120
          title = title.split("\n")
          text = title[1..-1].join("\n") + "\n" + text
          title = title[0]
        end

        title.sub! 'VP, Data Privacy', 'VP Data Privacy'
        title.sub! /Executive Session \(\d\d.*?\)/, 'Executive Session' # Drop times from titles

        report = OpenStruct.new
        report.title = title.gsub(/\s+/, ' ')
        report.meeting = date
        report.attach = '+' + title
        report.text = text.strip

        if title =~ /budget|spending/i
          report.subtitle = title
          report.title = 'Budget'
          report.attach = '@' + attach
        elsif title =~ /Legal Affairs/
          report.subtitle = title
          report.title = 'Legal Affairs'
          report.attach = '1' + attach
        elsif title =~ /date.+member.+meeting/i || title =~ /member.+meeting.+date/i
          report.subtitle = title
          report.title = 'Set Date for Members Meeting'
          report.attach = '@' + attach
        else
          pmcs = %w{Geronimo iBATIS Santuario}
          pmcs.each do |pmc|
            if title =~ /#{pmc}/i
              report.subtitle = title
              report.title = pmc
              report.attach = '.' + pmc
            end
          end
        end

        pending[title] = report
      end
    end
  end

  # parse Special Orders
  orders = establish + minutes.split(/^ \d\. Special Orders/,2).last.split(/^ \d\./,2).first
  # Some section ids have a leading digit, hence [\s\d]
  orders.scan(/
    \s{3}[\s\d]([A-Z])\.    # agenda item
    \s+(.*?)\n\s*\n         # title
    (.*?)                   # text
    (?=\n\s{3,4}[\s\d][A-Z]\.\s|\z) # next section
  /mx).each do |attach,title,text|
    next if title.count("\n")>1
    report = OpenStruct.new
    title.sub! /(^|\n)\s*Resolution R\d:/, ''
    title.sub! 'Standardise the privacy policy for Foundation web sites', 'Standardise privacy policy for foundation websites'
    title.sub!(/^(?:Proposed )?Resolution (\[R\d\]|to|for) ./) {|c| c[-1..-1].upcase}
    title.sub! /\.$/, ''
    report.title ||= title.strip
    report.meeting = date
    report.attach = '@' + title
    report.text = text.strip

    # Columns:
    # Pfx Title Match
    # If Title is a number, then extract that part of the match
    rules = [
      :X, 2, /Terminat(e|ion of) the (.+?) (Project|PMC|Committee)/,
      :X, 1, /Separate (.+?) from the Apache Software Foundation/,

      :E, 1, /Establishing a PMC for a (.*) project/,
      :E, 1, /Establish (.+?) as a top level project/,
      :E, 1, /Establish (AsterixDB)/, # 2016_04_20
      :E, 4, /Estab?lish(ing|ment)? (of )?(the |an )?(.+?) (board )?(PMC|[pP]roject|[cC]ommittee)$/,
      :E, 2, /Creat(e|ion of) the (.+?) (Project|PMC)/,
      :E, 2, /To (re-establish|create) the (.+?) PMC/,
      :E, 2, /Reestablish(ing the)? (.+?)( Project| Committee | Team)/,
      :E, 1, /^Apache (.+?) Project$/,


      :C, 3, /(Change|Appoint).* Vice President of (the )?(.+)/,
      :C, 2, /(Appoint|Establish) a new (.+?) PMC Chair/,
      :C, 1, /New Vice President for the (.+?) PMC/,
      :C, 1, /Appoint.* as the (.*?) of the ASF/,
      :C, 1, /Appointment of (.*?) Committee Chair/,
      :C, 3, /Appoint(ing a)? new [cC]hair (for|of the) (.*?)( Project|$)/,
      :C, 1, /Alter the Chair of the (.+?) Project/,
      :C, 2, /[cC]hange (the )?[cC]hair of the (.+?) (Project|PMC)/,
      :C, 3, /[Cc]hang(e|ing) (to )?the (.+?) (Project |PMC )?Chair/,
      :C, 2, /Change (of|the) (.+?) (PMC |Project |Committee )Chair/,
      :C, 1, /Resolution to change the (.+?) Chair/,
      :C, 1, /PMC chair change for (.+)/,
      :C, 1, /Change PMC [Cc]hair for (.+?) Project/,
      :C, 3, /Appoint a (new )?(chair for |Vice President of )(.+)/,
      :C, 1, /Appoint .*? as (.+?) chairman/,
      :C, 1, /Change Chair for Apache (.+)/,

      :M, 1, /Reboot the (.+?) (PMC|Committee)/,
      :M, 1, /(.+?) election of new PMC/,
      :M, 2, /Update (membership of the )?(.+?) Committee/,
      :M, 1, /Change to the (.*)? Committee Membership/,
      :M, 1, /Change the Apache (.*) Project Name/,
      :M, 1, /Change the Apache (.*) Project Management Committee/,
       1, 1, /Update ?(audit.+?) Membership/i,
      :M, 1, /Update ?(.+?) Membership/,
      :R, 1, /Rename.* to the ?(.+?) Project/,

      '@', 1, /(.*) Renewal/,

      :C, 'Conference Planning', /Conferences? Committee/,

      '@', 'Budget', /Spending Resolution/i,
      '@', 'Budget', /Budget/i,
      '@', 'Bylaws', /Bylaw/i,
      '@', 'Chief Media Officer', /Chief Media Officer/i,

      1, 'JCP', /Java Community Process/,
      1, 'JCP', /JCP/,
      1, 'Public Relations', /Public Relations/i,
      1, 'Marketing and Publicity', /Press/i,
      1, 'Legal Affairs', /License/i,
      1, 'Legal Affairs', /Copyright/i,
      1, 'Legal Affairs', /contributor agreement/i,
      1, 'Legal Affairs', /CLA/,
      1, 'Legal Affairs', /[MG]PL/,
      1, 'Brand Management', /use.*feather/,
      1, 'Brand Management', /Trademark/,
      1, 'Brand Management', /use.*Apache name/,
      1, 'Brand Management', /Brand Management/i,
      1, 'Travel Assistance', /TAC/,
      1, 'Travel Assistance', /Travel Assistance/,
      1, 'Conference Planning', /Conference Planning/,
      1, 'Fundraising', /Fundraising/,
      1, 'Audit', /Audit/i,

      :C, 'Public Relations', /Appoint Brian Fitzpatrick as a Vice President/,

      '@', 'Appoint Executive Officers', /Appoint(ment of)? (new |ASF )?[oO]fficers/,
      '@', 'Appoint Executive Officers', /Election of Officers/,
      '@', 'Appoint Executive Officers', /Officer Appointments/i,
      '@', 'Set Date for Members Meeting', /date.* member'?s meeting/i,
      '@', 'PMC Membership Change Process', /Empower PMC chairs to change the membership/i,
      '@', 'PMC Membership Change Process', /Amend the Procedure for PMC Membership Changes/i,
      '@', 'Secretarial Assistant', /Approve contract with Jon Jagielski/,
      '@', 'Alleged JBoss IP Infringement', /alleged JBoss IP infringe?ment/,
      '@', 'Discussion Items', /^Discuss/
    ]

    # apply the first matching rule only
    rules.each_slice(3) do |prefix, select, pattern|
      match = pattern.match(report.title)
      if match
        report.subtitle = report.title
        if select.is_a? Integer
          report.title = match[select]
        else
          report.title = select
        end
        report.attach = "#{prefix}#{report.attach}"
        break
      end
    end

    report.title.sub! /^Apache /, ''

    name_changes(report.title)

    report.title.sub! 'standing Audit', 'Audit'
    report.title.sub! 'federated identity', 'Federated Identity'
    report.title.sub! 'WSIF', 'Web Services'

    pending[title] = report
  end

  # parse (Executive) Officer Reports
  execs = minutes[/Officer Reports(.*?)\n[[:blank:]]{1,3}\d+\./m,1]
  if execs
    execs.sub! /\s*Executive officer reports approved.*?\n*\Z/, ''
    # attachments start like this:
    att_prefix = '\n[[:blank:]]{1,5}([A-Z])\.[[:blank:]]'
    execs.scan(/
      #{att_prefix}([^\n]*?)\n          # attach, title
      (.*?)                             # text
      (?=#{att_prefix}|\Z)              # separator
    /mx).each do |attach, title, text|
      next unless text
      next unless title
      next if title.start_with? 'This interim budget shows a surplus'
      next if title.start_with? "President's discretionary fund returned to"

      title.sub! 'Executive VP', 'Executive Vice President'
      title.sub! 'Exec. V.P. and Secretary', 'Secretary'
      title.sub! 'Vice Chairman', 'Vice Chair'
      title.sub! 'Acting Chairman', 'Board Chair' # merge report(s) from acting chair
      title.sub! 'Chairman', 'Board Chair'

      report = OpenStruct.new
      if title.include? ' ['
        report.owners = title.split(' [').last.sub(']','').strip
        title = title.split(' [').first
      end
      report.title ||= title.strip #.downcase
      report.title.gsub! /^V\.?P\.? of /, ''
      report.title.gsub! /\/Apache$/, ''
      report.title = 'Infrastructure' if report.title =~ /Infrastructure/
      report.title = 'Treasurer' if report.title =~ /Treasurer/
      report.meeting = date
      report.attach = '*' + title
      report.text = text.dup
      pending[title] = report
    end
  end

  if DUMP_PENDING
    puts 'Dump of pending data for ' + date
    pending.each do |k,v|
      puts "#{k} #{k == v.attach ? '==' : '!='} #{v.attach}"
      puts v.title
      puts "O: #{v.owners}" if v.owners
      puts "S: #{v.subtitle}" if v.subtitle
      p "C: #{v.comments}" if v.comments
      # entries created from owners/comments alone have no text
      text = v.text.to_s
      puts "#{text.size} #{text.split("\n",2)[0]}"
      puts ''
    end
  end

  # Add to the running tally
  pending.each_value do |report|
    next if not report.title or report.title.empty?

    # flag unposted reports; exclude unposted special orders
    report.posted = posted.include? txt
    next if not report.posted and
      (report.attach =~ /^[A-Z]?@/ or report.attach !~ /^[A-Z.]/)

    agenda[report.title] ||= []
    agenda[report.title] << report
  end
end

# Optional debug listing: one line per report title
if DUMP_AGENDA
  puts 'Dump of agenda data for this run'
  agenda.each_pair do |heading, entries|
    count = entries.length
    marker = count > 1 ? '>1' : '=1'
    p [marker, entries.last.attach[0..1], count, heading]
  end
end

Wunderbar.info 'Starting to generate output'

# determine link for each report: the title with 'C++' spelled as 'Cxx'
# and every non-word character replaced by '_', plus '.html'
link = {}
agenda.each_key do |heading|
  link[heading] = heading.sub('C++','Cxx').gsub(/\W/,'_') + '.html'
end

# Simplify creating content.
#
# Yields a Builder::XmlMarkup to the caller's block, parses the markup the
# block produced as HTML, and returns the child nodes of the resulting body.
def getHTMLbody
  markup = Builder::XmlMarkup.new(:indent => 2)
  yield markup
  parsed = Nokogiri::HTML(markup.target!)
  parsed.at('body').children
end

# Combine content produced here with the template fetched previously
#
# title:: report title for an individual page; nil for the main index
#
# Yields a Builder::XmlMarkup on which the caller builds the page content.
# The shared $calendar template is modified in place: its title and intro
# paragraph are replaced and the caller's content is appended.
# Returns the resulting page as an HTML string.
def layout(title = nil)
  builder = Builder::XmlMarkup.new :indent => 2
  yield builder
  content = Nokogiri::HTML(builder.target!)
  if title
    $calendar.at('title').content = "Board Meeting Minutes - #{title}"
#   $calendar.at('h2').content = "Board Meeting Minutes - #{title}"
  else
    $calendar.at('title').content = 'Board Meeting Minutes'
#   $calendar.at('h2').content = "Board Meeting Minutes"
  end

  # Adjust the page header

  # find the intro para; assume it is the first para with a strong tag
  # then back up to the main container class for the page content
  section = $calendar.at('.container p strong').parent.parent
  # Extract all the paragraphs
  paragraphs = section.search('p')

  # remove all the existing content
  section.children.each {|child| child.remove}

  # Add the replacement first para
  section.add_child getHTMLbody {|x|
    x.p do
      if title
        x.text! "This was extracted (@ #{STAMP}) from a list of"
      else # main index, which is always replaced if any input files have changed
        # text below must agree with code that updates the index when no changes have occurred
        x.text! "Last collate_minutes.rb run: #{STAMP}. The data is extracted from a list of"
      end
      x.a 'minutes', :href => 'http://www.apache.org/foundation/records/minutes/'
      x.text! 'which have been approved by the Board.'
      x.br
      x.strong 'Please Note'
      # squiggly heredoc causes problems for Eclipse plugin, but leading spaces don't matter here
      x.text! <<-EOT
      The Board typically approves the minutes of the previous meeting at the
      beginning of every Board meeting; therefore, the list below does not
      normally contain details from the minutes of the most recent Board meeting.
      EOT
      unless NOWARN_LAYOUT
        x.br
        x.br
        x.strong 'WARNING: these pages may omit some original contents of the minutes.'
        x.br
        # text! appends plain text; a bare `x.text` would emit a <text> element.
        # Consecutive text! calls are not separated, hence the trailing space.
        x.text! 'This is due to changes in the layout of the source minutes over the years. '
        x.text! 'Fixes are being worked on.'
      end
    end
  }

  # and the second para which is assumed to be the list of years
  section.add_child paragraphs[1]
  section.add_child "\n" # separator to make it easier to read source

  # now add the content provided by the builder block
  content.at('body').children.each {|child| section.add_child child}

  $calendar.to_html
end

# Delete (or, with --keep, only report) previously generated .html pages
# other than index.html that no report title links to
Dir.entries(SITE_MINUTES).each do |entry|
  next if entry == 'index.html' || !entry.end_with?('.html')
  next if link.value?(entry)

  if KEEP
    Wunderbar.info "Outdated? #{entry}"
  else
    Wunderbar.info "Dropping #{entry}"
    File.delete(File.join(SITE_MINUTES, entry))
  end
end

# Remove the variable date from a page.
#
# page:: String containing the page text
# Returns a copy of +page+ without the first
# "This was extracted (@ YYYY-MM-DD HH:MM) from a list of" phrase.
def remove_date(page)
  # timestamp format is '%Y-%m-%d %H:%M'
  stamped_intro = /This was extracted \(@ \d\d\d\d-\d\d-\d\d \d\d:\d\d\) from a list of/
  page.sub(stamped_intro, '')
end

# Render one HTML page per agenda title (report owner). The reports for a
# title are emitted in the reverse of their stored order. A page is written
# when --force is given, when the file does not exist yet, or when it differs
# from the existing file in anything other than the extraction timestamp.
agenda.sort.each do |title, reports|
  page = layout(title) do |x|
    # page heading: link to the site entry when one is known for this title
    info = site[canonical[title.downcase]]
    if info
      x.h1 { x.a info[:name], :href => info[:link], :title => info[:text] }
    else
      x.h1 title
    end

    reports.reverse_each do |report|
      anchor = report.meeting.gsub('_', '-')
      x.h2(id: anchor) do
        # posted reports link to the published minutes, others to the
        # path supplied by ASF::SVN for the board working copy
        href =
          if report.posted
            'http://apache.org/foundation/records/minutes/' +
              "#{report.meeting[0...4]}/board_minutes_#{report.meeting}.txt"
          else
            ASF::SVN.svnpath!('foundation_board', "board_minutes_#{report.meeting}.txt")
          end

        meeting_day = Date.parse(report.meeting.gsub('_', '/')).strftime('%d %b %Y')
        x.a meeting_day, href: href, id: "minutes_#{report.meeting}"
        x.span "[#{report.owners}]", :style => 'font-size: 14px' if report.owners
        # Add headerlink marker
        x.a '¶', href: "##{anchor}", title: 'Permanent link', :class => 'headerlink'
      end
      x.h3 report.subtitle if report.subtitle

      if report.posted
        # normalise whitespace: leading tabs become 8 spaces each, trailing
        # blanks are dropped, and the common left margin is reduced
        body = report.text.gsub(/^\t+/) { |tabs| ' ' * (8 * tabs.length) }
        body.gsub!(/ *$/, '')
        margin = body.scan(/^([ ]+)/).flatten.min.to_s.length - 1
        body.gsub!(/^#{' ' * margin}/, '') if margin > 0
        # when the first line starts at column 0, give it the same leading
        # whitespace as the line that follows it
        body = $1 + body if body =~ /\A\w.*\n(\s+)/
        body = body.to_s.rstrip
        # N.B. The syntax "class: report" causes problems for the Eclipse Ruby plugin
        x.pre body, 'class' => 'report' unless body.strip.empty?

        if report.comments && !report.comments.strip.empty?
          # one paragraph per blank-line separated chunk of the comments
          report.comments.split(/\n\s*\n/).each do |para|
            x.p para, :style => 'width: 40em'
          end
        elsif body.strip.empty?
          if report.subtitle && !report.subtitle.empty?
            x.p { x.em 'Discussion Item with no text or minutes' }
          else
            x.p { x.em 'A report was expected, but not received' }
          end
        end
      elsif report.text.strip.empty?
        x.p { x.em 'A report was expected, but not received' }
      else
        x.p do
          x.em 'Report was filed, but display is awaiting the approval ' +
            'of the Board minutes.'
        end
      end
    end
  end

  dest = File.join(SITE_MINUTES, link[title])
  stale = force || !File.exist?(dest) ||
    remove_date(File.read(dest)) != remove_date(page)
  # pages that are up to date are skipped without logging
  next unless stale

  Wunderbar.info  "Writing #{link[title]}"
  File.open(dest, 'w') { |out| out.write page }
end

# Classification scheme
# Pfx = reports.last.attach[0]
# Count = reports.length
#
# Pfx    Count     Section
# '*'     >1       Executive Officer Reports
# 0-9     >1       Additional Officer Reports
# A-Z     >1       Committee Reports
# '.'     any      Podling Reports
# '@'     >1       Repeating Special Orders
# '+'     >1       Other Agenda Items
# !'.'    =1       Other Attachments, Special Orders, and Discussions

# output index: one section per class of agenda item, in a fixed order,
# with titles sorted case-insensitively
agenda = agenda.sort_by { |title, _reports| title.downcase }

# Emit a heading followed by a bullet list linking every title that has more
# than one report and whose last report's `attach` matches +prefix+.
bullet_section = lambda do |x, heading, anchor, prefix|
  x.h2 heading, :id => anchor
  x.ul do
    agenda.each do |title, reports|
      next unless reports.last.attach =~ prefix
      next if reports.length == 1
      x.li { x.a title, :href => link[title] }
    end
  end
end

# Lay +titles+ out in a six column table, filled column by column; +cell+
# renders the content of each occupied cell, unoccupied cells stay empty.
title_grid = lambda do |x, titles, cell|
  cols = 6
  rows = (titles.length + cols - 1) / cols
  x.table do
    (0...rows).each do |row|
      x.tr do
        (0...cols).each do |col|
          x.td do
            title = titles[row + col * rows]
            cell.call(x, title) if title
          end
        end
      end
    end
  end
end

# Committee cell: plain link (with hover text) when site information exists;
# otherwise emphasised if cinfo['committees'] has the title, else struck out.
committee_cell = lambda do |x, title|
  info = site[canonical[title.downcase]]
  if info
    x.a title, :href => link[title], :title => info[:text]
  elsif cinfo['committees'][title]
    x.em { x.a title, :href => link[title] }
  else
    x.del { x.a title, :href => link[title] }
  end
end

# Podling cell: struck out when the site status is dormant or retired,
# emphasised when there is no site information at all, plain link otherwise.
podling_cell = lambda do |x, title|
  info = site[canonical[title.downcase]]
  if info.nil? || info == false
    x.em { x.a title, :href => link[title] }
  elsif %w{dormant retired}.include? info[:status]
    x.del do
      x.a title, :href => link[title], :title => info[:text]
    end
  else
    x.a title, :href => link[title], :title => info[:text]
  end
end

page = layout do |x|
  bullet_section.call(x, 'Executive Officer Reports', 'executive', /^\*/)
  bullet_section.call(x, 'Additional Officer Reports', 'officer', /^\d/)

  x.h2 'Committee Reports', :id => 'committee'
  committees = agenda.select do |_title, reports|
    reports.last.attach =~ /^[A-Z]/ && reports.length != 1
  end
  title_grid.call(x, committees.map(&:first), committee_cell)

  # podlings are listed even when they have only a single report
  x.h2 'Podling Reports', :id => 'podling'
  podlings = agenda.select { |_title, reports| reports.last.attach =~ /^[.]/ }
  title_grid.call(x, podlings.map(&:first), podling_cell)

  bullet_section.call(x, 'Repeating Special Orders', 'orders', /^@/)

  x.h2 'Other Attachments, Special Orders, and Discussions', :id => 'other'
  x.ul do
    # single-report, non-podling items, keyed (and then sorted) by the
    # report's subtitle when it has one, else by the title
    singles = agenda.each_with_object({}) do |(title, reports), found|
      next unless reports.length == 1
      next if reports.last.attach =~ /^[.]/
      found[reports.first.subtitle || title] = title
    end
    singles.sort.each do |subtitle, title|
      x.li { x.a subtitle, :href => link[title] }
    end
  end

  bullet_section.call(x, 'Other Agenda Items', 'agenda', /^\+/)
end

File.open(INDEX_FILE, 'w') { |out| out.write page }

Wunderbar.info "Wrote #{SITE_MINUTES}/index.html"