User:Richiez/wiki-fref

From Wikipedia, the free encyclopedia



#!/usr/bin/ruby
#
# Walk a wiki article and replace {{pmid XXXX}} and {{isbn XXXX}} with
# footnote-style references.
# You can supply your own tag, {{pmid IDnum tag}}, for use as <ref name=tag>;
# otherwise a tag is generated automatically.
#   Citation templates are fetched from http://diberri.dyndns.org/

require 'cgi'
require 'net/http'

# Host name of the template-filler service; used for the HTTP session and
# for the wwwoffle re-fetch command.
Host = "diberri.dyndns.org"

# Returns the shared Net::HTTP object for Host (port 80), creating it on the
# first call and caching it in $session.
#
# When the http_proxy environment variable looks like "http://host:port" the
# session is routed through that proxy; the captured host and port strings
# are kept in $proxy_addr and $proxy_port.
def get_session
  return $session if $session

  if ENV["http_proxy"] =~ /http:\/\/(.*):(\d*)/
    $proxy_addr = $1
    $proxy_port = $2
    # Net::HTTP expects a numeric port. An empty capture ("http://host:")
    # is passed as nil so the library falls back to its default port
    # instead of trying to connect to port "".
    port = $proxy_port.empty? ? nil : $proxy_port.to_i
    $session = Net::HTTP.new(Host, 80, $proxy_addr, port)
  else
    $session = Net::HTTP.new(Host, 80)
  end
  $session
end

# Fetches the citation template for a book.
#
# isbn - the ISBN (anything responding to to_s); it is appended to the
#        template-filler query string as-is.
#
# Returns whatever lookup returns for the resulting request path.
def lookup_isbn(isbn)
  lookup("/cgi-bin/templatefiller/index.cgi?ddb=&type=isbn&id=#{isbn}")
end
# Fetches the citation template for a PubMed article.
#
# pmid - the PubMed id (anything responding to to_s); it is appended to the
#        template-filler query string as-is.
#
# Returns whatever lookup returns for the resulting request path.
def lookup_pmid(pmid)
  # This method only builds the request path and delegates to lookup; it
  # deliberately does not touch the $references cache.
  path = "/cgi-bin/templatefiller/index.cgi?ddb=&type=pubmed_id&id=" + pmid.to_s
  lookup(path)
end

# Requests +path+ from the session returned by get_session and extracts the
# first {{...}} template that directly follows a <textarea ...> tag in the
# response body.
#
# path - request path including the query string.
#
# Returns the template as a String ("{{...}}" with HTML entities decoded,
# except that dashes are emitted as &ndash;), or nil when the response is
# not a 2xx or contains no template. On a non-2xx response a message is
# written to $stderr; on a 5xx response wwwoffle is additionally invoked
# for Host + path.
def lookup(path)
  session = get_session

  # No explicit "Accept-Encoding" here: when the caller sets that header,
  # Net::HTTP hands back the still-compressed body and the <textarea> match
  # below can never succeed. Left alone, Net::HTTP negotiates and decodes
  # gzip/deflate transparently.
  headers = {
    "User-Agent" => "Dillo/0.8.5-i18n-misc",
    "Referer" => "http://www.google.com/language_tools?hl=en",
    "Accept-Language" => "en-us,en;q=0.5",
    "Accept-Charset" => "ISO-8859-1,utf-8;q=0.7,*;q=0.7",
    "Keep-Alive" => "300"
  }
  res = session.get(path, headers)

  unless /^2\d\d/ =~ res.code
    $stderr.printf "HTTP problem: %s %s\n", res.code, res.message
    if /^5\d\d/ =~ res.code
      begin
        # Array form: no shell is involved, so characters in the
        # article-derived path cannot be interpreted as shell syntax.
        # The command's output is read and discarded.
        IO.popen(["wwwoffle", Host + path]) { |io| io.read }
      rescue SystemCallError => e
        $stderr.puts "wwwoffle failed: #{e.message}"
      end
    end
    return nil
  end

  return nil unless /<textarea.*?>\{\{(.*?)\}\}/ =~ res.body

  # Shield &ndash; from entity decoding by turning it into "--" first, then
  # turn every "--" back into &ndash; afterwards.
  tmpl = $1.gsub(/&ndash;/, "--")
  tmpl = CGI.unescapeHTML(tmpl)
  tmpl = tmpl.gsub(/--/, "&ndash;")

  "{{" + tmpl + "}}"
end

# Builds a <ref name=...> tag from a citation template.
#
# tmpl - template text; it is split on "|" and scanned for "author=..." and
#        "year=NNNN" fields (the last matching field of each kind wins).
# id   - the pmid/isbn id, appended to the tag.
#
# The tag is "<surname>_<year>_<id>" with every character outside
# A-Za-z0-9 replaced by "_". The surname comes from the first
# comma-separated author with its last word (the initials / first names)
# removed; a one-word author is kept whole. The surname part is empty when
# there is no author field or the first author is ''et al''.
def maketag(tmpl, id)
  authors = ""
  year = ""
  tmpl.split("|").each do |el|
    if /author=(.*)/ =~ el
      authors = $1
    elsif /year=(\d+)/ =~ el
      year = $1
    end
  end

  # to_s covers the "no author field" case, where split returns [].
  first_author = authors.split(",").first.to_s
  surname = ""
  unless /''et al''/ =~ first_author
    words = first_author.split(" ")
    # Drop the trailing initials, but never reduce a one-word name to nothing.
    surname = (words.size > 1 ? words[0..-2] : words).join(" ")
  end

  "#{surname}_#{year}_#{id}".gsub(/[^A-Za-z0-9]/, "_")
end

# Produces the replacement text for one {{pmid ...}} / {{isbn ...}} marker.
#
# orig - the marker text as found in the article; the first occurrence of
#        "pmid" or "isbn" (case-insensitive) in it decides the lookup kind.
# id   - the PubMed id or ISBN.
# ptag - tag name supplied by the author, or nil to generate one with maketag.
#
# $references maps "pmid-<id>" / "isbn-<id>" to the tag already emitted for
# that source. Returns
#   * "<ref name=TAG/> <!-- orig -->" when the source was cited before,
#   * "<ref name=TAG> TEMPLATE </ref> <!-- orig -->" on a first, successful
#     lookup (and records TAG in $references),
#   * orig unchanged when the kind is unknown or the lookup yields nothing.
def replace_ref(orig, id, ptag)
  # Use the first keyword in the marker, so "{{isbn 123 pmidbook}}" is an
  # isbn reference even though its tag happens to contain "pmid".
  kind = orig[/pmid|isbn/i]
  return orig unless kind
  kind = kind.downcase

  gtag = "#{kind}-#{id}"
  known = $references[gtag]
  return "<ref name=#{known}/> <!-- #{orig} -->" if known

  template = kind == "pmid" ? lookup_pmid(id) : lookup_isbn(id)
  return orig unless template

  ltag = ptag || maketag(template, id)
  $references[gtag] = ltag
  "<ref name=#{ltag}> #{template} </ref> <!-- #{orig} -->"
end


# Reads an article from +file+, replaces every {{pmid ...}} / {{isbn ...}}
# marker via replace_ref, and prints the resulting text to standard output.
#
# file - an IO-like object responding to readlines.
#
# The first line is printed untouched because it may be a title or
# meta-information. A marker has the form {{(pmid|isbn)<style> ID [TAG]}}:
# the style suffix is currently ignored, the first whitespace-separated word
# after it is the id and the optional second word is the reference tag.
def ref_file(file)
  lines = file.readlines

  # lines[1..-1] is nil for an empty file, hence the fallback.
  (lines[1..-1] || []).each do |line|
    line.gsub!(/\{\{(pmid|isbn)(.*?)\s+(.*?)\}\}/i) do |orig|
      # Splitting on whitespace keeps ids such as "0-19-852663-6" intact and
      # drops stray blanks before the closing braces.
      id, ltag = $3.split(" ")
      replace_ref orig, id.to_s, ltag
    end
  end

  # join, not to_s: Array#to_s yields the inspect form on Ruby >= 1.9.
  print lines.join
end

# Process each file named on the command line. The reference cache is reset
# per file, so every article gets its own full first citation.
ARGV.each do |arg|
  $references = {}
  # Block form closes the file handle even if ref_file raises.
  File.open(arg, "r") { |fl| ref_file fl }
end