#!/usr/bin/ruby


# Deliberate guard: the script is marked as unfinished, so it aborts right
# here.  Nothing below this line (requires, definitions, MAIN) is executed
# until this raise is removed.
raise NotImplementedError, "this executable still needs some work"

require 'optparse'
require 'rubygems'
require 'histogram'

# Mix the Histogram module into every Array so that MAIN can call
# #histogram directly on plain arrays of values.
class Array
  include Histogram
end

# Default option values.  Flags that have no entry here (:cols, :input_cols,
# :input_rows, :file_rows, :yml_ar) stay unset until given on the command line.
opt = {
  :type   => 'avg',
  :bins   => 5,
  :help   => false,
  :plot   => false,
  :labels => false,
  :with   => 'b'
}

progname = File.basename(__FILE__)

# Command-line parser.  Every handler simply stores the parsed value in `opt`.
# Separators and switches are declared in the order they appear in the help.
opts = OptionParser.new do |parser|
  parser.banner = "usage: #{progname} [OPTIONS] val val ..."
  parser.separator ""
  parser.separator "  outputs bins and frequencies in two rows (space delimited)"
  # Unfinished help text about the default number of bins:
  #parser.separator "  of the bins.  The number of bins is given by:"
  #parser.separator "  ??? 10**(log10(#vals) - 1) ????"
  parser.separator ""
  parser.separator "    -                               ' - ' to separate multiple datasets"
  parser.on("-t", "--type <min|avg>", "binning style  [default: #{opt[:type]}]") do |value|
    opt[:type] = value
  end
  parser.on("-b", "--bins <num>", "number of bins [default: #{opt[:bins]}]") do |value|
    opt[:bins] = value
  end
  parser.separator "        --bins <start,stop,[step]>   user specified range for bins (*)"
  parser.separator "        --bins <num num ...>         user specified custom bins (**)"
  parser.on("-l", "--labels <set1,set2,...>", "label the datasets") do |value|
    opt[:labels] = value
  end
  parser.on("-c", "--cols", "prints 2 cols instead of rows (tab delim.)") do |value|
    opt[:cols] = value
  end
  parser.on("-i", "--input_cols", "interactively write or paste cols") do |value|
    opt[:input_cols] = value
  end
  parser.on("-r", "--input_rows", "interactively write or paste rows") do |value|
    opt[:input_rows] = value
  end
  parser.on("--file_rows <filename>", "rows inside of a file") do |value|
    opt[:file_rows] = value
  end
  parser.on("--yml-ar <filename>", "one array of data in yml") do |value|
    opt[:yml_ar] = value
  end
  parser.on("-p", "--plot <filename>", "plots the histogram in extension format (.png|.eps)") do |value|
    opt[:plot] = value
  end
  parser.separator "        --plot terminal              plots in X11 window"
  parser.on("-w", "--with {b|p|lp|l}", "plot with style: ") do |value|
    opt[:with] = value
  end
  parser.separator "                                         b=boxes,p=points,lp=lines/points,l=lines"
  parser.on("-h", "--help", "prints arguments and examples") do |value|
    opt[:help] = value
  end

  parser.separator ""
  parser.separator "NOTES:"
  parser.separator "    Do NOT use '--option=val' syntax ; instead use '--option val'"
  parser.separator "    * 'stop' will always be included, even if it isn't stepped on"
  parser.separator "    ** custom bins specified if > 1 args after --bins"
end

# Builds the usage examples shown after the option summary for --help.
#
# progname - program name interpolated into every example command line
#
# Returns one string: the example lines joined with newlines.  It starts with
# an empty line and has no trailing newline.
def entry_examples(progname)
  [
    "",
    "ENTRY EXAMPLES:",
    "   #{progname} 7 8 9 -b 2",
    "       # vals: 7,8,9 ; number_of_bins: 2",
    "   #{progname} -b 2 7 8 9",
    "       # vals: [none]  bins: 2 7 8 9  (<--probably not what you wanted)",
    "   #{progname} -b 2 -t min 7 8 9",
    "       # vals: 7,8,9 ; number_of_bins: 2 ; type: min",
    "   #{progname} 7 8 9 -b 0 1 2 3",
    "       # vals: 7,8,9 ; bins: 0 1 2 3",
    "   #{progname} -b 0 1 2 3 -- 7 8 9",
    "       # vals: 7,8,9 ; bins: 0 1 2 3",
    "   #{progname} -i",
    "       # [then paste in cols]",
    "   #{progname} -r -b 10",
    "       # number_of_bins: 10 ; [then paste in datasets as rows]",
    "",
    "PLOT EXAMPLES:",
    "   #{progname} 7 8 9 -b 2 -p filename.png",
    "       # creates a png image of the plot",
    "   #{progname} 7 8 9 -b 2 -p filename.eps -w lp",
    "       # creates an eps image of the plot with lines and points",
    "   #{progname} 7 8 9 -b 2 -p terminal --with l",
    "       # creates a plot in window with lines"
  ].join("\n")
end

# Separates the --bins (-b) option and its trailing values from argv.
#
# After -b/--bins every following argument is collected as a bin until
# another option is seen.  Anything starting with '-' counts as an option,
# including the '-' dataset separator and '--'.  The exception is arguments
# that look like negative numbers ("-1", "-.5", "-5,5,1"): while bins are
# being collected these are kept as bins instead of ending the collection.
#
# argv - array of command-line argument strings (not modified)
#
# Returns [bins, newargv]: the raw bin strings, and the remaining arguments
# in their original order with the --bins flag and its values removed.
def remove_bins(argv)
  bins = []
  remaining = []
  collecting = false
  argv.each do |arg|
    if arg == '-b' || arg == '--bins'
      collecting = true
    elsif collecting && arg =~ /\A-(?:\d|\.\d)/
      # negative number (or range starting with one): a bin, not an option
      bins << arg
    elsif arg =~ /^-/
      # any other option ends bin collection; values are picked up again
      collecting = false
      remaining << arg
    elsif collecting
      bins << arg
    else
      remaining << arg
    end
  end
  [bins, remaining]
end

# Splits command-line values into datasets separated by '-'.
#
#   extract_vals(%w(23 43 - 432 - 42 43 32))
#   # -> [[23.0, 43.0], [432.0], [42.0, 43.0, 32.0]]
#
# A single leading '-' is ignored.  Every value is converted with to_f.
# The result always holds at least one (possibly empty) dataset, so an empty
# argv gives [[]].
#
# argv - array of argument strings; it is not modified
def extract_vals(argv)
  # Take a slice instead of shifting so the caller's array stays intact.
  tokens = (argv.first == '-') ? argv[1..-1] : argv
  datasets = [[]]
  tokens.each do |token|
    if token == '-'
      datasets << []
    else
      datasets.last << token.to_f
    end
  end
  datasets
end


# Prompts for values and reads them with Kernel#gets until end of input or
# the first empty line.  Each line is split on whitespace.
#
# cols - true (default): treat the input as one or two columns
#        false:          treat every line as one dataset (row)
#
# Returns an array of datasets holding the raw strings:
# * cols mode: [first_column] or [first_column, second_column]; only the
#   first two fields of a line are used.  A line starting with whitespace has
#   an empty first field, so only its second field is kept.
# * rows mode: one array per input line.
def get_input_vals(cols=true)
  if cols
    puts "==> Paste (or enter) values as a single column."
    puts "==> OR paste bins, then values as two columns."
    puts "==> [multiple datasets can be input on command line,"
    puts "==>  or as rows]"
  else
    puts "==> Paste (or enter) values (multiple datasets OK) as rows"
    puts "==> The first row will be considered the bins unless"
    puts "==> the bins were already specified on the command line"
  end

  lines = []
  while (line = gets)
    break unless line =~ /.+/  # an empty line ends the input
    lines << line
  end
  rows = lines.map { |text| text.chomp.split(/\s+/) }
  return rows unless cols

  first_col = []
  second_col = []
  rows.each do |cells|
    # A whitespace-only line splits to [], so cells[0] is nil; skip it rather
    # than storing a nil that would later be converted to 0.0.
    first_col << cells[0] if cells[0] && cells[0] != ''
    second_col << cells[1] if cells[1]
  end
  datasets = [first_col]
  datasets << second_col if second_col.size > 0
  datasets
end


# Returns a copy of string with a backslash inserted before every
# single-quote character; all other characters are copied unchanged.
def escape_to_gnuplot(string)
  string.split(//).inject("") do |escaped, chr|
    escaped << (chr == "'" ? "\\'" : chr)
  end
end

# Turns a "start,stop[,step]" argument into an array of custom bins.
#
# arg - comma separated string; step defaults to 1 when omitted.  Each field
#       is converted with to_f, so the resulting bins are Floats.
#
# Aborts with a message when more than three fields are given, when stop is
# less than start, or when step is not greater than zero (Range#step would
# otherwise fail with an uncaught ArgumentError).
def bin_range_arg_to_custom_bin_array(arg)
  parts = arg.split(',')
  abort "bins range must be given as start,stop[,step]" if parts.size > 3
  parts[2] ||= 1  # no step? then it's one
  start, stop, step = parts.map { |v| v.to_f }
  abort "stop cannot be less than start" if stop < start
  abort "step must be greater than zero" unless step > 0
  create_bins(start, stop, step)
end

# Returns an array of bins running from start towards stop in increments of
# step.  stop is always included as the last bin, even if it is not stepped on.
def create_bins(start, stop, step)
  bins = []
  # exclusive range: stop is appended explicitly below
  (start...stop).step(step) { |v| bins << v }
  bins << stop if bins.last != stop
  bins
end

# Plots one or more histograms through Gnuplot.
#
# bins     - array of bin values; used as the first data column of every
#            dataset
# vals_ar  - array of value arrays; each element is plotted as one dataset
#            against bins
# filename - 'terminal' selects the x11 terminal (no output file); a name
#            matching .eps or .png selects that format; any other name gets
#            ".png" appended and is written as png.  The caller's string is
#            not modified.
# bin_type - binning style, shown in the x-axis label
# with     - plot style: b=boxes, l=lines, lp=linespoints, p=points
#            (any other value leaves the dataset's `with` set to nil)
# labels   - optional array of dataset titles indexed like vals_ar; datasets
#            without a label are plotted with no title
def plot_histogram(bins, vals_ar, filename, bin_type, with, labels=nil)
  plot_with = case with
              when 'b'  then 'boxes fs solid'
              when 'l'  then 'lines'
              when 'lp' then 'linespoints'
              when 'p'  then 'points'
              end

  to_terminal = false
  terminal_string =
    case filename
    when 'terminal'
      to_terminal = true
      "x11"
    when /\.eps$/
      "postscript eps noenhanced color"
    when /\.png$/
      "png noenhanced"
    else
      # Build a new string instead of appending in place, so the string object
      # passed in by the caller is left untouched.
      filename = "#{filename}.png"
      "png noenhanced"
    end

  Gnuplot.open do |gp|
    Gnuplot::Plot.new( gp ) do |plot|
      plot.terminal terminal_string
      plot.output(filename) unless to_terminal
      #plot.title "Histogram"
      plot.xlabel "bins (type:#{bin_type})"
      plot.ylabel "occurences"
      plot.style "line 1 lt 1"
      plot.style "line 2 lt 12"
      #plot.style  "line 1 lt 1 lw #{opts.lw} pt 7 ps #{opts.ps}",
      #plot.yrange "[-0.05:#{1.0 + 0.2*files.size}]"
      vals_ar.each_with_index do |vals,i|
        plot.data << Gnuplot::DataSet.new([bins,vals]) do |ds|
          ds.with = plot_with
          if labels && labels[i]
            ds.title = labels[i]
          else
            ds.notitle
          end
        end
      end
    end
  end
end



#################################################
# MAIN:
#################################################
# Strip -b/--bins and its values first, so the option parser below never
# sees them.
(bins, newargv) = remove_bins(ARGV.to_a)
# NOTE(review): presumably cleared so that Kernel#gets (interactive input)
# reads standard input rather than files named in ARGV -- confirm.
ARGV.clear

# parse the options out
opts.parse!(newargv)

if opt[:help]
  puts opts
  puts entry_examples(progname)
  exit
end

# get interactive values or pull them off the command line
vals_ar = []
if opt[:input_cols]
  datasets = get_input_vals()
  if datasets.size == 2
    # two columns: the first holds the bins, the second the values
    bins = datasets[0]
    vals_ar = [datasets[1]]
  else
    vals_ar = [datasets[0]]
  end
elsif opt[:input_rows]
  datasets = get_input_vals(false)
  ## set the bins to the first dataset if none given
  ## (with no input at all there is no first row: keep bins an empty array)
  if bins.size == 0
    bins = datasets.shift || []
  end
  vals_ar = datasets
elsif opt[:file_rows]
  # keep lines that contain a digit and do not start with '#'
  vals_ar = IO.readlines(opt[:file_rows]).select {|v| v =~ /\d/ and v !~ /^#/ }.map {|v| v.chomp.split(/\s+/) }
elsif opt[:yml_ar]  # just one array of data
  require 'yaml'
  # NOTE(review): the file is deserialized as-is; depending on the YAML
  # library version this may instantiate arbitrary objects, so only use
  # trusted files.
  vals_ar = [YAML.load_file(opt[:yml_ar])]
else
  vals_ar = extract_vals(newargv)
end

# print help if we don't have any values given
# (vals_ar itself may be empty, or its first entry missing, when nothing was
# entered interactively or the yml file held no data)
first_set = vals_ar[0]
if !first_set || first_set.size == 0
  puts opts
  exit
end


# prep the bins (if one value, then its the # of bins and not a list of bins)
if bins.size == 0
  bins = opt[:bins] ## should be default value since we already swipe the args
elsif bins.size == 1
  bin_arg = bins.first
  if bin_arg =~ /,/
    ## commas: this specifies a range (the helper already returns floats)
    bins = bin_range_arg_to_custom_bin_array(bin_arg)
  else
    ## no commas?  then this is the number of bins:
    bins = bin_arg.to_i
  end
else # custom array
  bins = bins.map {|v| v.to_f }
end


vec_ar = vals_ar.map do |vals|
  vals.map {|v| v.to_f }
end
first_vec = vec_ar.shift

#outbins, outfreqs = first_vec.histogram(bins, opt[:type].to_sym)  # for one
#puts "BINS: #{bins}"
#[first_vec, *vec_ar].each do |vec|
#  puts "DATA: #{vec}"
#end

output = first_vec.histogram(bins, opt[:type].to_sym, *vec_ar)
# first ar is the bins, rest are the frequencies

# output
if opt[:cols]
  # one line per bin: bin value followed by each dataset's frequency,
  # tab delimited as promised by the --cols help text
  output.map {|vec| vec.to_a }.transpose.each do |row|
    puts row.join("\t")
  end
else
  output.each do |row|
    puts row.join(' ')
  end
end

labels = opt[:labels] ? opt[:labels].split(',') : nil

# convert into arrays
output.map! {|vec| vec.to_a }
bins = output.shift
if opt[:plot]
  # gnuplot is only needed (and loaded) when a plot was requested
  require 'gnuplot'
  plot_histogram( bins, output, opt[:plot], opt[:type], opt[:with], labels)
end


