require_relative "setup.rb"


class NanoparticleTest  < MiniTest::Test
  include OpenTox::Validation

  def setup
    @training_dataset = Dataset.where(:name => "Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles").first
    unless @training_dataset
      Import::Enanomapper.import File.join(File.dirname(__FILE__),"data","enm")
      @training_dataset = Dataset.where(name: "Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles").first
    end
    @prediction_feature = @training_dataset.features.select{|f| f["name"] == 'log2(Net cell association)'}.first
  end

  def test_create_model
    model = Model::Lazar.create training_dataset: @training_dataset
    nanoparticle = @training_dataset.nanoparticles[-34]
    prediction = model.predict nanoparticle
    p prediction
    refute_nil prediction[:value]
    assert_includes nanoparticle.dataset_ids, @training_dataset.id
    model.delete
  end

  def test_inspect_cv
    skip
    cv = CrossValidation.all.sort_by{|cv| cv.created_at}.last
    #p cv
    #p cv.id
    #cv.correlation_plot_id = nil
    File.open("tmp.pdf","w+"){|f| f.puts cv.correlation_plot}
    #p cv.statistics
    #p cv.model.@training_dataset.substances.first.physchem_descriptors.keys.collect{|d| Feature.find(d).name}
    CrossValidation.all.sort_by{|cv| cv.created_at}.reverse.each do |cv|
      p cv.name
      p cv.created_at
      begin
      p cv.r_squared
      rescue
      end
    end
  end
  def test_inspect_worst_prediction
    skip
  
    cv = CrossValidation.all.sort_by{|cv| cv.created_at}.last
    worst_predictions = cv.worst_predictions(n: 3,show_neigbors: false)
    assert_equal 3, worst_predictions.size
    assert_kind_of Integer, worst_predictions.first[:neighbors]
    worst_predictions = cv.worst_predictions
    assert_equal 5, worst_predictions.size
    assert_kind_of Array, worst_predictions.first[:neighbors]
    assert_kind_of Integer, worst_predictions.first[:neighbors].first[:common_descriptors]
    puts worst_predictions.to_yaml
    worst_predictions = cv.worst_predictions(n: 2, show_common_descriptors: true)
    #puts worst_predictions.to_yaml
    assert_equal 2, worst_predictions.size
    assert_kind_of Array, worst_predictions.first[:neighbors]
    refute_nil worst_predictions.first[:neighbors].first[:common_descriptors]
    #p cv.model.training_dataset.features
  end

  def test_validate_model
    algorithms = { :prediction => {:method => "Algorithm::Regression.weighted_average" } }
    model = Model::Lazar.create training_dataset: @training_dataset
    cv = RegressionCrossValidation.create model
    p cv.rmse
    p cv.r_squared
    #File.open("tmp.pdf","w+"){|f| f.puts cv.correlation_plot}
    refute_nil cv.r_squared
    refute_nil cv.rmse
  end

  def test_validate_pls_model
    algorithms = {
      :descriptors => {
        :method => "properties",
        :types => ["P-CHEM"]
      },
      :prediction => {:method => "Algorithm::Caret.regression", :parameters => 'pls' },
    }
    model = Model::Lazar.create prediction_feature: @prediction_feature, training_dataset: @training_dataset, algorithms: algorithms
    cv = RegressionCrossValidation.create model
    p cv.rmse
    p cv.r_squared
    refute_nil cv.r_squared
    refute_nil cv.rmse
  end

  def test_validate_random_forest_model
    algorithms = {
      :descriptors => {
        :method => "properties",
        :types => ["P-CHEM"]
      },
      :prediction => {:method => "Algorithm::Caret.regression", :parameters => 'rf' }
    }
    model = Model::Lazar.create prediction_feature: @prediction_feature, training_dataset: @training_dataset, algorithms: algorithms
    cv = RegressionCrossValidation.create model
    p cv.rmse
    p cv.r_squared
    refute_nil cv.r_squared
    refute_nil cv.rmse
  end

  def test_validate_proteomics_pls_model
    algorithms = {
      :descriptors => {
        :method => "properties",
        :types => ["Proteomics"]
      },
      :prediction => {:method => "Algorithm::Caret.regression", :parameters => 'rf' }
    }
    model = Model::Lazar.create prediction_feature: @prediction_feature, training_dataset: @training_dataset, algorithms: algorithms
    cv = RegressionCrossValidation.create model
    p cv.rmse
    p cv.r_squared
    refute_nil cv.r_squared
    refute_nil cv.rmse
  end

  def test_validate_all_default_model
    algorithms = {
      :descriptors => {
        :types => ["Proteomics","P-CHEM"]
      },
    }
    model = Model::Lazar.create prediction_feature: @prediction_feature, training_dataset: @training_dataset, algorithms: algorithms
    cv = RegressionCrossValidation.create model
    p cv.rmse
    p cv.r_squared
    refute_nil cv.r_squared
    refute_nil cv.rmse
  end

  def test_export
    skip
    Dataset.all.each do |d|
      puts d.to_csv
    end
  end

  def test_summaries
    skip
    datasets = Dataset.all
    datasets = datasets.select{|d| !d.name.nil?}
    datasets.each do |d|
      
      #p d.features.select{|f| f.name.match (/Total/)}
      #p d.features.collect{|f| "#{f.name} #{f.unit} #{f.conditions}"}
      p d.features.uniq.collect{|f| f.name}
    end
    assert_equal 9, datasets.size
=begin
    features = Feature.all.to_a
    #p features.collect do |f|
      #f if f.category == "TOX"
    #end.to_a.flatten.size
    toxcounts = {}
    pccounts = {}
    Nanoparticle.all.each do |np|
      np.measurements.each do |t,v|
        toxcounts[t] ||= 0
        toxcounts[t] += 1#v.uniq.size
      end
      np.physchem_descriptors.each do |t,v|
        pccounts[t] ||= 0
        pccounts[t] += 1#v.uniq.size
      end
    end
    #puts counts.keys.collect{|i| Feature.find(i)}.to_yaml
    #pccounts.each{|e,n| p Feature.find(e),n if n > 100}
    #p toxcounts.collect{|e,n| Feature.find(e).name if n > 1}.uniq
    toxcounts.each{|e,n| p Feature.find(e),n if n > 100}
=end
  end


  def test_import_ld
    skip
    dataset_ids = Import::Enanomapper.import_ld
  end
end