From 1dcd741a5bff8dc41abf0840f59031eb557ff230 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Mon, 8 Mar 2021 21:25:45 +0100 Subject: neighbor selection adjusted, summary for high-confidence predictions --- lib/model.rb | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) (limited to 'lib/model.rb') diff --git a/lib/model.rb b/lib/model.rb index c4ca1f3..d62d889 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -20,7 +20,7 @@ class Model puts "Determining model type." if dependent_variables.uniq == ["1","0"] @dependent_variable_type = "binary" - @train.each {|t| t[1] == "1" ? t[1] = true : t[1] = false } + @train.each {|t| t[1] == t[1].to_i} elsif dependent_variables.collect{|v| v.numeric?}.uniq == [true] @dependent_variable_type = "numeric" @train.each {|t| t[1] = t[1].to_f } @@ -38,7 +38,6 @@ class Model end def predict file -=begin model_type puts "Reading prediction data from #{file}." @batch = File.readlines(file).collect{|l| l.chomp.split(",")} @@ -48,6 +47,7 @@ class Model raise "Incorrect batch independent variables [#{independent_variables.flatten.uniq}]. Must be #{@independent_variable_type}." end if @independent_variable_type == "numeric" + @minsim = [0.9,0.7] @batch.each {|t| t[1..-1] = t[1..-1].collect{|v| v = v.to_f}} select(@independent_variable_names & @batch_independent_variable_names) File.open(File.join(File.dirname(file),"common-variables.csv"),"w+") do |f| @@ -62,7 +62,6 @@ class Model end puts "Feature selection and scaling." puts `Rscript #{File.join(File.dirname(__FILE__),"..","bin","preprocessing.R")} #{File.join(File.dirname(file),"common-variables.csv")} #{File.join(File.dirname(file),"scaled-variables.csv")}` -=end puts "Reading scaled features." 
lines = File.readlines(File.join(File.dirname(file),"scaled-variables.csv")) @independent_variable_names = @batch_independent_variable_names = lines.shift.chomp.split(",")[2..-1] @@ -82,14 +81,15 @@ class Model File.open(file.sub(".csv","-prediction.csv"),"w+") do |f| f.puts ["Canonical SMILES","Experimental","Prediction","p-inactive","p-active","Max Simimilarity","Nr. Neighbors"].join(",") @scaled_batch.each do |pred| - classification(pred[0], @scaled_train.collect{|row| row[0..1] + [Distance.euclid([row[2..-1],pred[1..-1]])]}).each do |pred| - #classification(pred[0], @scaled_train.collect{|row| row[0..1] + [Similarity.cosine([row[2..-1],pred[1..-1]])]}).each do |pred| + classification(pred[0], @scaled_train.collect{|row| row[0..1] + [Similarity.cosine([row[2..-1],pred[1..-1]])]}).each do |pred| f.puts pred.join(",") - puts pred.join(",") + #puts pred.join(",") end end end - #end + elsif @independent_variable_type == "set" + @minsim = [0.5,0.2] + end end def select variable_names @@ -104,15 +104,11 @@ class Model def classification smiles, train experimental = train.select{|row| row[0] == smiles} train = train-experimental - #train.select!{|row| row[2] > 0.8} - #train.select!{|row| row[2] > 0.5} if train.size < 2 - #train.select!{|row| row[2] > 0.5} - #train.select!{|row| row[2] > -1.0} if train.size < 2 - puts "==" - puts smiles - puts train.sort_by{|r| r[2]}[0..10].collect{|r| r.join(",")}.join("\n") + n = train.select{|row| row[2] > @minsim[0]} + n = train.select!{|row| row[2] > @minsim[1]} if n.size < 2 + train = n + #puts train.sort_by{|r| r[2]}[0..5].collect{|r| r.join(",")}.join("\n") #puts train.sort_by{|r| r[2]}.reverse.collect{|r| r.join(",")}.join("\n") - puts "--" if train.size < 2 classification = nil probabilities = [nil,nil] -- cgit v1.2.3