summary refs log tree commit diff
path: root/lib/model.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/model.rb')
-rw-r--r-- lib/model.rb 26
1 files changed, 11 insertions, 15 deletions
diff --git a/lib/model.rb b/lib/model.rb
index c4ca1f3..d62d889 100644
--- a/lib/model.rb
+++ b/lib/model.rb
@@ -20,7 +20,7 @@ class Model
puts "Determining model type."
if dependent_variables.uniq == ["1","0"]
@dependent_variable_type = "binary"
- @train.each {|t| t[1] == "1" ? t[1] = true : t[1] = false }
+ @train.each {|t| t[1] == t[1].to_i}
elsif dependent_variables.collect{|v| v.numeric?}.uniq == [true]
@dependent_variable_type = "numeric"
@train.each {|t| t[1] = t[1].to_f }
@@ -38,7 +38,6 @@ class Model
end
def predict file
-=begin
model_type
puts "Reading prediction data from #{file}."
@batch = File.readlines(file).collect{|l| l.chomp.split(",")}
@@ -48,6 +47,7 @@ class Model
raise "Incorrect batch independent variables [#{independent_variables.flatten.uniq}]. Must be #{@independent_variable_type}."
end
if @independent_variable_type == "numeric"
+ @minsim = [0.9,0.7]
@batch.each {|t| t[1..-1] = t[1..-1].collect{|v| v = v.to_f}}
select(@independent_variable_names & @batch_independent_variable_names)
File.open(File.join(File.dirname(file),"common-variables.csv"),"w+") do |f|
@@ -62,7 +62,6 @@ class Model
end
puts "Feature selection and scaling."
puts `Rscript #{File.join(File.dirname(__FILE__),"..","bin","preprocessing.R")} #{File.join(File.dirname(file),"common-variables.csv")} #{File.join(File.dirname(file),"scaled-variables.csv")}`
-=end
puts "Reading scaled features."
lines = File.readlines(File.join(File.dirname(file),"scaled-variables.csv"))
@independent_variable_names = @batch_independent_variable_names = lines.shift.chomp.split(",")[2..-1]
@@ -82,14 +81,15 @@ class Model
File.open(file.sub(".csv","-prediction.csv"),"w+") do |f|
f.puts ["Canonical SMILES","Experimental","Prediction","p-inactive","p-active","Max Simimilarity","Nr. Neighbors"].join(",")
@scaled_batch.each do |pred|
- classification(pred[0], @scaled_train.collect{|row| row[0..1] + [Distance.euclid([row[2..-1],pred[1..-1]])]}).each do |pred|
- #classification(pred[0], @scaled_train.collect{|row| row[0..1] + [Similarity.cosine([row[2..-1],pred[1..-1]])]}).each do |pred|
+ classification(pred[0], @scaled_train.collect{|row| row[0..1] + [Similarity.cosine([row[2..-1],pred[1..-1]])]}).each do |pred|
f.puts pred.join(",")
- puts pred.join(",")
+ #puts pred.join(",")
end
end
end
- #end
+ elsif @independent_variable_type == "set"
+ @minsim = [0.5,0.2]
+ end
end
def select variable_names
@@ -104,15 +104,11 @@ class Model
def classification smiles, train
experimental = train.select{|row| row[0] == smiles}
train = train-experimental
- #train.select!{|row| row[2] > 0.8}
- #train.select!{|row| row[2] > 0.5} if train.size < 2
- #train.select!{|row| row[2] > 0.5}
- #train.select!{|row| row[2] > -1.0} if train.size < 2
- puts "=="
- puts smiles
- puts train.sort_by{|r| r[2]}[0..10].collect{|r| r.join(",")}.join("\n")
+ n = train.select{|row| row[2] > @minsim[0]}
+ n = train.select!{|row| row[2] > @minsim[1]} if n.size < 2
+ train = n
+ #puts train.sort_by{|r| r[2]}[0..5].collect{|r| r.join(",")}.join("\n")
#puts train.sort_by{|r| r[2]}.reverse.collect{|r| r.join(",")}.join("\n")
- puts "--"
if train.size < 2
classification = nil
probabilities = [nil,nil]