summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- lib/model.rb 28
1 files changed, 21 insertions, 7 deletions
diff --git a/lib/model.rb b/lib/model.rb
index 44e0e50..0e011c5 100644
--- a/lib/model.rb
+++ b/lib/model.rb
@@ -63,7 +63,7 @@ class Model
end
# predict
train_model = self.class.new dirs[:train]
- train_model.predict_file File.join(dirs[:test],"independent_variables")
+ train_model.predict_fold File.join(dirs[:test],"independent_variables")
puts Time.now-t
end
puts "Total: #{Time.now-start_time}"
@@ -72,7 +72,7 @@ end
class ClassificationModel < Model
- def predict_file independent_variable_file
+ def predict_fold independent_variable_file
pred_dir = File.dirname independent_variable_file
predictions = []
File.readlines(independent_variable_file).each do |line|
@@ -84,8 +84,18 @@ class ClassificationModel < Model
File.open(File.join(pred_dir,"classification"),"w+") { |f| predictions.each {|p| f.puts p.join(",")} }
end
- # TODO: with neighbors
+ def predict_file independent_variable_file
+ predictions = []
+ File.readlines(independent_variable_file).each do |line|
+ variables = line.chomp.split(",")
+ variables = variables.collect{|v| v.to_f} if @independent_variable_type == "numeric"
+ puts predict("",variables).join(",")
+ end
+ end
+
def predict_smiles smiles
+ c = Compound.from_smiles(smiles)
+ predict c.smiles, c.fingerprint
end
def predict smiles, variables
@@ -101,14 +111,18 @@ class ClassificationModel < Model
neighbor_idx = similarities.each_index.select{|i| similarities[i] > @similarity_thresholds[1]}
neighbor_idx = similarities.each_index.select{|i| similarities[i] > @similarity_thresholds[0]} if neighbor_idx.size < 2 # lower similarity threshold
neighbor_idx.select!{|i| @smiles[i] != smiles} # remove identical compounds
- return [smiles,nil,nil,nil,similarities.max,neighbor_idx.size] if neighbor_idx.size < 2
+ experimental = @dependent_variables[@smiles.index(smiles)] if @smiles.include? smiles
+ return [smiles,experimental,nil,nil,nil,similarities.max,neighbor_idx.size] if neighbor_idx.size < 2
neighbor_dependent_variables = neighbor_idx.collect{|i| @dependent_variables[i]}
- neighbor_weights = neighbor_idx.collect{|i| similarities[i]}
- probabilities = weighted_majority_vote(neighbor_dependent_variables, neighbor_weights)
+ neighbor_similarities = neighbor_idx.collect{|i| similarities[i]}
+ probabilities = weighted_majority_vote(neighbor_dependent_variables, neighbor_similarities)
probabilities[1] > probabilities[0] ? classification = 1 : classification = 0
- [ smiles, classification ] + probabilities + [ similarities.max, neighbor_idx.size ]
+ #p neighbor_dependent_variables.join ","
+ #p neighbor_similarities.join ","
+ #p neighbor_idx.collect{|i| @smiles[i]}
+ [ smiles, experimental, classification ] + probabilities + [ neighbor_similarities.max, neighbor_idx.size ]
end
# Weighted majority vote