From 9b0a5f37bdb0a5b80ace7cff559c1817d6916b8a Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Mon, 28 Dec 2020 18:02:56 +0100 Subject: file prediction output updated --- lib/model.rb | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) (limited to 'lib/model.rb') diff --git a/lib/model.rb b/lib/model.rb index 44e0e50..0e011c5 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -63,7 +63,7 @@ class Model end # predict train_model = self.class.new dirs[:train] - train_model.predict_file File.join(dirs[:test],"independent_variables") + train_model.predict_fold File.join(dirs[:test],"independent_variables") puts Time.now-t end puts "Total: #{Time.now-start_time}" @@ -72,7 +72,7 @@ end class ClassificationModel < Model - def predict_file independent_variable_file + def predict_fold independent_variable_file pred_dir = File.dirname independent_variable_file predictions = [] File.readlines(independent_variable_file).each do |line| @@ -84,8 +84,18 @@ class ClassificationModel < Model File.open(File.join(pred_dir,"classification"),"w+") { |f| predictions.each {|p| f.puts p.join(",")} } end - # TODO: with neighbors + def predict_file independent_variable_file + predictions = [] + File.readlines(independent_variable_file).each do |line| + variables = line.chomp.split(",") + variables = variables.collect{|v| v.to_f} if @independent_variable_type == "numeric" + puts predict("",variables).join(",") + end + end + def predict_smiles smiles + c = Compound.from_smiles(smiles) + predict c.smiles, c.fingerprint end def predict smiles, variables @@ -101,14 +111,18 @@ class ClassificationModel < Model neighbor_idx = similarities.each_index.select{|i| similarities[i] > @similarity_thresholds[1]} neighbor_idx = similarities.each_index.select{|i| similarities[i] > @similarity_thresholds[0]} if neighbor_idx.size < 2 # lower similarity threshold neighbor_idx.select!{|i| @smiles[i] != smiles} # remove identical compounds - return [smiles,nil,nil,nil,similarities.max,neighbor_idx.size] if neighbor_idx.size < 2 + experimental = @dependent_variables[@smiles.index(smiles)] if @smiles.include? smiles + return [smiles,experimental,nil,nil,nil,similarities.max,neighbor_idx.size] if neighbor_idx.size < 2 neighbor_dependent_variables = neighbor_idx.collect{|i| @dependent_variables[i]} - neighbor_weights = neighbor_idx.collect{|i| similarities[i]} - probabilities = weighted_majority_vote(neighbor_dependent_variables, neighbor_weights) + neighbor_similarities = neighbor_idx.collect{|i| similarities[i]} + probabilities = weighted_majority_vote(neighbor_dependent_variables, neighbor_similarities) probabilities[1] > probabilities[0] ? classification = 1 : classification = 0 - [ smiles, classification ] + probabilities + [ similarities.max, neighbor_idx.size ] + #p neighbor_dependent_variables.join "," + #p neighbor_similarities.join "," + #p neighbor_idx.collect{|i| @smiles[i]} + [ smiles, experimental, classification ] + probabilities + [ neighbor_similarities.max, neighbor_idx.size ] end # Weighted majority vote -- cgit v1.2.3