summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChristoph Helma <helma@in-silico.ch>2016-04-21 15:06:10 +0200
committerChristoph Helma <helma@in-silico.ch>2016-04-21 15:06:10 +0200
commit6890687df6de0a6eaa3d35c6be66639614ea2ef2 (patch)
treec863c5857d8716d35eb3a22efd3da24c5f27b968
parent75b70425ae8699464a18529eb7bf35a216c06243 (diff)
parent4ebd80fee52c04bd36781f846eae60019918345d (diff)
probabilities branch merged
-rw-r--r--VERSION2
-rw-r--r--ext/lazar/extconf.rb4
-rw-r--r--lib/classification.rb38
-rw-r--r--lib/crossvalidation.rb2
-rw-r--r--lib/validation-statistics.rb13
5 files changed, 30 insertions, 29 deletions
diff --git a/VERSION b/VERSION
index 2003b63..965065d 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-0.9.2
+0.9.3
diff --git a/ext/lazar/extconf.rb b/ext/lazar/extconf.rb
index 0e607f3..d3d2756 100644
--- a/ext/lazar/extconf.rb
+++ b/ext/lazar/extconf.rb
@@ -20,7 +20,9 @@ rinstall = File.expand_path(File.join(File.dirname(__FILE__),"rinstall.R"))
puts `Rscript --vanilla #{rinstall} #{r_dir}`
r_libs = Dir[File.join(r_dir,"*")].collect{|l| l.sub(r_dir, '').sub('/','')}.sort
-abort "Failed to install R packages." unless r_libs == ["caret","doMC","foreach","ggplot2","gridExtra","iterators","pls"].sort
+["caret","doMC","foreach","ggplot2","gridExtra","iterators","pls"].each do |lib|
+ abort "Failed to install R package '#{lib}'." unless r_libs.include?(lib)
+end
# create a fake Makefile
File.open(File.join(File.dirname(__FILE__),"Makefile"),"w+") do |makefile|
diff --git a/lib/classification.rb b/lib/classification.rb
index 0de8726..93b4f0f 100644
--- a/lib/classification.rb
+++ b/lib/classification.rb
@@ -5,29 +5,27 @@ module OpenTox
def self.weighted_majority_vote compound, params
neighbors = params[:neighbors]
- weighted_sum = {}
- sim_sum = 0.0
- confidence = 0.0
- # see ~/src/pubchem-read-across/application.rb:353
- neighbors.each do |row|
- sim = row["tanimoto"]
- row["toxicities"][params[:prediction_feature_id].to_s].each do |act|
- weighted_sum[act] ||= 0
- weighted_sum[act] += sim
+ feature_id = params[:prediction_feature_id].to_s
+ sims = {}
+ neighbors.each do |n|
+ sim = n["tanimoto"]
+ n["toxicities"][feature_id].each do |act|
+ sims[act] ||= []
+ sims[act] << sim
end
end
- case weighted_sum.size
- when 1
- return {:value => weighted_sum.keys.first, :confidence => weighted_sum.values.first/neighbors.size.abs}
- when 2
- sim_sum = weighted_sum[weighted_sum.keys[0]]
- sim_sum -= weighted_sum[weighted_sum.keys[1]]
- sim_sum > 0 ? prediction = weighted_sum.keys[0] : prediction = weighted_sum.keys[1]
- confidence = (sim_sum/neighbors.size).abs
- return {:value => prediction,:confidence => confidence}
- else
- bad_request_error "Cannot predict more than 2 classes, multinomial classifications is not yet implemented. Received classes were: '#{weighted.sum.keys}'"
+ sim_all = sims.collect{|a,s| s}.flatten
+ sim_sum = sim_all.sum
+ sim_max = sim_all.max
+ probabilities = {}
+ sims.each do |a,s|
+ probabilities[a] = s.sum/sim_sum
end
+ probabilities = probabilities.collect{|a,p| [a,sim_max*p]}.to_h
+ p_max = probabilities.collect{|a,p| p}.max
+ prediction = probabilities.key(p_max)
+ {:value => prediction,:probabilities => probabilities}
+
end
end
end
diff --git a/lib/crossvalidation.rb b/lib/crossvalidation.rb
index 752d393..50afb6f 100644
--- a/lib/crossvalidation.rb
+++ b/lib/crossvalidation.rb
@@ -54,7 +54,7 @@ module OpenTox
cv.update_attributes(
nr_instances: nr_instances,
nr_unpredicted: nr_unpredicted,
- predictions: predictions#.sort{|a,b| b[3] <=> a[3]} # sort according to confidence
+ predictions: predictions
)
$logger.debug "Nr unpredicted: #{nr_unpredicted}"
cv.statistics
diff --git a/lib/validation-statistics.rb b/lib/validation-statistics.rb
index 570b2d4..c6b2a07 100644
--- a/lib/validation-statistics.rb
+++ b/lib/validation-statistics.rb
@@ -15,21 +15,21 @@ module OpenTox
if pred[:value] == m
if pred[:value] == accept_values[0]
confusion_matrix[0][0] += 1
- weighted_confusion_matrix[0][0] += pred[:confidence]
+ weighted_confusion_matrix[0][0] += pred[:probabilities][pred[:value]]
nr_instances += 1
elsif pred[:value] == accept_values[1]
confusion_matrix[1][1] += 1
- weighted_confusion_matrix[1][1] += pred[:confidence]
+ weighted_confusion_matrix[1][1] += pred[:probabilities][pred[:value]]
nr_instances += 1
end
elsif pred[:value] != m
if pred[:value] == accept_values[0]
confusion_matrix[0][1] += 1
- weighted_confusion_matrix[0][1] += pred[:confidence]
+ weighted_confusion_matrix[0][1] += pred[:probabilities][pred[:value]]
nr_instances += 1
elsif pred[:value] == accept_values[1]
confusion_matrix[1][0] += 1
- weighted_confusion_matrix[1][0] += pred[:confidence]
+ weighted_confusion_matrix[1][0] += pred[:probabilities][pred[:value]]
nr_instances += 1
end
end
@@ -47,14 +47,15 @@ module OpenTox
confidence_sum += c
end
end
- accuracy = (weighted_confusion_matrix[0][0]+weighted_confusion_matrix[1][1])/confidence_sum.to_f
+ accuracy = (confusion_matrix[0][0]+confusion_matrix[1][1])/nr_instances.to_f
+ weighted_accuracy = (weighted_confusion_matrix[0][0]+weighted_confusion_matrix[1][1])/confidence_sum.to_f
$logger.debug "Accuracy #{accuracy}"
{
:accept_values => accept_values,
:confusion_matrix => confusion_matrix,
:weighted_confusion_matrix => weighted_confusion_matrix,
:accuracy => accuracy,
- :weighted_accuracy => (weighted_confusion_matrix[0][0]+weighted_confusion_matrix[1][1])/confidence_sum.to_f,
+ :weighted_accuracy => weighted_accuracy,
:true_rate => true_rate,
:predictivity => predictivity,
:finished_at => Time.now