From 753fcc204d93d86c76860bee6e2f7d0468c3c940 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Thu, 14 Apr 2016 19:43:24 +0200 Subject: features/toxicities fixed --- lib/classification.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/classification.rb') diff --git a/lib/classification.rb b/lib/classification.rb index 0202940..4a17546 100644 --- a/lib/classification.rb +++ b/lib/classification.rb @@ -10,7 +10,7 @@ module OpenTox confidence = 0.0 neighbors.each do |row| sim = row["tanimoto"] - row["features"][params[:prediction_feature_id].to_s].each do |act| + row["toxicities"][params[:prediction_feature_id].to_s].each do |act| weighted_sum[act] ||= 0 weighted_sum[act] += sim end -- cgit v1.2.3 From 75b70425ae8699464a18529eb7bf35a216c06243 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Thu, 21 Apr 2016 09:56:12 +0200 Subject: AMBIT import expanded --- lib/classification.rb | 1 + 1 file changed, 1 insertion(+) (limited to 'lib/classification.rb') diff --git a/lib/classification.rb b/lib/classification.rb index 4a17546..0de8726 100644 --- a/lib/classification.rb +++ b/lib/classification.rb @@ -8,6 +8,7 @@ module OpenTox weighted_sum = {} sim_sum = 0.0 confidence = 0.0 + # see ~/src/pubchem-read-across/application.rb:353 neighbors.each do |row| sim = row["tanimoto"] row["toxicities"][params[:prediction_feature_id].to_s].each do |act| -- cgit v1.2.3 From 4ebd80fee52c04bd36781f846eae60019918345d Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Thu, 21 Apr 2016 14:29:23 +0200 Subject: initial classification probabilities --- lib/classification.rb | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) (limited to 'lib/classification.rb') diff --git a/lib/classification.rb b/lib/classification.rb index 0202940..b9b66f0 100644 --- a/lib/classification.rb +++ b/lib/classification.rb @@ -5,28 +5,28 @@ module OpenTox def self.weighted_majority_vote compound, params neighbors = params[:neighbors] - weighted_sum = {} - sim_sum = 0.0 - confidence = 0.0 - neighbors.each do |row| - sim = row["tanimoto"] - row["features"][params[:prediction_feature_id].to_s].each do |act| - weighted_sum[act] ||= 0 - weighted_sum[act] += sim + feature_id = params[:prediction_feature_id].to_s + sims = {} + neighbors.each do |n| + sim = n["tanimoto"] + n["features"][feature_id].each do |act| + sims[act] ||= [] + sims[act] << sim + #sims[act] << 0.5*sim+0.5 # scale to 1-0.5 end end - case weighted_sum.size - when 1 - return {:value => weighted_sum.keys.first, :confidence => weighted_sum.values.first/neighbors.size.abs} - when 2 - sim_sum = weighted_sum[weighted_sum.keys[0]] - sim_sum -= weighted_sum[weighted_sum.keys[1]] - sim_sum > 0 ? prediction = weighted_sum.keys[0] : prediction = weighted_sum.keys[1] - confidence = (sim_sum/neighbors.size).abs - return {:value => prediction,:confidence => confidence} - else - bad_request_error "Cannot predict more than 2 classes, multinomial classifications is not yet implemented. Received classes were: '#{weighted.sum.keys}'" + sim_all = sims.collect{|a,s| s}.flatten + sim_sum = sim_all.sum + sim_max = sim_all.max + probabilities = {} + sims.each do |a,s| + probabilities[a] = s.sum/sim_sum end + probabilities = probabilities.collect{|a,p| [a,sim_max*p]}.to_h + p_max = probabilities.collect{|a,p| p}.max + prediction = probabilities.key(p_max) + {:value => prediction,:probabilities => probabilities} + end end end -- cgit v1.2.3 From 06fc914653face2c58fd4e6c47161cb03e217582 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Sun, 8 May 2016 12:22:58 +0200 Subject: default validations fixed --- lib/classification.rb | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'lib/classification.rb') diff --git a/lib/classification.rb b/lib/classification.rb index 93b4f0f..4cc9201 100644 --- a/lib/classification.rb +++ b/lib/classification.rb @@ -6,13 +6,14 @@ module OpenTox def self.weighted_majority_vote compound, params neighbors = params[:neighbors] feature_id = params[:prediction_feature_id].to_s + dataset_id = params[:training_dataset_id].to_s sims = {} neighbors.each do |n| sim = n["tanimoto"] - n["toxicities"][feature_id].each do |act| + n["toxicities"][feature_id][dataset_id].each do |act| sims[act] ||= [] sims[act] << sim - end + end if n["toxicities"][feature_id][dataset_id] end sim_all = sims.collect{|a,s| s}.flatten sim_sum = sim_all.sum -- cgit v1.2.3 From b8bb12c8a163c238d7d4387c1914e2100bb660df Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Thu, 12 May 2016 15:23:01 +0200 Subject: enm study import fixed --- lib/classification.rb | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) (limited to 'lib/classification.rb') diff --git a/lib/classification.rb b/lib/classification.rb index 4cc9201..48ff8b3 100644 --- a/lib/classification.rb +++ b/lib/classification.rb @@ -3,17 +3,15 @@ module OpenTox class Classification - def self.weighted_majority_vote compound, params - neighbors = params[:neighbors] - feature_id = params[:prediction_feature_id].to_s - dataset_id = params[:training_dataset_id].to_s + def self.weighted_majority_vote substance, neighbors sims = {} - neighbors.each do |n| - sim = n["tanimoto"] - n["toxicities"][feature_id][dataset_id].each do |act| + neighbors.each do |neighbor| + sim = neighbor["similarity"] + activities = neighbor["toxicities"] + activities.each do |act| sims[act] ||= [] sims[act] << sim - end if n["toxicities"][feature_id][dataset_id] + end if activities end sim_all = sims.collect{|a,s| s}.flatten sim_sum = sim_all.sum @@ -26,7 +24,6 @@ module OpenTox p_max = probabilities.collect{|a,p| p}.max prediction = probabilities.key(p_max) {:value => prediction,:probabilities => probabilities} - end end end -- cgit v1.2.3 From b515a0cfedb887a2af753db6e4a08ae1af430cad Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Tue, 31 May 2016 18:08:08 +0200 Subject: cleanup of validation modules/classes --- lib/classification.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/classification.rb') diff --git a/lib/classification.rb b/lib/classification.rb index 48ff8b3..0f3c6d9 100644 --- a/lib/classification.rb +++ b/lib/classification.rb @@ -7,7 +7,7 @@ module OpenTox sims = {} neighbors.each do |neighbor| sim = neighbor["similarity"] - activities = neighbor["toxicities"] + activities = neighbor["measurements"] activities.each do |act| sims[act] ||= [] sims[act] << sim -- cgit v1.2.3 From 0f31c884d1bcfa448a1bf43a41d8fd6cf88bfc52 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Wed, 8 Jun 2016 18:26:07 +0200 Subject: compound tests fixed --- lib/classification.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/classification.rb') diff --git a/lib/classification.rb b/lib/classification.rb index 0f3c6d9..2ccd7d1 100644 --- a/lib/classification.rb +++ b/lib/classification.rb @@ -3,7 +3,7 @@ module OpenTox class Classification - def self.weighted_majority_vote substance, neighbors + def self.weighted_majority_vote substance:, neighbors: sims = {} neighbors.each do |neighbor| sim = neighbor["similarity"] -- cgit v1.2.3 From adefea0e78a4f05a2c9537e643873ad61fc22a0a Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Mon, 3 Oct 2016 19:49:55 +0200 Subject: initial model creation tests --- lib/classification.rb | 2 ++ 1 file changed, 2 insertions(+) (limited to 'lib/classification.rb') diff --git a/lib/classification.rb b/lib/classification.rb index 2ccd7d1..03c32c4 100644 --- a/lib/classification.rb +++ b/lib/classification.rb @@ -25,7 +25,9 @@ module OpenTox prediction = probabilities.key(p_max) {:value => prediction,:probabilities => probabilities} end + end + end end -- cgit v1.2.3 From 5d4e5e463c2b87241bbb56e4658e1e26c0ed084f Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Wed, 5 Oct 2016 13:22:12 +0200 Subject: substance and nanoparticle model creation and predictions --- lib/classification.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/classification.rb') diff --git a/lib/classification.rb b/lib/classification.rb index 03c32c4..01ba878 100644 --- a/lib/classification.rb +++ b/lib/classification.rb @@ -3,7 +3,7 @@ module OpenTox class Classification - def self.weighted_majority_vote substance:, neighbors: + def self.weighted_majority_vote descriptors:nil, neighbors: sims = {} neighbors.each do |neighbor| sim = neighbor["similarity"] -- cgit v1.2.3 From 91787edb3682900bc5a2feeca66e5142f387fcc6 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Fri, 7 Oct 2016 10:25:58 +0200 Subject: unified interface for prediction algorithms --- lib/classification.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/classification.rb') diff --git a/lib/classification.rb b/lib/classification.rb index 01ba878..6582e7d 100644 --- a/lib/classification.rb +++ b/lib/classification.rb @@ -3,7 +3,7 @@ module OpenTox class Classification - def self.weighted_majority_vote descriptors:nil, neighbors: + def self.weighted_majority_vote descriptors:nil, neighbors:, method:nil, relevant_features:nil sims = {} neighbors.each do |neighbor| sim = neighbor["similarity"] -- cgit v1.2.3 From dc4ab1f4e64d738d6c0b70f0b690a2359685080f Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Wed, 12 Oct 2016 21:32:27 +0200 Subject: physchem regression, correlation_filter for fingerprints --- lib/classification.rb | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) (limited to 'lib/classification.rb') diff --git a/lib/classification.rb b/lib/classification.rb index 6582e7d..e8c179f 100644 --- a/lib/classification.rb +++ b/lib/classification.rb @@ -3,24 +3,17 @@ module OpenTox class Classification - def self.weighted_majority_vote descriptors:nil, neighbors:, method:nil, relevant_features:nil - sims = {} - neighbors.each do |neighbor| - sim = neighbor["similarity"] - activities = neighbor["measurements"] - activities.each do |act| - sims[act] ||= [] - sims[act] << sim - end if activities + def self.weighted_majority_vote dependent_variables:, independent_variables:nil, weights:, query_variables: + class_weights = {} + dependent_variables.each_with_index do |v,i| + class_weights[v] ||= [] + class_weights[v] << weights[i] unless v.nil? end - sim_all = sims.collect{|a,s| s}.flatten - sim_sum = sim_all.sum - sim_max = sim_all.max probabilities = {} - sims.each do |a,s| - probabilities[a] = s.sum/sim_sum + class_weights.each do |a,w| + probabilities[a] = w.sum/weights.sum end - probabilities = probabilities.collect{|a,p| [a,sim_max*p]}.to_h + probabilities = probabilities.collect{|a,p| [a,weights.max*p]}.to_h p_max = probabilities.collect{|a,p| p}.max prediction = probabilities.key(p_max) {:value => prediction,:probabilities => probabilities} -- cgit v1.2.3