summary | refs | log | tree | commit | diff
path: root/lib
diff options
context:
space:
mode:
author Christoph Helma <helma@in-silico.ch> 2016-05-08 12:22:58 +0200
committer Christoph Helma <helma@in-silico.ch> 2016-05-08 12:22:58 +0200
commit 06fc914653face2c58fd4e6c47161cb03e217582 (patch)
tree f001a28b3970f67bf648f6d00e95791a063e7fd5 /lib
parent 110b470a69f785f195cce21df7c07efa5c9ce61b (diff)
default validations fixed
Diffstat (limited to 'lib')
-rw-r--r-- lib/classification.rb 5
-rw-r--r-- lib/compound.rb 2
-rw-r--r-- lib/crossvalidation.rb 4
-rw-r--r-- lib/dataset.rb 15
-rw-r--r-- lib/leave-one-out-validation.rb 2
-rw-r--r-- lib/model.rb 5
-rw-r--r-- lib/regression.rb 10
-rw-r--r-- lib/validation.rb 4
8 files changed, 25 insertions, 22 deletions
diff --git a/lib/classification.rb b/lib/classification.rb
index 93b4f0f..4cc9201 100644
--- a/lib/classification.rb
+++ b/lib/classification.rb
@@ -6,13 +6,14 @@ module OpenTox
def self.weighted_majority_vote compound, params
neighbors = params[:neighbors]
feature_id = params[:prediction_feature_id].to_s
+ dataset_id = params[:training_dataset_id].to_s
sims = {}
neighbors.each do |n|
sim = n["tanimoto"]
- n["toxicities"][feature_id].each do |act|
+ n["toxicities"][feature_id][dataset_id].each do |act|
sims[act] ||= []
sims[act] << sim
- end
+ end if n["toxicities"][feature_id][dataset_id]
end
sim_all = sims.collect{|a,s| s}.flatten
sim_sum = sim_all.sum
diff --git a/lib/compound.rb b/lib/compound.rb
index c2ce5d0..3af6f6c 100644
--- a/lib/compound.rb
+++ b/lib/compound.rb
@@ -288,7 +288,7 @@ module OpenTox
training_dataset.compounds.each do |compound|
candidate_fingerprint = compound.fingerprint params[:type]
sim = (query_fingerprint & candidate_fingerprint).size/(query_fingerprint | candidate_fingerprint).size.to_f
- neighbors << {"_id" => compound.id, "toxicities" => {prediction_feature.id.to_s => compound.toxicities[prediction_feature.id.to_s]}, "tanimoto" => sim} if sim >= params[:min_sim]
+ neighbors << {"_id" => compound.id, "toxicities" => {prediction_feature.id.to_s => {training_dataset_id.to_s => compound.toxicities[prediction_feature.id.to_s][training_dataset_id.to_s]}}, "tanimoto" => sim} if sim >= params[:min_sim]
end
neighbors.sort!{|a,b| b["tanimoto"] <=> a["tanimoto"]}
end
diff --git a/lib/crossvalidation.rb b/lib/crossvalidation.rb
index e1f956b..8e0c5b9 100644
--- a/lib/crossvalidation.rb
+++ b/lib/crossvalidation.rb
@@ -133,14 +133,12 @@ module OpenTox
neighbors = compound.send(model.neighbor_algorithm,model.neighbor_algorithm_parameters)
neighbors.collect! do |n|
neighbor = Compound.find(n[0])
- { :smiles => neighbor.smiles, :similarity => n[1], :measurements => neighbor.toxicities[prediction_feature.id.to_s]}
+ { :smiles => neighbor.smiles, :similarity => n[1], :measurements => neighbor.toxicities[prediction_feature.id.to_s][training_dataset.id.to_s]}
end
{
:smiles => compound.smiles,
- #:fingerprint => compound.fp4.collect{|id| Smarts.find(id).name},
:measured => p[1],
:predicted => p[2],
- #:relative_error => (Math.log10(p[1])-Math.log10(p[2])).abs/Math.log10(p[1]).to_f.abs,
:error => (p[1]-p[2]).abs,
:relative_error => (p[1]-p[2]).abs/p[1],
:confidence => p[3],
diff --git a/lib/dataset.rb b/lib/dataset.rb
index 9b24440..86800c6 100644
--- a/lib/dataset.rb
+++ b/lib/dataset.rb
@@ -64,6 +64,9 @@ module OpenTox
dataset = self.class.create(:substance_ids => cids, :feature_ids => feature_ids, :source => self.id )
dataset.compounds.each do |compound|
compound.dataset_ids << dataset.id
+ compound.toxicities.each do |feature_id,data|
+ data[dataset.id.to_s] = data[self.id.to_s] # copy data entries
+ end
compound.save
end
dataset
@@ -92,7 +95,7 @@ module OpenTox
else
name = substance.name
end
- nr_measurements = features.collect{|f| substance.toxicities[f.id.to_s].size if substance.toxicities[f.id.to_s]}.compact.uniq
+ nr_measurements = features.collect{|f| substance.toxicities[f.id.to_s][self.id.to_s].size if substance.toxicities[f.id.to_s]}.compact.uniq
if nr_measurements.size > 1
warn "Unequal number of measurements (#{nr_measurements}) for '#{name}'. Skipping entries."
@@ -100,8 +103,8 @@ module OpenTox
(0..nr_measurements.first-1).each do |i|
row = [name]
features.each do |f|
- if substance.toxicities[f.id.to_s]
- row << substance.toxicities[f.id.to_s][i]
+ if substance.toxicities[f.id.to_s] and substance.toxicities[f.id.to_s][self.id.to_s]
+ row << substance.toxicities[f.id.to_s][self.id.to_s][i]
else
row << ""
end
@@ -149,7 +152,6 @@ module OpenTox
feature_names = table.shift.collect{|f| f.strip}
warnings << "Duplicated features in table header." unless feature_names.size == feature_names.uniq.size
compound_format = feature_names.shift.strip
- # TODO nanoparticles
bad_request_error "#{compound_format} is not a supported compound format. Accepted formats: SMILES, InChI." unless compound_format =~ /SMILES|InChI/i
numeric = []
# guess feature types
@@ -219,8 +221,9 @@ module OpenTox
else
v = v.strip
end
- compound.toxicities[feature_ids[j].to_s] ||= []
- compound.toxicities[feature_ids[j].to_s] << v
+ compound.toxicities[feature_ids[j].to_s] ||= {}
+ compound.toxicities[feature_ids[j].to_s][self.id.to_s] ||= []
+ compound.toxicities[feature_ids[j].to_s][self.id.to_s] << v
compound.save
end
end
diff --git a/lib/leave-one-out-validation.rb b/lib/leave-one-out-validation.rb
index ed917eb..2306041 100644
--- a/lib/leave-one-out-validation.rb
+++ b/lib/leave-one-out-validation.rb
@@ -19,7 +19,7 @@ module OpenTox
nr_unpredicted = 0
predictions.each do |cid,prediction|
if prediction[:value]
- prediction[:measured] = Substance.find(cid).toxicities[prediction[:prediction_feature_id].to_s]
+ prediction[:measured] = Substance.find(cid).toxicities[prediction[:prediction_feature_id].to_s][dataset_id.to_s]
else
nr_unpredicted += 1
end
diff --git a/lib/model.rb b/lib/model.rb
index 841ab20..5b094fb 100644
--- a/lib/model.rb
+++ b/lib/model.rb
@@ -41,7 +41,7 @@ module OpenTox
toxicities = []
substances = []
training_dataset.substances.each do |s|
- s["toxicities"][prediction_feature_id].each do |act|
+ s["toxicities"][prediction_feature_id][training_dataset_id.to_s].each do |act|
toxicities << act
substances << s
end
@@ -76,8 +76,7 @@ module OpenTox
prediction = {}
if neighbors.collect{|n| n["_id"]}.include? compound.id
- #TODO restrict to dataset features
- database_activities = neighbors.select{|n| n["_id"] == compound.id}.first["toxicities"][prediction_feature.id.to_s].uniq
+ database_activities = neighbors.select{|n| n["_id"] == compound.id}.first["toxicities"][prediction_feature.id.to_s][training_dataset_id.to_s].uniq
prediction[:database_activities] = database_activities
prediction[:warning] = "#{database_activities.size} compounds have been removed from neighbors, because they have the same structure as the query compound."
neighbors.delete_if{|n| n["_id"] == compound.id}
diff --git a/lib/regression.rb b/lib/regression.rb
index d2c4e91..13e1380 100644
--- a/lib/regression.rb
+++ b/lib/regression.rb
@@ -11,7 +11,7 @@ module OpenTox
sim = row["tanimoto"]
sim ||= 1 # TODO: sim f nanoparticles
if row["toxicities"][params[:prediction_feature_id].to_s]
- row["toxicities"][params[:prediction_feature_id].to_s].each do |act|
+ row["toxicities"][params[:prediction_feature_id].to_s][params[:training_dataset_id].to_s].each do |act|
weighted_sum += sim*act
sim_sum += sim
end
@@ -33,7 +33,7 @@ module OpenTox
neighbor = Compound.find row["_id"]
fingerprint = neighbor.fingerprint
if row["toxicities"][params[:prediction_feature_id].to_s]
- row["toxicities"][params[:prediction_feature_id].to_s].each do |act|
+ row["toxicities"][params[:prediction_feature_id].to_s][params[:training_dataset_id].to_s].each do |act|
activities << act
weights << row["tanimoto"]
fingerprint_ids.each_with_index do |id,j|
@@ -77,10 +77,10 @@ module OpenTox
def self.local_physchem_regression compound, params, method="pls"#, method_params="ncomp = 4"
- neighbors = params[:neighbors].select{|n| n["toxicities"][params[:prediction_feature_id].to_s]} # use only neighbors with measured activities
+ neighbors = params[:neighbors].select{|n| n["toxicities"][params[:prediction_feature_id].to_s] and n["toxicities"][params[:prediction_feature_id].to_s][params[:training_dataset_id].to_s]} # use only neighbors with measured activities
return {:value => nil, :confidence => nil, :warning => "No similar compounds in the training data"} unless neighbors.size > 0
- return {:value => neighbors.first["toxicities"][params[:prediction_feature_id]], :confidence => nil, :warning => "Only one similar compound in the training set"} unless neighbors.size > 1
+ return {:value => neighbors.first["toxicities"][params[:prediction_feature_id].to_s][params[:training_dataset_id].to_s], :confidence => nil, :warning => "Only one similar compound in the training set"} unless neighbors.size > 1
activities = []
weights = []
@@ -90,7 +90,7 @@ module OpenTox
neighbors.each_with_index do |n,i|
neighbor = Substance.find(n["_id"])
- n["toxicities"][params[:prediction_feature_id].to_s].each do |act|
+ n["toxicities"][params[:prediction_feature_id].to_s][params[:training_dataset_id].to_s].each do |act|
data_frame[0][i] = act
n["tanimoto"] ? weights << n["tanimoto"] : weights << 1.0 # TODO cosine ?
neighbor.physchem_descriptors.each do |pid,values|
diff --git a/lib/validation.rb b/lib/validation.rb
index 68cb1a1..334efd7 100644
--- a/lib/validation.rb
+++ b/lib/validation.rb
@@ -34,7 +34,9 @@ module OpenTox
nr_unpredicted = 0
predictions.each do |cid,prediction|
if prediction[:value]
- prediction[:measured] = Substance.find(cid).toxicities[prediction[:prediction_feature_id].to_s]
+ tox = Substance.find(cid).toxicities[prediction[:prediction_feature_id].to_s]
+ #prediction[:measured] = Substance.find(cid).toxicities[prediction[:prediction_feature_id].to_s][test_set.id.to_s]
+ prediction[:measured] = tox[test_set.id.to_s] if tox
else
nr_unpredicted += 1
end