summary | refs | log | tree | commit | diff
path: root/lib
diff options
context:
space:
mode:
author Christoph Helma <helma@in-silico.ch> 2016-05-09 15:11:46 +0200
committer Christoph Helma <helma@in-silico.ch> 2016-05-09 15:11:46 +0200
commit 611bac891177f8d9185d45486dd574b6ef4d1912 (patch)
tree 4ebb62998deee6aa02f4a8b94c69bac226c27c27 /lib
parent 7794086d367fb256c3673d7578b23ec2fb83e6ed (diff)
nanoparticle models fixed
Diffstat (limited to 'lib')
-rw-r--r-- lib/dataset.rb 8
-rw-r--r-- lib/import.rb 6
-rw-r--r-- lib/model.rb 1
-rw-r--r-- lib/nanoparticle.rb 37
-rw-r--r-- lib/regression.rb 2
5 files changed, 30 insertions, 24 deletions
diff --git a/lib/dataset.rb b/lib/dataset.rb
index 86800c6..9738c1f 100644
--- a/lib/dataset.rb
+++ b/lib/dataset.rb
@@ -62,12 +62,12 @@ module OpenTox
training_cids = training_idxs.collect{|i| substance_ids[i]}
chunk = [training_cids,test_cids].collect do |cids|
dataset = self.class.create(:substance_ids => cids, :feature_ids => feature_ids, :source => self.id )
- dataset.compounds.each do |compound|
- compound.dataset_ids << dataset.id
- compound.toxicities.each do |feature_id,data|
+ dataset.substances.each do |substance|
+ substance.dataset_ids << dataset.id
+ substance.toxicities.each do |feature_id,data|
data[dataset.id.to_s] = data[self.id.to_s] # copy data entries
end
- compound.save
+ substance.save
end
dataset
end
diff --git a/lib/import.rb b/lib/import.rb
index 11cb367..dfe5e2d 100644
--- a/lib/import.rb
+++ b/lib/import.rb
@@ -40,10 +40,10 @@ module OpenTox
datasets[bundle_uri].substance_ids << nanoparticle.id
nanoparticle["dataset_ids"] << datasets[bundle_uri].id
end
+ bundle = datasets[np["bundles"].keys.first].id if np["bundles"].size == 1
study["effects"].each do |effect|
effect["result"]["textValue"] ? klass = NominalFeature : klass = NumericFeature
# TODO parse core/coating
- # TODO parse proteomics, they come as a large textValue
#$logger.debug File.join(np["compound"]["URI"],"study")
effect["conditions"].delete_if { |k, v| v.nil? }
# parse proteomics data
@@ -53,7 +53,7 @@ module OpenTox
:name => identifier,
:category => "Proteomics",
)
- nanoparticle.parse_ambit_value feature, value
+ nanoparticle.parse_ambit_value feature, value, bundle
end
else
feature = klass.find_or_create_by(
@@ -62,7 +62,7 @@ module OpenTox
:category => study["protocol"]["topcategory"],
:conditions => effect["conditions"]
)
- nanoparticle.parse_ambit_value feature, effect["result"]
+ nanoparticle.parse_ambit_value feature, effect["result"], bundle
end
end
nanoparticle.save
diff --git a/lib/model.rb b/lib/model.rb
index 5b094fb..070248a 100644
--- a/lib/model.rb
+++ b/lib/model.rb
@@ -76,6 +76,7 @@ module OpenTox
prediction = {}
if neighbors.collect{|n| n["_id"]}.include? compound.id
+ me = neighbors.select{|n| n["_id"] == compound.id}.first
database_activities = neighbors.select{|n| n["_id"] == compound.id}.first["toxicities"][prediction_feature.id.to_s][training_dataset_id.to_s].uniq
prediction[:database_activities] = database_activities
prediction[:warning] = "#{database_activities.size} compounds have been removed from neighbors, because they have the same structure as the query compound."
diff --git a/lib/nanoparticle.rb b/lib/nanoparticle.rb
index 9bf419d..b79981d 100644
--- a/lib/nanoparticle.rb
+++ b/lib/nanoparticle.rb
@@ -9,10 +9,14 @@ module OpenTox
field :proteomics, type: Hash, default: {}
def nanoparticle_neighbors params
- Dataset.find(params[:training_dataset_id]).nanoparticles.collect{|np| np["tanimoto"] = 1; np}
+ dataset = Dataset.find(params[:training_dataset_id])
+ Dataset.find(params[:training_dataset_id]).nanoparticles.collect do |np|
+ np["tanimoto"] = 1
+ np unless np.toxicities.empty?
+ end.compact
end
- def add_feature feature, value
+ def add_feature feature, value, dataset_id
case feature.category
when "P-CHEM"
physchem_descriptors[feature.id.to_s] ||= []
@@ -23,51 +27,52 @@ module OpenTox
proteomics[feature.id.to_s] << value
proteomics[feature.id.to_s].uniq!
when "TOX"
- toxicities[feature.id.to_s] ||= []
+ toxicities[feature.id.to_s] ||= {}
+ toxicities[feature.id.to_s][dataset_id.to_s] ||= []
# TODO generic way of parsing TOX values
if feature.name == "7.99 Toxicity (other) ICP-AES" and feature.unit == "mL/ug(Mg)"
- toxicities[feature.id.to_s] << -Math.log10(value)
+ toxicities[feature.id.to_s][dataset_id.to_s] << -Math.log10(value)
else
- toxicities[feature.id.to_s] << value
+ toxicities[feature.id.to_s][dataset_id.to_s] << value
end
- toxicities[feature.id.to_s].uniq!
+ toxicities[feature.id.to_s][dataset_id.to_s].uniq!
else
warn "Unknown feature type '#{feature.category}'. Value '#{value}' not inserted."
end
end
- def parse_ambit_value feature, v
+ def parse_ambit_value feature, v, dataset_id
v.delete "unit"
# TODO: ppm instead of weights
if v.keys == ["textValue"]
- add_feature feature, v["textValue"]
+ add_feature feature, v["textValue"], dataset_id
elsif v.keys == ["loValue"]
- add_feature feature, v["loValue"]
+ add_feature feature, v["loValue"], dataset_id
elsif v.keys.size == 2 and v["errorValue"]
- add_feature feature, v["loValue"]
+ add_feature feature, v["loValue"], dataset_id
warn "Ignoring errorValue '#{v["errorValue"]}' for '#{feature.name}'."
elsif v.keys.size == 2 and v["loQualifier"] == "mean"
- add_feature feature, v["loValue"]
+ add_feature feature, v["loValue"], dataset_id
warn "'#{feature.name}' is a mean value. Original data is not available."
elsif v.keys.size == 2 and v["loQualifier"] #== ">="
warn "Only min value available for '#{feature.name}', entry ignored"
elsif v.keys.size == 2 and v["upQualifier"] #== ">="
warn "Only max value available for '#{feature.name}', entry ignored"
elsif v.keys.size == 3 and v["loValue"] and v["loQualifier"].nil? and v["upQualifier"].nil?
- add_feature feature, v["loValue"]
+ add_feature feature, v["loValue"], dataset_id
warn "loQualifier and upQualifier are empty."
elsif v.keys.size == 3 and v["loValue"] and v["loQualifier"] == "" and v["upQualifier"] == ""
- add_feature feature, v["loValue"]
+ add_feature feature, v["loValue"], dataset_id
warn "loQualifier and upQualifier are empty."
elsif v.keys.size == 4 and v["loValue"] and v["loQualifier"].nil? and v["upQualifier"].nil?
- add_feature feature, v["loValue"]
+ add_feature feature, v["loValue"], dataset_id
warn "loQualifier and upQualifier are empty."
elsif v.size == 4 and v["loQualifier"] and v["upQualifier"] and v["loValue"] and v["upValue"]
- add_feature feature, [v["loValue"],v["upValue"]].mean
+ add_feature feature, [v["loValue"],v["upValue"]].mean, dataset_id
warn "Using mean value of range #{v["loValue"]} - #{v["upValue"]} for '#{feature.name}'. Original data is not available."
elsif v.size == 4 and v["loQualifier"] == "mean" and v["errorValue"]
warn "'#{feature.name}' is a mean value. Original data is not available. Ignoring errorValue '#{v["errorValue"]}' for '#{feature.name}'."
- add_feature feature, v["loValue"]
+ add_feature feature, v["loValue"], dataset_id
elsif v == {} # do nothing
else
warn "Cannot parse Ambit eNanoMapper value '#{v}' for feature '#{feature.name}'."
diff --git a/lib/regression.rb b/lib/regression.rb
index b8a7e5f..691f903 100644
--- a/lib/regression.rb
+++ b/lib/regression.rb
@@ -10,7 +10,7 @@ module OpenTox
neighbors.each do |row|
sim = row["tanimoto"]
sim ||= 1 # TODO: sim f nanoparticles
- if row["toxicities"][params[:prediction_feature_id].to_s]
+ if row["toxicities"][params[:prediction_feature_id].to_s] and row["toxicities"][params[:prediction_feature_id].to_s][params[:training_dataset_id].to_s]
row["toxicities"][params[:prediction_feature_id].to_s][params[:training_dataset_id].to_s].each do |act|
weighted_sum += sim*act
sim_sum += sim