summary | refs | log | tree | commit | diff
path: root/lib/nanoparticle.rb
diff options
context:
space:
mode:
author: Christoph Helma <helma@in-silico.ch> 2016-05-12 15:23:01 +0200
committer: Christoph Helma <helma@in-silico.ch> 2016-05-12 15:23:01 +0200
commit: b8bb12c8a163c238d7d4387c1914e2100bb660df (patch)
tree: 791d1524e2294d8a3a38658607a644d7576784ae /lib/nanoparticle.rb
parent: 937bfbaf058aea5973927cb3bf6b51028b312ed9 (diff)
enm study import fixed
Diffstat (limited to 'lib/nanoparticle.rb')
-rw-r--r-- lib/nanoparticle.rb 80
1 files changed, 50 insertions, 30 deletions
diff --git a/lib/nanoparticle.rb b/lib/nanoparticle.rb
index b79981d..6527fa3 100644
--- a/lib/nanoparticle.rb
+++ b/lib/nanoparticle.rb
@@ -8,15 +8,31 @@ module OpenTox
field :bundles, type: Array, default: []
field :proteomics, type: Hash, default: {}
- def nanoparticle_neighbors params
- dataset = Dataset.find(params[:training_dataset_id])
- Dataset.find(params[:training_dataset_id]).nanoparticles.collect do |np|
- np["tanimoto"] = 1
- np unless np.toxicities.empty?
- end.compact
+ def nanoparticle_neighbors min_sim: 0.1, type:, dataset_id:, prediction_feature_id:
+ dataset = Dataset.find(dataset_id)
+ neighbors = []
+ p dataset.data_entries.size
+ p dataset.substance_ids.size
+ p dataset.substance_ids.collect{|i| i.to_s} == dataset.data_entries.keys
+ p dataset.substance_ids.collect{|i| i.to_s}
+ p dataset.data_entries.keys
+ dataset.nanoparticles.each do |np|
+ prediction_feature_id
+ p dataset.data_entries[np.id.to_s]
+ values = dataset.values(np,prediction_feature_id)
+ p values
+ if values
+ common_descriptors = physchem_descriptors.keys & np.physchem_descriptors.keys
+ sim = Algorithm::Similarity.cosine(common_descriptors.collect{|d| physchem_descriptors[d]}, common_descriptors.collect{|d| np.physchem_descriptors[d]})
+ neighbors << {"_id" => np.id, "toxicities" => values, "similarity" => sim} if sim >= min_sim
+ end
+ end
+ neighbors.sort!{|a,b| b["similarity"] <=> a["similarity"]}
+ neighbors
end
def add_feature feature, value, dataset_id
+ dataset = Dataset.find(dataset_id)
case feature.category
when "P-CHEM"
physchem_descriptors[feature.id.to_s] ||= []
@@ -27,55 +43,59 @@ module OpenTox
proteomics[feature.id.to_s] << value
proteomics[feature.id.to_s].uniq!
when "TOX"
- toxicities[feature.id.to_s] ||= {}
- toxicities[feature.id.to_s][dataset_id.to_s] ||= []
# TODO generic way of parsing TOX values
+ p dataset.name
+ p self.name
+ p feature.name
+ p feature.unit
+ p value
if feature.name == "7.99 Toxicity (other) ICP-AES" and feature.unit == "mL/ug(Mg)"
- toxicities[feature.id.to_s][dataset_id.to_s] << -Math.log10(value)
+ dataset.add self, feature, -Math.log10(value)
else
- toxicities[feature.id.to_s][dataset_id.to_s] << value
+ dataset.add self, feature, value
end
- toxicities[feature.id.to_s][dataset_id.to_s].uniq!
+ dataset.save
else
warn "Unknown feature type '#{feature.category}'. Value '#{value}' not inserted."
end
end
def parse_ambit_value feature, v, dataset_id
+ dataset = Dataset.find(dataset_id)
v.delete "unit"
# TODO: ppm instead of weights
if v.keys == ["textValue"]
- add_feature feature, v["textValue"], dataset_id
+ add_feature feature, v["textValue"], dataset
elsif v.keys == ["loValue"]
- add_feature feature, v["loValue"], dataset_id
+ add_feature feature, v["loValue"], dataset
elsif v.keys.size == 2 and v["errorValue"]
- add_feature feature, v["loValue"], dataset_id
- warn "Ignoring errorValue '#{v["errorValue"]}' for '#{feature.name}'."
+ add_feature feature, v["loValue"], dataset
+ #warn "Ignoring errorValue '#{v["errorValue"]}' for '#{feature.name}'."
elsif v.keys.size == 2 and v["loQualifier"] == "mean"
- add_feature feature, v["loValue"], dataset_id
- warn "'#{feature.name}' is a mean value. Original data is not available."
+ add_feature feature, v["loValue"], dataset
+ #warn "'#{feature.name}' is a mean value. Original data is not available."
elsif v.keys.size == 2 and v["loQualifier"] #== ">="
- warn "Only min value available for '#{feature.name}', entry ignored"
+ #warn "Only min value available for '#{feature.name}', entry ignored"
elsif v.keys.size == 2 and v["upQualifier"] #== ">="
- warn "Only max value available for '#{feature.name}', entry ignored"
+ #warn "Only max value available for '#{feature.name}', entry ignored"
elsif v.keys.size == 3 and v["loValue"] and v["loQualifier"].nil? and v["upQualifier"].nil?
- add_feature feature, v["loValue"], dataset_id
- warn "loQualifier and upQualifier are empty."
+ add_feature feature, v["loValue"], dataset
+ #warn "loQualifier and upQualifier are empty."
elsif v.keys.size == 3 and v["loValue"] and v["loQualifier"] == "" and v["upQualifier"] == ""
- add_feature feature, v["loValue"], dataset_id
- warn "loQualifier and upQualifier are empty."
+ add_feature feature, v["loValue"], dataset
+ #warn "loQualifier and upQualifier are empty."
elsif v.keys.size == 4 and v["loValue"] and v["loQualifier"].nil? and v["upQualifier"].nil?
- add_feature feature, v["loValue"], dataset_id
- warn "loQualifier and upQualifier are empty."
+ add_feature feature, v["loValue"], dataset
+ #warn "loQualifier and upQualifier are empty."
elsif v.size == 4 and v["loQualifier"] and v["upQualifier"] and v["loValue"] and v["upValue"]
- add_feature feature, [v["loValue"],v["upValue"]].mean, dataset_id
- warn "Using mean value of range #{v["loValue"]} - #{v["upValue"]} for '#{feature.name}'. Original data is not available."
+ add_feature feature, [v["loValue"],v["upValue"]].mean, dataset
+ #warn "Using mean value of range #{v["loValue"]} - #{v["upValue"]} for '#{feature.name}'. Original data is not available."
elsif v.size == 4 and v["loQualifier"] == "mean" and v["errorValue"]
- warn "'#{feature.name}' is a mean value. Original data is not available. Ignoring errorValue '#{v["errorValue"]}' for '#{feature.name}'."
- add_feature feature, v["loValue"], dataset_id
+ #warn "'#{feature.name}' is a mean value. Original data is not available. Ignoring errorValue '#{v["errorValue"]}' for '#{feature.name}'."
+ add_feature feature, v["loValue"], dataset
elsif v == {} # do nothing
else
- warn "Cannot parse Ambit eNanoMapper value '#{v}' for feature '#{feature.name}'."
+ #warn "Cannot parse Ambit eNanoMapper value '#{v}' for feature '#{feature.name}'."
end
end