summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Christoph Helma <helma@in-silico.ch> 2016-05-13 13:38:24 +0200
committer Christoph Helma <helma@in-silico.ch> 2016-05-13 13:38:24 +0200
commit c90644211e214a50f6fdb3a936bf247f45f1f4be (patch)
tree 9ae3f0b33feb55f3904c4d7a08e39567223b07aa
parent b8bb12c8a163c238d7d4387c1914e2100bb660df (diff)
compound tests fixed
-rw-r--r-- lib/compound.rb 27
-rw-r--r-- lib/crossvalidation.rb 26
-rw-r--r-- lib/dataset.rb 41
-rw-r--r-- lib/import.rb 9
-rw-r--r-- lib/lazar.rb 2
-rw-r--r-- lib/leave-one-out-validation.rb 31
-rw-r--r-- lib/nanoparticle.rb 40
-rw-r--r-- lib/similarity.rb 46
-rw-r--r-- lib/validation-statistics.rb 24
-rw-r--r-- lib/validation.rb 10
-rw-r--r-- test/compound.rb 13
-rw-r--r-- test/nanoparticles.rb 15
-rw-r--r-- test/setup.rb 4
13 files changed, 156 insertions, 132 deletions
diff --git a/lib/compound.rb b/lib/compound.rb
index 2554d54..89e9db2 100644
--- a/lib/compound.rb
+++ b/lib/compound.rb
@@ -254,13 +254,15 @@ module OpenTox
self["chemblid"]
end
-# def fingerprint_count_neighbors params
-# # TODO fix
-# neighbors = []
-# query_fingerprint = self.fingerprint params[:type]
-# training_dataset = Dataset.find(params[:training_dataset_id]).compounds.each do |compound|
-# unless self == compound
-# candidate_fingerprint = compound.fingerprint params[:type]
+=begin
+ def fingerprint_neighbors(type:, min_sim: 0.1, dataset_id:, prediction_feature_id:)
+ neighbors = []
+ dataset = Dataset.find(dataset_id)
+ query_fingerprint = self.fingerprint type
+ dataset.compounds.each do |compound|
+ values = dataset.values(compound,prediction_feature_id)
+ if values
+ candidate_fingerprint = compound.fingerprint type
# features = (query_fingerprint + candidate_fingerprint).uniq
# min_sum = 0
# max_sum = 0
@@ -274,7 +276,13 @@ module OpenTox
# end
# end
# neighbors.sort{|a,b| b.last <=> a.last}
-# end
+ sim = Algorithm::Similarity.tanimoto(query_fingerprint , candidate_fingerprint)
+ neighbors << {"_id" => compound.id, "toxicities" => values, "similarity" => sim} if sim >= min_sim
+ end
+ end
+ neighbors.sort{|a,b| b["similarity"] <=> a["similarity"]}
+ end
+=end
def fingerprint_neighbors(type:, min_sim: 0.1, dataset_id:, prediction_feature_id:)
neighbors = []
@@ -294,9 +302,8 @@ module OpenTox
neighbors << {"_id" => compound.id, "toxicities" => values, "similarity" => sim} if sim >= min_sim
end
end
- neighbors.sort!{|a,b| b["similarity"] <=> a["similarity"]}
end
- neighbors
+ neighbors.sort{|a,b| b["similarity"] <=> a["similarity"]}
end
# def physchem_neighbors params
diff --git a/lib/crossvalidation.rb b/lib/crossvalidation.rb
index da4b731..357f0fa 100644
--- a/lib/crossvalidation.rb
+++ b/lib/crossvalidation.rb
@@ -41,6 +41,7 @@ module OpenTox
$logger.debug "Dataset #{training_dataset.name}: Fold #{fold_nr} started"
t = Time.now
validation = Validation.create(model, fold[0], fold[1],cv)
+ #p validation
$logger.debug "Dataset #{training_dataset.name}, Fold #{fold_nr}: #{Time.now-t} seconds"
#end
end
@@ -166,29 +167,10 @@ module OpenTox
end
def correlation_plot
- #unless correlation_plot_id
- tmpfile = "/tmp/#{id.to_s}_correlation.png"
- x = []
- y = []
- predictions.each do |sid,p|
- x << p["value"]
- y << p["measured"].median
- end
- attributes = Model::Lazar.find(self.model_id).attributes
- attributes.delete_if{|key,_| key.match(/_id|_at/) or ["_id","creator","name"].include? key}
- attributes = attributes.values.collect{|v| v.is_a?(String) ? v.sub(/OpenTox::/,'') : v}.join("\n")
- R.assign "measurement", x
- R.assign "prediction", y
- R.eval "all = c(measurement,prediction)"
- R.eval "range = c(min(all), max(all))"
- R.eval "image = qplot(prediction,measurement,main='#{self.name}',asp=1,xlim=range, ylim=range)"
- R.eval "image = image + geom_abline(intercept=0, slope=1)"
- #R.eval "ggsave(file='#{tmpfile}', plot=image)"
- R.eval "ggsave(file='#{tmpfile}')"
- file = Mongo::Grid::File.new(File.read(tmpfile), :filename => "#{self.id.to_s}_correlation_plot.png")
- plot_id = $gridfs.insert_one(file)
+ unless correlation_plot_id
+ plot_id = ValidationStatistics.correlation_plot predictions
update(:correlation_plot_id => plot_id)
- #end
+ end
$gridfs.find_one(_id: correlation_plot_id).data
end
end
diff --git a/lib/dataset.rb b/lib/dataset.rb
index 8c7fe68..205f640 100644
--- a/lib/dataset.rb
+++ b/lib/dataset.rb
@@ -5,8 +5,8 @@ module OpenTox
class Dataset
- field :substance_ids, type: Array, default: []
- field :feature_ids, type: Array, default: []
+ #field :substance_ids, type: Array, default: []
+ #field :feature_ids, type: Array, default: []
field :data_entries, type: Hash, default: {}
# Readers
@@ -21,13 +21,14 @@ module OpenTox
# Get all substances
def substances
- @substances ||= substance_ids.collect{|id| OpenTox::Substance.find id}
+ @substances ||= data_entries.keys.collect{|id| OpenTox::Substance.find id}.uniq
@substances
end
# Get all features
def features
- @features ||= feature_ids.collect{|id| OpenTox::Feature.find(id)}
+ #@features ||= feature_ids.collect{|id| OpenTox::Feature.find(id)}
+ @features ||= data_entries.collect{|sid,data| data.keys.collect{|id| OpenTox::Feature.find(id)}}.flatten.uniq
@features
end
@@ -58,7 +59,11 @@ module OpenTox
feature = feature.id if feature.is_a? Feature
data_entries[substance.to_s] ||= {}
data_entries[substance.to_s][feature.to_s] ||= []
- data_entries[substance.to_s][feature.to_s] << value
+ if value.is_a? Array
+ data_entries[substance.to_s][feature.to_s] += value
+ else
+ data_entries[substance.to_s][feature.to_s] << value
+ end
end
# Dataset operations
@@ -67,7 +72,7 @@ module OpenTox
# @param [Integer] number of folds
# @return [Array] Array with folds [training_dataset,test_dataset]
def folds n
- len = self.substance_ids.size
+ len = self.substances.size
indices = (0..len-1).to_a.shuffle
mid = (len/n)
chunks = []
@@ -76,12 +81,14 @@ module OpenTox
last = start+mid
last = last-1 unless len%n >= i
test_idxs = indices[start..last] || []
- test_cids = test_idxs.collect{|i| substance_ids[i]}
+ test_substances = test_idxs.collect{|i| substances[i]}
training_idxs = indices-test_idxs
- training_cids = training_idxs.collect{|i| substance_ids[i]}
- chunk = [training_cids,test_cids].collect do |cids|
- dataset = self.class.create(:substance_ids => cids, :feature_ids => feature_ids, :source => self.id )
- dataset.substances.each do |substance|
+ training_substances = training_idxs.collect{|i| substances[i]}
+ chunk = [training_substances,test_substances].collect do |substances|
+ dataset = self.class.create(:source => self.id )
+ substances.each do |substance|
+ #dataset = self.class.create(:substance_ids => cids, :feature_ids => feature_ids, :source => self.id )
+ #dataset.substances.each do |substance|
substance.dataset_ids << dataset.id
substance.save
dataset.data_entries[substance.id.to_s] = data_entries[substance.id.to_s] ||= {}
@@ -170,6 +177,7 @@ module OpenTox
compound_format = feature_names.shift.strip
bad_request_error "#{compound_format} is not a supported compound format. Accepted formats: SMILES, InChI." unless compound_format =~ /SMILES|InChI/i
numeric = []
+ features = []
# guess feature types
feature_names.each_with_index do |f,i|
metadata = {:name => f}
@@ -187,7 +195,7 @@ module OpenTox
numeric[i] = false
feature = NominalFeature.find_or_create_by(metadata)
end
- feature_ids << feature.id if feature
+ features << feature if feature
end
# substances and values
@@ -210,12 +218,10 @@ module OpenTox
warn "Cannot parse #{compound_format} compound '#{identifier}' at position #{i+2}, all entries are ignored."
next
end
- substance_ids << substance.id
- data_entries[substance.id.to_s] = {}
substance.dataset_ids << self.id unless substance.dataset_ids.include? self.id
substance.save
- unless vals.size == feature_ids.size
+ unless vals.size == features.size
warn "Number of values at position #{i+2} is different than header size (#{vals.size} vs. #{features.size}), all entries are ignored."
next
end
@@ -229,8 +235,7 @@ module OpenTox
else
v = v.strip
end
- data_entries[substance.id.to_s][feature_ids[j].to_s] ||= []
- data_entries[substance.id.to_s][feature_ids[j].to_s] << v
+ add substance, features[j], v
end
end
substances.duplicates.each do |substance|
@@ -238,8 +243,6 @@ module OpenTox
substances.each_with_index{|c,i| positions << i+1 if !c.blank? and c.inchi and c.inchi == substance.inchi}
warn "Duplicate compound #{substance.smiles} at rows #{positions.join(', ')}. Entries are accepted, assuming that measurements come from independent experiments."
end
- substance_ids.uniq!
- feature_ids.uniq!
save
end
diff --git a/lib/import.rb b/lib/import.rb
index 3c6966e..2dcc361 100644
--- a/lib/import.rb
+++ b/lib/import.rb
@@ -39,7 +39,6 @@ module OpenTox
:source => np["compound"]["URI"],
)
np["bundles"].keys.each do |bundle_uri|
- #datasets[bundle_uri].substance_ids << nanoparticle.id
nanoparticle["dataset_ids"] << datasets[bundle_uri].id
end
bundle = datasets[np["bundles"].keys.first].id if np["bundles"].size == 1
@@ -59,7 +58,7 @@ module OpenTox
end
else
feature = klass.find_or_create_by(
- :name => "#{study["protocol"]["category"]["title"]} #{study["protocol"]["endpoint"]}",
+ :name => effect["endpoint"],
:unit => effect["result"]["unit"],
:category => study["protocol"]["topcategory"],
:conditions => effect["conditions"]
@@ -69,11 +68,7 @@ module OpenTox
end
nanoparticle.save
end
- datasets.each do |u,d|
- d.feature_ids.uniq!
- d.substance_ids.uniq!
- d.save
- end
+ datasets.each { |u,d| d.save }
end
=begin
diff --git a/lib/lazar.rb b/lib/lazar.rb
index 55de511..7bd87f4 100644
--- a/lib/lazar.rb
+++ b/lib/lazar.rb
@@ -80,10 +80,10 @@ CLASSES = ["Feature","Substance","Dataset","LazarPrediction","Validation","Cross
"model.rb",
"classification.rb",
"regression.rb",
+ "validation-statistics.rb",
"validation.rb",
"crossvalidation.rb",
"leave-one-out-validation.rb",
- "validation-statistics.rb",
"experiment.rb",
"import.rb",
].each{ |f| require_relative f }
diff --git a/lib/leave-one-out-validation.rb b/lib/leave-one-out-validation.rb
index 7189617..b8deae9 100644
--- a/lib/leave-one-out-validation.rb
+++ b/lib/leave-one-out-validation.rb
@@ -13,18 +13,18 @@ module OpenTox
t = Time.now
model.training_dataset.features.first.nominal? ? klass = ClassificationLeaveOneOutValidation : klass = RegressionLeaveOneOutValidation
loo = klass.new :model_id => model.id
- predictions = model.predict model.training_dataset.compounds
+ predictions = model.predict model.training_dataset.substances
predictions.each{|cid,p| p.delete(:neighbors)}
nr_unpredicted = 0
predictions.each do |cid,prediction|
if prediction[:value]
- tox = Substance.find(cid).toxicities[prediction[:prediction_feature_id].to_s]
- prediction[:measured] = tox[model.training_dataset_id.to_s] if tox
+ prediction[:measured] = model.training_dataset.values(cid, prediction[:prediction_feature_id])
else
nr_unpredicted += 1
end
predictions.delete(cid) unless prediction[:value] and prediction[:measured]
end
+ predictions.select!{|cid,p| p[:value] and p[:measured]}
loo.nr_instances = predictions.size
loo.nr_unpredicted = nr_unpredicted
loo.predictions = predictions
@@ -86,6 +86,7 @@ module OpenTox
class RegressionLeaveOneOutValidation < LeaveOneOutValidation
+ include Plot
field :rmse, type: Float, default: 0
field :mae, type: Float, default: 0
@@ -100,29 +101,7 @@ module OpenTox
def correlation_plot
unless correlation_plot_id
- tmpfile = "/tmp/#{id.to_s}_correlation.svg"
- predicted_values = []
- measured_values = []
- predictions.each do |pred|
- pred[:database_activities].each do |activity|
- if pred[:value]
- predicted_values << pred[:value]
- measured_values << activity
- end
- end
- end
- attributes = Model::Lazar.find(self.model_id).attributes
- attributes.delete_if{|key,_| key.match(/_id|_at/) or ["_id","creator","name"].include? key}
- attributes = attributes.values.collect{|v| v.is_a?(String) ? v.sub(/OpenTox::/,'') : v}.join("\n")
- R.assign "measurement", measured_values
- R.assign "prediction", predicted_values
- R.eval "all = c(-log(measurement),-log(prediction))"
- R.eval "range = c(min(all), max(all))"
- R.eval "image = qplot(-log(prediction),-log(measurement),main='#{self.name}',asp=1,xlim=range, ylim=range)"
- R.eval "image = image + geom_abline(intercept=0, slope=1)"
- R.eval "ggsave(file='#{tmpfile}', plot=image)"
- file = Mongo::Grid::File.new(File.read(tmpfile), :filename => "#{self.id.to_s}_correlation_plot.svg")
- plot_id = $gridfs.insert_one(file)
+ #plot_id = correlation_plot
update(:correlation_plot_id => plot_id)
end
$gridfs.find_one(_id: correlation_plot_id).data
diff --git a/lib/nanoparticle.rb b/lib/nanoparticle.rb
index 6527fa3..7890a19 100644
--- a/lib/nanoparticle.rb
+++ b/lib/nanoparticle.rb
@@ -11,19 +11,14 @@ module OpenTox
def nanoparticle_neighbors min_sim: 0.1, type:, dataset_id:, prediction_feature_id:
dataset = Dataset.find(dataset_id)
neighbors = []
- p dataset.data_entries.size
- p dataset.substance_ids.size
- p dataset.substance_ids.collect{|i| i.to_s} == dataset.data_entries.keys
- p dataset.substance_ids.collect{|i| i.to_s}
- p dataset.data_entries.keys
dataset.nanoparticles.each do |np|
- prediction_feature_id
- p dataset.data_entries[np.id.to_s]
values = dataset.values(np,prediction_feature_id)
- p values
if values
common_descriptors = physchem_descriptors.keys & np.physchem_descriptors.keys
- sim = Algorithm::Similarity.cosine(common_descriptors.collect{|d| physchem_descriptors[d]}, common_descriptors.collect{|d| np.physchem_descriptors[d]})
+ common_descriptors.select!{|id| NumericFeature.find(id) }
+ query_descriptors = common_descriptors.collect{|d| physchem_descriptors[d].first}
+ neighbor_descriptors = common_descriptors.collect{|d| np.physchem_descriptors[d].first}
+ sim = Algorithm::Similarity.cosine(query_descriptors,neighbor_descriptors)
neighbors << {"_id" => np.id, "toxicities" => values, "similarity" => sim} if sim >= min_sim
end
end
@@ -44,12 +39,7 @@ module OpenTox
proteomics[feature.id.to_s].uniq!
when "TOX"
# TODO generic way of parsing TOX values
- p dataset.name
- p self.name
- p feature.name
- p feature.unit
- p value
- if feature.name == "7.99 Toxicity (other) ICP-AES" and feature.unit == "mL/ug(Mg)"
+ if feature.name == "Net cell association" and feature.unit == "mL/ug(Mg)"
dataset.add self, feature, -Math.log10(value)
else
dataset.add self, feature, value
@@ -70,32 +60,32 @@ module OpenTox
add_feature feature, v["loValue"], dataset
elsif v.keys.size == 2 and v["errorValue"]
add_feature feature, v["loValue"], dataset
- #warn "Ignoring errorValue '#{v["errorValue"]}' for '#{feature.name}'."
+ warn "Ignoring errorValue '#{v["errorValue"]}' for '#{feature.name}'."
elsif v.keys.size == 2 and v["loQualifier"] == "mean"
add_feature feature, v["loValue"], dataset
- #warn "'#{feature.name}' is a mean value. Original data is not available."
+ warn "'#{feature.name}' is a mean value. Original data is not available."
elsif v.keys.size == 2 and v["loQualifier"] #== ">="
- #warn "Only min value available for '#{feature.name}', entry ignored"
+ warn "Only min value available for '#{feature.name}', entry ignored"
elsif v.keys.size == 2 and v["upQualifier"] #== ">="
- #warn "Only max value available for '#{feature.name}', entry ignored"
+ warn "Only max value available for '#{feature.name}', entry ignored"
elsif v.keys.size == 3 and v["loValue"] and v["loQualifier"].nil? and v["upQualifier"].nil?
add_feature feature, v["loValue"], dataset
- #warn "loQualifier and upQualifier are empty."
+ warn "loQualifier and upQualifier are empty."
elsif v.keys.size == 3 and v["loValue"] and v["loQualifier"] == "" and v["upQualifier"] == ""
add_feature feature, v["loValue"], dataset
- #warn "loQualifier and upQualifier are empty."
+ warn "loQualifier and upQualifier are empty."
elsif v.keys.size == 4 and v["loValue"] and v["loQualifier"].nil? and v["upQualifier"].nil?
add_feature feature, v["loValue"], dataset
- #warn "loQualifier and upQualifier are empty."
+ warn "loQualifier and upQualifier are empty."
elsif v.size == 4 and v["loQualifier"] and v["upQualifier"] and v["loValue"] and v["upValue"]
add_feature feature, [v["loValue"],v["upValue"]].mean, dataset
- #warn "Using mean value of range #{v["loValue"]} - #{v["upValue"]} for '#{feature.name}'. Original data is not available."
+ warn "Using mean value of range #{v["loValue"]} - #{v["upValue"]} for '#{feature.name}'. Original data is not available."
elsif v.size == 4 and v["loQualifier"] == "mean" and v["errorValue"]
- #warn "'#{feature.name}' is a mean value. Original data is not available. Ignoring errorValue '#{v["errorValue"]}' for '#{feature.name}'."
+ warn "'#{feature.name}' is a mean value. Original data is not available. Ignoring errorValue '#{v["errorValue"]}' for '#{feature.name}'."
add_feature feature, v["loValue"], dataset
elsif v == {} # do nothing
else
- #warn "Cannot parse Ambit eNanoMapper value '#{v}' for feature '#{feature.name}'."
+ warn "Cannot parse Ambit eNanoMapper value '#{v}' for feature '#{feature.name}'."
end
end
diff --git a/lib/similarity.rb b/lib/similarity.rb
new file mode 100644
index 0000000..f25d4c3
--- /dev/null
+++ b/lib/similarity.rb
@@ -0,0 +1,46 @@
+module OpenTox
+ module Algorithm # vector helpers and pairwise similarity/distance measures over Arrays
+
+ class Vector # class-method-only numeric helpers
+ def self.dot_product(a, b) # sum of element-wise products of a and b
+ products = a.zip(b).map{|a, b| a * b} # block params shadow the outer a/b; zip pairs by a's length
+ products.inject(0) {|s,p| s + p} # yields 0 for empty input
+ end
+
+ def self.magnitude(point) # Euclidean (L2) norm: sqrt of the sum of squares
+ squares = point.map{|x| x ** 2}
+ Math.sqrt(squares.inject(0) {|s, c| s + c}) # always a Float; 0.0 for empty input
+ end
+ end
+
+ class Similarity # class-method-only measures; inputs are Arrays
+
+ def self.tanimoto a, b # size of intersection / size of union via Array#& and Array#| (duplicates collapse)
+ ( a & b).size/(a|b).size.to_f # Float result; 0/0.0 gives NaN when both inputs are empty
+ end
+
+ def self.euclid a, b # Euclidean distance; NOTE(review): a distance, not a similarity, despite the class name
+ sq = a.zip(b).map{|a,b| (a - b) ** 2} # zip pads with nil if b is shorter, so this assumes b.size >= a.size
+ Math.sqrt(sq.inject(0) {|s,c| s + c})
+ end
+
+ # http://stackoverflow.com/questions/1838806/euclidean-distance-vs-pearson-correlation-vs-cosine-similarity
+ def self.cosine a, b # dot(a, b) / (|a| * |b|)
+ Algorithm::Vector.dot_product(a, b) / (Algorithm::Vector.magnitude(a) * Algorithm::Vector.magnitude(b)) # NaN when either vector is empty or all zeros (0 over 0.0)
+ end
+
+ def self.weighted_cosine(a, b, w) # cosine with every dimension i scaled by |w[i]|
+ dot_product = 0
+ magnitude_a = 0
+ magnitude_b = 0
+ (0..a.size-1).each do |i| # index range comes from a; assumes b and w are at least as long - TODO confirm callers
+ dot_product += w[i].abs*a[i]*b[i]
+ magnitude_a += w[i].abs*a[i]**2
+ magnitude_b += w[i].abs*b[i]**2
+ end
+ dot_product/Math.sqrt(magnitude_a*magnitude_b) # denominator is sqrt(sum(|w|*a^2) * sum(|w|*b^2)); NaN if both sums are zero
+ end
+
+ end
+ end
+end
diff --git a/lib/validation-statistics.rb b/lib/validation-statistics.rb
index 0079bae..2d6b56e 100644
--- a/lib/validation-statistics.rb
+++ b/lib/validation-statistics.rb
@@ -96,5 +96,29 @@ module OpenTox
:finished_at => Time.now
}
end
+
+ end
+
+ module Plot # mixin; relies on the including class to provide id and predictions
+
+ def plot_id # draws a scatter plot through R, stores the PNG in $gridfs and returns the insert_one result
+ tmpfile = "/tmp/#{id.to_s}_correlation.png"
+ x = []
+ y = []
+ predictions.each do |sid,p| # iterated as key/value pairs; sid is not used
+ x << p["value"] # string key; presumably the predicted value - verify against how predictions are stored
+ y << p["measured"].median # assumes "measured" is a collection responding to median
+ end
+ R.assign "measurement", x # NOTE(review): x holds p["value"] but is bound to R's "measurement"; names look swapped with the next line - confirm
+ R.assign "prediction", y
+ R.eval "all = c(measurement,prediction)"
+ R.eval "range = c(min(all), max(all))" # shared axis limits for both axes
+ R.eval "image = qplot(prediction,measurement,main='',asp=1,xlim=range, ylim=range)"
+ R.eval "image = image + geom_abline(intercept=0, slope=1)" # identity line
+ R.eval "ggsave(file='#{tmpfile}', plot=image)"
+ file = Mongo::Grid::File.new(File.read(tmpfile), :filename => "#{self.id.to_s}_correlation_plot.png")
+ plot_id = $gridfs.insert_one(file) # local variable shadows this method's own name
+ plot_id
+ end
+ end
end
diff --git a/lib/validation.rb b/lib/validation.rb
index 015e718..9122df1 100644
--- a/lib/validation.rb
+++ b/lib/validation.rb
@@ -32,20 +32,14 @@ module OpenTox
predictions = validation_model.predict test_set.substances
predictions.each{|cid,p| p.delete(:neighbors)}
nr_unpredicted = 0
- p predictions.size
predictions.each do |cid,prediction|
- p prediction
if prediction[:value]
- tox = Substance.find(cid).toxicities[prediction[:prediction_feature_id].to_s]
- p tox
- #prediction[:measured] = Substance.find(cid).toxicities[prediction[:prediction_feature_id].to_s][test_set.id.to_s]
- prediction[:measured] = tox[test_set.id.to_s] if tox
+ prediction[:measured] = test_set.values(cid, prediction[:prediction_feature_id])
else
nr_unpredicted += 1
end
- predictions.delete(cid) unless prediction[:value] and prediction[:measured]
end
- p predictions.size
+ predictions.select!{|cid,p| p[:value] and p[:measured]}
validation = self.new(
:model_id => validation_model.id,
:test_dataset_id => test_set.id,
diff --git a/test/compound.rb b/test/compound.rb
index 29d97a9..992463b 100644
--- a/test/compound.rb
+++ b/test/compound.rb
@@ -85,8 +85,8 @@ print c.sdf
refute_nil c.fingerprint("MP2D")
end
c = d.compounds[371]
- n = c.fingerprint_neighbors({:type => "FP4", :min_sim => 0.7, :training_dataset_id => d.id })
- assert n.size >= 18, "Neighbors size (#{n.size}) should be larger than 17"
+ n = c.fingerprint_neighbors({:type => "FP4", :min_sim => 0.7, :dataset_id => d.id, :prediction_feature_id => d.features.first.id })
+ assert n.size >= 8, "Neighbors size (#{n.size}) should be larger than 7"
end
def test_openbabel_segfault
@@ -118,7 +118,7 @@ print c.sdf
].each do |smi|
c = OpenTox::Compound.from_smiles smi
types.each do |type|
- neighbors = c.fingerprint_neighbors({:type => type, :training_dataset_id => training_dataset.id, :min_sim => min_sim})
+ neighbors = c.fingerprint_neighbors({:type => type, :dataset_id => training_dataset.id, :min_sim => min_sim, :prediction_feature_id => training_dataset.features.first.id})
unless type == "FP2" and smi == "CC(=O)CC(C)C#N" or smi == "C(=O)CC(C)C#N" and (type == "FP2" or type == "MACCS")
refute_empty neighbors
end
@@ -139,6 +139,7 @@ print c.sdf
end
def test_fingerprint_count_neighbors
+ skip
types = ["MP2D", "MNA"]
min_sim = 0.0
training_dataset = Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM_log10.csv")
@@ -149,7 +150,7 @@ print c.sdf
].each do |smi|
c = OpenTox::Compound.from_smiles smi
types.each do |type|
- neighbors = c.fingerprint_count_neighbors({:type => type, :training_dataset_id => training_dataset.id, :min_sim => min_sim})
+ neighbors = c.fingerprint_count_neighbors({:type => type, :dataset_id => training_dataset.id, :min_sim => min_sim, :prediction_feature_id => training_dataset.features.first.id})
if type == "FP4"
fp4_neighbors = c.neighbors
neighbors.each do |n|
@@ -170,10 +171,10 @@ print c.sdf
].each do |smi|
c = OpenTox::Compound.from_smiles smi
t = Time.now
- neighbors = c.db_neighbors(:training_dataset_id => training_dataset.id, :min_sim => 0.2)
+ neighbors = c.db_neighbors(:dataset_id => training_dataset.id, :min_sim => 0.2)
p Time.now - t
t = Time.now
- neighbors2 = c.fingerprint_neighbors({:type => "MP2D", :training_dataset_id => training_dataset.id, :min_sim => 0.2})
+ neighbors2 = c.fingerprint_neighbors({:type => "MP2D", :dataset_id => training_dataset.id, :min_sim => 0.2, :prediction_feature_id => training_dataset.features.first.id})
p Time.now - t
p neighbors.size
p neighbors2.size
diff --git a/test/nanoparticles.rb b/test/nanoparticles.rb
index e1b8788..897552d 100644
--- a/test/nanoparticles.rb
+++ b/test/nanoparticles.rb
@@ -4,7 +4,7 @@ require_relative "setup.rb"
class NanoparticleTest < MiniTest::Test
def setup
- Import::Enanomapper.import File.join(File.dirname(__FILE__),"data","enm")
+ #Import::Enanomapper.import File.join(File.dirname(__FILE__),"data","enm")
#`mongorestore --db=development #{File.join(File.dirname(__FILE__),"..","dump","production")}`
end
@@ -23,18 +23,20 @@ class NanoparticleTest < MiniTest::Test
def test_create_model
training_dataset = Dataset.find_or_create_by(:name => "Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles")
- feature = Feature.find_or_create_by(name: "7.99 Toxicity (other) ICP-AES", category: "TOX", unit: "mL/ug(Mg)")
- model = Model::LazarRegression.create(feature, training_dataset, {:prediction_algorithm => "OpenTox::Algorithm::Regression.local_physchem_regression", :neighbor_algorithm => "nanoparticle_neighbors"})
+ #p training_dataset.nanoparticles.size
+ feature = Feature.find_or_create_by(name: "Net cell association", category: "TOX", unit: "mL/ug(Mg)")
+ model = Model::LazarRegression.create(feature, training_dataset, {:prediction_algorithm => "OpenTox::Algorithm::Regression.local_weighted_average", :neighbor_algorithm => "nanoparticle_neighbors"})
+ #model = Model::LazarRegression.create(feature, training_dataset, {:prediction_algorithm => "OpenTox::Algorithm::Regression.local_physchem_regression", :neighbor_algorithm => "nanoparticle_neighbors"})
nanoparticle = training_dataset.nanoparticles[-34]
prediction = model.predict nanoparticle
p prediction
- #p prediction
refute_nil prediction[:value]
end
def test_validate_model
training_dataset = Dataset.find_or_create_by(:name => "Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles")
- feature = Feature.find_or_create_by(name: "7.99 Toxicity (other) ICP-AES", category: "TOX", unit: "mL/ug(Mg)")
+ feature = Feature.find_or_create_by(name: "Net cell association", category: "TOX", unit: "mL/ug(Mg)")
+ #feature = Feature.find_or_create_by(name: "7.99 Toxicity (other) ICP-AES", category: "TOX", unit: "mL/ug(Mg)")
model = Model::LazarRegression.create(feature, training_dataset, {:prediction_algorithm => "OpenTox::Algorithm::Regression.local_weighted_average", :neighbor_algorithm => "nanoparticle_neighbors"})
p model
cv = RegressionCrossValidation.create model
@@ -43,7 +45,8 @@ class NanoparticleTest < MiniTest::Test
def test_validate_pls_model
training_dataset = Dataset.find_or_create_by(:name => "Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles")
- feature = Feature.find_or_create_by(name: "7.99 Toxicity (other) ICP-AES", category: "TOX", unit: "mL/ug(Mg)")
+ feature = Feature.find_or_create_by(name: "Net cell association", category: "TOX", unit: "mL/ug(Mg)")
+ #feature = Feature.find_or_create_by(name: "7.99 Toxicity (other) ICP-AES", category: "TOX", unit: "mL/ug(Mg)")
model = Model::LazarRegression.create(feature, training_dataset, {:prediction_algorithm => "OpenTox::Algorithm::Regression.local_physchem_regression", :neighbor_algorithm => "nanoparticle_neighbors"})
#model = Model::LazarRegression.create(feature, training_dataset, {:prediction_algorithm => "OpenTox::Algorithm::Regression.local_weighted_average", :neighbor_algorithm => "nanoparticle_neighbors"})
p model
diff --git a/test/setup.rb b/test/setup.rb
index e7c32b4..6c97282 100644
--- a/test/setup.rb
+++ b/test/setup.rb
@@ -5,5 +5,5 @@ require_relative '../lib/lazar.rb'
include OpenTox
TEST_DIR ||= File.expand_path(File.dirname(__FILE__))
DATA_DIR ||= File.join(TEST_DIR,"data")
-$mongo.database.drop
-$gridfs = $mongo.database.fs
+#$mongo.database.drop
+#$gridfs = $mongo.database.fs