From c90644211e214a50f6fdb3a936bf247f45f1f4be Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Fri, 13 May 2016 13:38:24 +0200 Subject: compound tests fixed --- lib/compound.rb | 27 +++++++++++++++--------- lib/crossvalidation.rb | 26 ++++------------------- lib/dataset.rb | 41 +++++++++++++++++++----------------- lib/import.rb | 9 ++------ lib/lazar.rb | 2 +- lib/leave-one-out-validation.rb | 31 +++++---------------------- lib/nanoparticle.rb | 40 ++++++++++++++--------------------- lib/similarity.rb | 46 +++++++++++++++++++++++++++++++++++++++++ lib/validation-statistics.rb | 24 +++++++++++++++++++++ lib/validation.rb | 10 ++------- test/compound.rb | 13 ++++++------ test/nanoparticles.rb | 15 ++++++++------ test/setup.rb | 4 ++-- 13 files changed, 156 insertions(+), 132 deletions(-) create mode 100644 lib/similarity.rb diff --git a/lib/compound.rb b/lib/compound.rb index 2554d54..89e9db2 100644 --- a/lib/compound.rb +++ b/lib/compound.rb @@ -254,13 +254,15 @@ module OpenTox self["chemblid"] end -# def fingerprint_count_neighbors params -# # TODO fix -# neighbors = [] -# query_fingerprint = self.fingerprint params[:type] -# training_dataset = Dataset.find(params[:training_dataset_id]).compounds.each do |compound| -# unless self == compound -# candidate_fingerprint = compound.fingerprint params[:type] +=begin + def fingerprint_neighbors(type:, min_sim: 0.1, dataset_id:, prediction_feature_id:) + neighbors = [] + dataset = Dataset.find(dataset_id) + query_fingerprint = self.fingerprint type + dataset.compounds.each do |compound| + values = dataset.values(compound,prediction_feature_id) + if values + candidate_fingerprint = compound.fingerprint type # features = (query_fingerprint + candidate_fingerprint).uniq # min_sum = 0 # max_sum = 0 @@ -274,7 +276,13 @@ module OpenTox # end # end # neighbors.sort{|a,b| b.last <=> a.last} -# end + sim = Algorithm::Similarity.tanimoto(query_fingerprint , candidate_fingerprint) + neighbors << {"_id" => compound.id, "toxicities" => values, "similarity" => sim} if sim >= min_sim + end + end + neighbors.sort{|a,b| b["similarity"] <=> a["similarity"]} + end +=end def fingerprint_neighbors(type:, min_sim: 0.1, dataset_id:, prediction_feature_id:) neighbors = [] @@ -294,9 +302,8 @@ module OpenTox neighbors << {"_id" => compound.id, "toxicities" => values, "similarity" => sim} if sim >= min_sim end end - neighbors.sort!{|a,b| b["similarity"] <=> a["similarity"]} end - neighbors + neighbors.sort{|a,b| b["similarity"] <=> a["similarity"]} end # def physchem_neighbors params diff --git a/lib/crossvalidation.rb b/lib/crossvalidation.rb index da4b731..357f0fa 100644 --- a/lib/crossvalidation.rb +++ b/lib/crossvalidation.rb @@ -41,6 +41,7 @@ module OpenTox $logger.debug "Dataset #{training_dataset.name}: Fold #{fold_nr} started" t = Time.now validation = Validation.create(model, fold[0], fold[1],cv) + #p validation $logger.debug "Dataset #{training_dataset.name}, Fold #{fold_nr}: #{Time.now-t} seconds" #end end @@ -166,29 +167,10 @@ module OpenTox end def correlation_plot - #unless correlation_plot_id - tmpfile = "/tmp/#{id.to_s}_correlation.png" - x = [] - y = [] - predictions.each do |sid,p| - x << p["value"] - y << p["measured"].median - end - attributes = Model::Lazar.find(self.model_id).attributes - attributes.delete_if{|key,_| key.match(/_id|_at/) or ["_id","creator","name"].include? key} - attributes = attributes.values.collect{|v| v.is_a?(String) ? v.sub(/OpenTox::/,'') : v}.join("\n") - R.assign "measurement", x - R.assign "prediction", y - R.eval "all = c(measurement,prediction)" - R.eval "range = c(min(all), max(all))" - R.eval "image = qplot(prediction,measurement,main='#{self.name}',asp=1,xlim=range, ylim=range)" - R.eval "image = image + geom_abline(intercept=0, slope=1)" - #R.eval "ggsave(file='#{tmpfile}', plot=image)" - R.eval "ggsave(file='#{tmpfile}')" - file = Mongo::Grid::File.new(File.read(tmpfile), :filename => "#{self.id.to_s}_correlation_plot.png") - plot_id = $gridfs.insert_one(file) + unless correlation_plot_id + plot_id = ValidationStatistics.correlation_plot predictions update(:correlation_plot_id => plot_id) - #end + end $gridfs.find_one(_id: correlation_plot_id).data end end diff --git a/lib/dataset.rb b/lib/dataset.rb index 8c7fe68..205f640 100644 --- a/lib/dataset.rb +++ b/lib/dataset.rb @@ -5,8 +5,8 @@ module OpenTox class Dataset - field :substance_ids, type: Array, default: [] - field :feature_ids, type: Array, default: [] + #field :substance_ids, type: Array, default: [] + #field :feature_ids, type: Array, default: [] field :data_entries, type: Hash, default: {} # Readers @@ -21,13 +21,14 @@ module OpenTox # Get all substances def substances - @substances ||= substance_ids.collect{|id| OpenTox::Substance.find id} + @substances ||= data_entries.keys.collect{|id| OpenTox::Substance.find id}.uniq @substances end # Get all features def features - @features ||= feature_ids.collect{|id| OpenTox::Feature.find(id)} + #@features ||= feature_ids.collect{|id| OpenTox::Feature.find(id)} + @features ||= data_entries.collect{|sid,data| data.keys.collect{|id| OpenTox::Feature.find(id)}}.flatten.uniq @features end @@ -58,7 +59,11 @@ module OpenTox feature = feature.id if feature.is_a? Feature data_entries[substance.to_s] ||= {} data_entries[substance.to_s][feature.to_s] ||= [] - data_entries[substance.to_s][feature.to_s] << value + if value.is_a? Array + data_entries[substance.to_s][feature.to_s] += value + else + data_entries[substance.to_s][feature.to_s] << value + end end # Dataset operations @@ -67,7 +72,7 @@ module OpenTox # @param [Integer] number of folds # @return [Array] Array with folds [training_dataset,test_dataset] def folds n - len = self.substance_ids.size + len = self.substances.size indices = (0..len-1).to_a.shuffle mid = (len/n) chunks = [] @@ -76,12 +81,14 @@ module OpenTox last = start+mid last = last-1 unless len%n >= i test_idxs = indices[start..last] || [] - test_cids = test_idxs.collect{|i| substance_ids[i]} + test_substances = test_idxs.collect{|i| substances[i]} training_idxs = indices-test_idxs - training_cids = training_idxs.collect{|i| substance_ids[i]} - chunk = [training_cids,test_cids].collect do |cids| - dataset = self.class.create(:substance_ids => cids, :feature_ids => feature_ids, :source => self.id ) - dataset.substances.each do |substance| + training_substances = training_idxs.collect{|i| substances[i]} + chunk = [training_substances,test_substances].collect do |substances| + dataset = self.class.create(:source => self.id ) + substances.each do |substance| + #dataset = self.class.create(:substance_ids => cids, :feature_ids => feature_ids, :source => self.id ) + #dataset.substances.each do |substance| substance.dataset_ids << dataset.id substance.save dataset.data_entries[substance.id.to_s] = data_entries[substance.id.to_s] ||= {} @@ -170,6 +177,7 @@ module OpenTox compound_format = feature_names.shift.strip bad_request_error "#{compound_format} is not a supported compound format. Accepted formats: SMILES, InChI." unless compound_format =~ /SMILES|InChI/i numeric = [] + features = [] # guess feature types feature_names.each_with_index do |f,i| metadata = {:name => f} @@ -187,7 +195,7 @@ module OpenTox numeric[i] = false feature = NominalFeature.find_or_create_by(metadata) end - feature_ids << feature.id if feature + features << feature if feature end # substances and values @@ -210,12 +218,10 @@ module OpenTox warn "Cannot parse #{compound_format} compound '#{identifier}' at position #{i+2}, all entries are ignored." next end - substance_ids << substance.id - data_entries[substance.id.to_s] = {} substance.dataset_ids << self.id unless substance.dataset_ids.include? self.id substance.save - unless vals.size == feature_ids.size + unless vals.size == features.size warn "Number of values at position #{i+2} is different than header size (#{vals.size} vs. #{features.size}), all entries are ignored." next end @@ -229,8 +235,7 @@ module OpenTox else v = v.strip end - data_entries[substance.id.to_s][feature_ids[j].to_s] ||= [] - data_entries[substance.id.to_s][feature_ids[j].to_s] << v + add substance, features[j], v end end substances.duplicates.each do |substance| @@ -238,8 +243,6 @@ module OpenTox substances.each_with_index{|c,i| positions << i+1 if !c.blank? and c.inchi and c.inchi == substance.inchi} warn "Duplicate compound #{substance.smiles} at rows #{positions.join(', ')}. Entries are accepted, assuming that measurements come from independent experiments." end - substance_ids.uniq! - feature_ids.uniq! save end diff --git a/lib/import.rb b/lib/import.rb index 3c6966e..2dcc361 100644 --- a/lib/import.rb +++ b/lib/import.rb @@ -39,7 +39,6 @@ module OpenTox :source => np["compound"]["URI"], ) np["bundles"].keys.each do |bundle_uri| - #datasets[bundle_uri].substance_ids << nanoparticle.id nanoparticle["dataset_ids"] << datasets[bundle_uri].id end bundle = datasets[np["bundles"].keys.first].id if np["bundles"].size == 1 @@ -59,7 +58,7 @@ module OpenTox end else feature = klass.find_or_create_by( - :name => "#{study["protocol"]["category"]["title"]} #{study["protocol"]["endpoint"]}", + :name => effect["endpoint"], :unit => effect["result"]["unit"], :category => study["protocol"]["topcategory"], :conditions => effect["conditions"] @@ -69,11 +68,7 @@ module OpenTox end nanoparticle.save end - datasets.each do |u,d| - d.feature_ids.uniq! - d.substance_ids.uniq! - d.save - end + datasets.each { |u,d| d.save } end =begin diff --git a/lib/lazar.rb b/lib/lazar.rb index 55de511..7bd87f4 100644 --- a/lib/lazar.rb +++ b/lib/lazar.rb @@ -80,10 +80,10 @@ CLASSES = ["Feature","Substance","Dataset","LazarPrediction","Validation","Cross "model.rb", "classification.rb", "regression.rb", + "validation-statistics.rb", "validation.rb", "crossvalidation.rb", "leave-one-out-validation.rb", - "validation-statistics.rb", "experiment.rb", "import.rb", ].each{ |f| require_relative f } diff --git a/lib/leave-one-out-validation.rb b/lib/leave-one-out-validation.rb index 7189617..b8deae9 100644 --- a/lib/leave-one-out-validation.rb +++ b/lib/leave-one-out-validation.rb @@ -13,18 +13,18 @@ module OpenTox t = Time.now model.training_dataset.features.first.nominal? ? klass = ClassificationLeaveOneOutValidation : klass = RegressionLeaveOneOutValidation loo = klass.new :model_id => model.id - predictions = model.predict model.training_dataset.compounds + predictions = model.predict model.training_dataset.substances predictions.each{|cid,p| p.delete(:neighbors)} nr_unpredicted = 0 predictions.each do |cid,prediction| if prediction[:value] - tox = Substance.find(cid).toxicities[prediction[:prediction_feature_id].to_s] - prediction[:measured] = tox[model.training_dataset_id.to_s] if tox + prediction[:measured] = model.training_dataset.values(cid, prediction[:prediction_feature_id]) else nr_unpredicted += 1 end predictions.delete(cid) unless prediction[:value] and prediction[:measured] end + predictions.select!{|cid,p| p[:value] and p[:measured]} loo.nr_instances = predictions.size loo.nr_unpredicted = nr_unpredicted loo.predictions = predictions @@ -86,6 +86,7 @@ module OpenTox class RegressionLeaveOneOutValidation < LeaveOneOutValidation + include Plot field :rmse, type: Float, default: 0 field :mae, type: Float, default: 0 @@ -100,29 +101,7 @@ module OpenTox def correlation_plot unless correlation_plot_id - tmpfile = "/tmp/#{id.to_s}_correlation.svg" - predicted_values = [] - measured_values = [] - predictions.each do |pred| - pred[:database_activities].each do |activity| - if pred[:value] - predicted_values << pred[:value] - measured_values << activity - end - end - end - attributes = Model::Lazar.find(self.model_id).attributes - attributes.delete_if{|key,_| key.match(/_id|_at/) or ["_id","creator","name"].include? key} - attributes = attributes.values.collect{|v| v.is_a?(String) ? v.sub(/OpenTox::/,'') : v}.join("\n") - R.assign "measurement", measured_values - R.assign "prediction", predicted_values - R.eval "all = c(-log(measurement),-log(prediction))" - R.eval "range = c(min(all), max(all))" - R.eval "image = qplot(-log(prediction),-log(measurement),main='#{self.name}',asp=1,xlim=range, ylim=range)" - R.eval "image = image + geom_abline(intercept=0, slope=1)" - R.eval "ggsave(file='#{tmpfile}', plot=image)" - file = Mongo::Grid::File.new(File.read(tmpfile), :filename => "#{self.id.to_s}_correlation_plot.svg") - plot_id = $gridfs.insert_one(file) + #plot_id = correlation_plot update(:correlation_plot_id => plot_id) end $gridfs.find_one(_id: correlation_plot_id).data diff --git a/lib/nanoparticle.rb b/lib/nanoparticle.rb index 6527fa3..7890a19 100644 --- a/lib/nanoparticle.rb +++ b/lib/nanoparticle.rb @@ -11,19 +11,14 @@ module OpenTox def nanoparticle_neighbors min_sim: 0.1, type:, dataset_id:, prediction_feature_id: dataset = Dataset.find(dataset_id) neighbors = [] - p dataset.data_entries.size - p dataset.substance_ids.size - p dataset.substance_ids.collect{|i| i.to_s} == dataset.data_entries.keys - p dataset.substance_ids.collect{|i| i.to_s} - p dataset.data_entries.keys dataset.nanoparticles.each do |np| - prediction_feature_id - p dataset.data_entries[np.id.to_s] values = dataset.values(np,prediction_feature_id) - p values if values common_descriptors = physchem_descriptors.keys & np.physchem_descriptors.keys - sim = Algorithm::Similarity.cosine(common_descriptors.collect{|d| physchem_descriptors[d]}, common_descriptors.collect{|d| np.physchem_descriptors[d]}) + common_descriptors.select!{|id| NumericFeature.find(id) } + query_descriptors = common_descriptors.collect{|d| physchem_descriptors[d].first} + neighbor_descriptors = common_descriptors.collect{|d| np.physchem_descriptors[d].first} + sim = Algorithm::Similarity.cosine(query_descriptors,neighbor_descriptors) neighbors << {"_id" => np.id, "toxicities" => values, "similarity" => sim} if sim >= min_sim end end @@ -44,12 +39,7 @@ module OpenTox proteomics[feature.id.to_s].uniq! when "TOX" # TODO generic way of parsing TOX values - p dataset.name - p self.name - p feature.name - p feature.unit - p value - if feature.name == "7.99 Toxicity (other) ICP-AES" and feature.unit == "mL/ug(Mg)" + if feature.name == "Net cell association" and feature.unit == "mL/ug(Mg)" dataset.add self, feature, -Math.log10(value) else dataset.add self, feature, value @@ -70,32 +60,32 @@ module OpenTox add_feature feature, v["loValue"], dataset elsif v.keys.size == 2 and v["errorValue"] add_feature feature, v["loValue"], dataset - #warn "Ignoring errorValue '#{v["errorValue"]}' for '#{feature.name}'." + warn "Ignoring errorValue '#{v["errorValue"]}' for '#{feature.name}'." elsif v.keys.size == 2 and v["loQualifier"] == "mean" add_feature feature, v["loValue"], dataset - #warn "'#{feature.name}' is a mean value. Original data is not available." + warn "'#{feature.name}' is a mean value. Original data is not available." elsif v.keys.size == 2 and v["loQualifier"] #== ">=" - #warn "Only min value available for '#{feature.name}', entry ignored" + warn "Only min value available for '#{feature.name}', entry ignored" elsif v.keys.size == 2 and v["upQualifier"] #== ">=" - #warn "Only max value available for '#{feature.name}', entry ignored" + warn "Only max value available for '#{feature.name}', entry ignored" elsif v.keys.size == 3 and v["loValue"] and v["loQualifier"].nil? and v["upQualifier"].nil? add_feature feature, v["loValue"], dataset - #warn "loQualifier and upQualifier are empty." + warn "loQualifier and upQualifier are empty." elsif v.keys.size == 3 and v["loValue"] and v["loQualifier"] == "" and v["upQualifier"] == "" add_feature feature, v["loValue"], dataset - #warn "loQualifier and upQualifier are empty." + warn "loQualifier and upQualifier are empty." elsif v.keys.size == 4 and v["loValue"] and v["loQualifier"].nil? and v["upQualifier"].nil? add_feature feature, v["loValue"], dataset - #warn "loQualifier and upQualifier are empty." + warn "loQualifier and upQualifier are empty." elsif v.size == 4 and v["loQualifier"] and v["upQualifier"] and v["loValue"] and v["upValue"] add_feature feature, [v["loValue"],v["upValue"]].mean, dataset - #warn "Using mean value of range #{v["loValue"]} - #{v["upValue"]} for '#{feature.name}'. Original data is not available." + warn "Using mean value of range #{v["loValue"]} - #{v["upValue"]} for '#{feature.name}'. Original data is not available." elsif v.size == 4 and v["loQualifier"] == "mean" and v["errorValue"] - #warn "'#{feature.name}' is a mean value. Original data is not available. Ignoring errorValue '#{v["errorValue"]}' for '#{feature.name}'." + warn "'#{feature.name}' is a mean value. Original data is not available. Ignoring errorValue '#{v["errorValue"]}' for '#{feature.name}'." add_feature feature, v["loValue"], dataset elsif v == {} # do nothing else - #warn "Cannot parse Ambit eNanoMapper value '#{v}' for feature '#{feature.name}'." + warn "Cannot parse Ambit eNanoMapper value '#{v}' for feature '#{feature.name}'." end end diff --git a/lib/similarity.rb b/lib/similarity.rb new file mode 100644 index 0000000..f25d4c3 --- /dev/null +++ b/lib/similarity.rb @@ -0,0 +1,46 @@ +module OpenTox + module Algorithm + + class Vector + def self.dot_product(a, b) + products = a.zip(b).map{|a, b| a * b} + products.inject(0) {|s,p| s + p} + end + + def self.magnitude(point) + squares = point.map{|x| x ** 2} + Math.sqrt(squares.inject(0) {|s, c| s + c}) + end + end + + class Similarity + + def self.tanimoto a, b + ( a & b).size/(a|b).size.to_f + end + + def self.euclid a, b + sq = a.zip(b).map{|a,b| (a - b) ** 2} + Math.sqrt(sq.inject(0) {|s,c| s + c}) + end + + # http://stackoverflow.com/questions/1838806/euclidean-distance-vs-pearson-correlation-vs-cosine-similarity + def self.cosine a, b + Algorithm::Vector.dot_product(a, b) / (Algorithm::Vector.magnitude(a) * Algorithm::Vector.magnitude(b)) + end + + def self.weighted_cosine(a, b, w) + dot_product = 0 + magnitude_a = 0 + magnitude_b = 0 + (0..a.size-1).each do |i| + dot_product += w[i].abs*a[i]*b[i] + magnitude_a += w[i].abs*a[i]**2 + magnitude_b += w[i].abs*b[i]**2 + end + dot_product/Math.sqrt(magnitude_a*magnitude_b) + end + + end + end +end diff --git a/lib/validation-statistics.rb b/lib/validation-statistics.rb index 0079bae..2d6b56e 100644 --- a/lib/validation-statistics.rb +++ b/lib/validation-statistics.rb @@ -96,5 +96,29 @@ module OpenTox :finished_at => Time.now } end + + end + + module Plot + + def plot_id + tmpfile = "/tmp/#{id.to_s}_correlation.png" + x = [] + y = [] + predictions.each do |sid,p| + x << p["value"] + y << p["measured"].median + end + R.assign "measurement", x + R.assign "prediction", y + R.eval "all = c(measurement,prediction)" + R.eval "range = c(min(all), max(all))" + R.eval "image = qplot(prediction,measurement,main='',asp=1,xlim=range, ylim=range)" + R.eval "image = image + geom_abline(intercept=0, slope=1)" + R.eval "ggsave(file='#{tmpfile}', plot=image)" + file = Mongo::Grid::File.new(File.read(tmpfile), :filename => "#{self.id.to_s}_correlation_plot.png") + plot_id = $gridfs.insert_one(file) + plot_id + end end end diff --git a/lib/validation.rb b/lib/validation.rb index 015e718..9122df1 100644 --- a/lib/validation.rb +++ b/lib/validation.rb @@ -32,20 +32,14 @@ module OpenTox predictions = validation_model.predict test_set.substances predictions.each{|cid,p| p.delete(:neighbors)} nr_unpredicted = 0 - p predictions.size predictions.each do |cid,prediction| - p prediction if prediction[:value] - tox = Substance.find(cid).toxicities[prediction[:prediction_feature_id].to_s] - p tox - #prediction[:measured] = Substance.find(cid).toxicities[prediction[:prediction_feature_id].to_s][test_set.id.to_s] - prediction[:measured] = tox[test_set.id.to_s] if tox + prediction[:measured] = test_set.values(cid, prediction[:prediction_feature_id]) else nr_unpredicted += 1 end - predictions.delete(cid) unless prediction[:value] and prediction[:measured] end - p predictions.size + predictions.select!{|cid,p| p[:value] and p[:measured]} validation = self.new( :model_id => validation_model.id, :test_dataset_id => test_set.id, diff --git a/test/compound.rb b/test/compound.rb index 29d97a9..992463b 100644 --- a/test/compound.rb +++ b/test/compound.rb @@ -85,8 +85,8 @@ print c.sdf refute_nil c.fingerprint("MP2D") end c = d.compounds[371] - n = c.fingerprint_neighbors({:type => "FP4", :min_sim => 0.7, :training_dataset_id => d.id }) - assert n.size >= 18, "Neighbors size (#{n.size}) should be larger than 17" + n = c.fingerprint_neighbors({:type => "FP4", :min_sim => 0.7, :dataset_id => d.id, :prediction_feature_id => d.features.first.id }) + assert n.size >= 8, "Neighbors size (#{n.size}) should be larger than 7" end def test_openbabel_segfault @@ -118,7 +118,7 @@ print c.sdf ].each do |smi| c = OpenTox::Compound.from_smiles smi types.each do |type| - neighbors = c.fingerprint_neighbors({:type => type, :training_dataset_id => training_dataset.id, :min_sim => min_sim}) + neighbors = c.fingerprint_neighbors({:type => type, :dataset_id => training_dataset.id, :min_sim => min_sim, :prediction_feature_id => training_dataset.features.first.id}) unless type == "FP2" and smi == "CC(=O)CC(C)C#N" or smi == "C(=O)CC(C)C#N" and (type == "FP2" or type == "MACCS") refute_empty neighbors end @@ -139,6 +139,7 @@ print c.sdf end def test_fingerprint_count_neighbors + skip types = ["MP2D", "MNA"] min_sim = 0.0 training_dataset = Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM_log10.csv") @@ -149,7 +150,7 @@ print c.sdf ].each do |smi| c = OpenTox::Compound.from_smiles smi types.each do |type| - neighbors = c.fingerprint_count_neighbors({:type => type, :training_dataset_id => training_dataset.id, :min_sim => min_sim}) + neighbors = c.fingerprint_count_neighbors({:type => type, :dataset_id => training_dataset.id, :min_sim => min_sim, :prediction_feature_id => training_dataset.features.first.id}) if type == "FP4" fp4_neighbors = c.neighbors neighbors.each do |n| @@ -170,10 +171,10 @@ print c.sdf ].each do |smi| c = OpenTox::Compound.from_smiles smi t = Time.now - neighbors = c.db_neighbors(:training_dataset_id => training_dataset.id, :min_sim => 0.2) + neighbors = c.db_neighbors(:dataset_id => training_dataset.id, :min_sim => 0.2) p Time.now - t t = Time.now - neighbors2 = c.fingerprint_neighbors({:type => "MP2D", :training_dataset_id => training_dataset.id, :min_sim => 0.2}) + neighbors2 = c.fingerprint_neighbors({:type => "MP2D", :dataset_id => training_dataset.id, :min_sim => 0.2, :prediction_feature_id => training_dataset.features.first.id}) p Time.now - t p neighbors.size p neighbors2.size diff --git a/test/nanoparticles.rb b/test/nanoparticles.rb index e1b8788..897552d 100644 --- a/test/nanoparticles.rb +++ b/test/nanoparticles.rb @@ -4,7 +4,7 @@ require_relative "setup.rb" class NanoparticleTest < MiniTest::Test def setup - Import::Enanomapper.import File.join(File.dirname(__FILE__),"data","enm") + #Import::Enanomapper.import File.join(File.dirname(__FILE__),"data","enm") #`mongorestore --db=development #{File.join(File.dirname(__FILE__),"..","dump","production")}` end @@ -23,18 +23,20 @@ class NanoparticleTest < MiniTest::Test def test_create_model training_dataset = Dataset.find_or_create_by(:name => "Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles") - feature = Feature.find_or_create_by(name: "7.99 Toxicity (other) ICP-AES", category: "TOX", unit: "mL/ug(Mg)") - model = Model::LazarRegression.create(feature, training_dataset, {:prediction_algorithm => "OpenTox::Algorithm::Regression.local_physchem_regression", :neighbor_algorithm => "nanoparticle_neighbors"}) + #p training_dataset.nanoparticles.size + feature = Feature.find_or_create_by(name: "Net cell association", category: "TOX", unit: "mL/ug(Mg)") + model = Model::LazarRegression.create(feature, training_dataset, {:prediction_algorithm => "OpenTox::Algorithm::Regression.local_weighted_average", :neighbor_algorithm => "nanoparticle_neighbors"}) + #model = Model::LazarRegression.create(feature, training_dataset, {:prediction_algorithm => "OpenTox::Algorithm::Regression.local_physchem_regression", :neighbor_algorithm => "nanoparticle_neighbors"}) nanoparticle = training_dataset.nanoparticles[-34] prediction = model.predict nanoparticle p prediction - #p prediction refute_nil prediction[:value] end def test_validate_model training_dataset = Dataset.find_or_create_by(:name => "Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles") - feature = Feature.find_or_create_by(name: "7.99 Toxicity (other) ICP-AES", category: "TOX", unit: "mL/ug(Mg)") + feature = Feature.find_or_create_by(name: "Net cell association", category: "TOX", unit: "mL/ug(Mg)") + #feature = Feature.find_or_create_by(name: "7.99 Toxicity (other) ICP-AES", category: "TOX", unit: "mL/ug(Mg)") model = Model::LazarRegression.create(feature, training_dataset, {:prediction_algorithm => "OpenTox::Algorithm::Regression.local_weighted_average", :neighbor_algorithm => "nanoparticle_neighbors"}) p model cv = RegressionCrossValidation.create model @@ -43,7 +45,8 @@ class NanoparticleTest < MiniTest::Test def test_validate_pls_model training_dataset = Dataset.find_or_create_by(:name => "Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles") - feature = Feature.find_or_create_by(name: "7.99 Toxicity (other) ICP-AES", category: "TOX", unit: "mL/ug(Mg)") + feature = Feature.find_or_create_by(name: "Net cell association", category: "TOX", unit: "mL/ug(Mg)") + #feature = Feature.find_or_create_by(name: "7.99 Toxicity (other) ICP-AES", category: "TOX", unit: "mL/ug(Mg)") model = Model::LazarRegression.create(feature, training_dataset, {:prediction_algorithm => "OpenTox::Algorithm::Regression.local_physchem_regression", :neighbor_algorithm => "nanoparticle_neighbors"}) #model = Model::LazarRegression.create(feature, training_dataset, {:prediction_algorithm => "OpenTox::Algorithm::Regression.local_weighted_average", :neighbor_algorithm => "nanoparticle_neighbors"}) p model diff --git a/test/setup.rb b/test/setup.rb index e7c32b4..6c97282 100644 --- a/test/setup.rb +++ b/test/setup.rb @@ -5,5 +5,5 @@ require_relative '../lib/lazar.rb' include OpenTox TEST_DIR ||= File.expand_path(File.dirname(__FILE__)) DATA_DIR ||= File.join(TEST_DIR,"data") -$mongo.database.drop -$gridfs = $mongo.database.fs +#$mongo.database.drop +#$gridfs = $mongo.database.fs -- cgit v1.2.3