summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lib/compound.rb30
-rw-r--r--lib/crossvalidation.rb2
-rw-r--r--lib/dataset.rb11
-rw-r--r--lib/import.rb57
-rw-r--r--lib/leave-one-out-validation.rb3
-rw-r--r--lib/model.rb16
-rw-r--r--lib/nanoparticle.rb110
-rw-r--r--lib/regression.rb2
-rw-r--r--lib/similarity.rb2
-rw-r--r--lib/validation-statistics.rb13
-rw-r--r--test/dataset.rb1
-rw-r--r--test/nanoparticles.rb36
-rw-r--r--test/setup.rb4
13 files changed, 175 insertions, 112 deletions
diff --git a/lib/compound.rb b/lib/compound.rb
index 89e9db2..a87678e 100644
--- a/lib/compound.rb
+++ b/lib/compound.rb
@@ -254,36 +254,6 @@ module OpenTox
self["chemblid"]
end
-=begin
- def fingerprint_neighbors(type:, min_sim: 0.1, dataset_id:, prediction_feature_id:)
- neighbors = []
- dataset = Dataset.find(dataset_id)
- query_fingerprint = self.fingerprint type
- dataset.compounds.each do |compound|
- values = dataset.values(compound,prediction_feature_id)
- if values
- candidate_fingerprint = compound.fingerprint type
-# features = (query_fingerprint + candidate_fingerprint).uniq
-# min_sum = 0
-# max_sum = 0
-# features.each do |f|
-# min,max = [query_fingerprint.count(f),candidate_fingerprint.count(f)].minmax
-# min_sum += min
-# max_sum += max
-# end
-# max_sum == 0 ? sim = 0 : sim = min_sum/max_sum.to_f
-# neighbors << [compound.id, sim] if sim and sim >= params[:min_sim]
-# end
-# end
-# neighbors.sort{|a,b| b.last <=> a.last}
- sim = Algorithm::Similarity.tanimoto(query_fingerprint , candidate_fingerprint)
- neighbors << {"_id" => compound.id, "toxicities" => values, "similarity" => sim} if sim >= min_sim
- end
- end
- neighbors.sort{|a,b| b["similarity"] <=> a["similarity"]}
- end
-=end
-
def fingerprint_neighbors(type:, min_sim: 0.1, dataset_id:, prediction_feature_id:)
neighbors = []
dataset = Dataset.find(dataset_id)
diff --git a/lib/crossvalidation.rb b/lib/crossvalidation.rb
index 357f0fa..420dd8c 100644
--- a/lib/crossvalidation.rb
+++ b/lib/crossvalidation.rb
@@ -168,7 +168,7 @@ module OpenTox
def correlation_plot
unless correlation_plot_id
- plot_id = ValidationStatistics.correlation_plot predictions
+ plot_id = ValidationStatistics.correlation_plot id, predictions
update(:correlation_plot_id => plot_id)
end
$gridfs.find_one(_id: correlation_plot_id).data
diff --git a/lib/dataset.rb b/lib/dataset.rb
index 9138452..0c65d61 100644
--- a/lib/dataset.rb
+++ b/lib/dataset.rb
@@ -46,11 +46,8 @@ module OpenTox
feature = feature.id if feature.is_a? Feature
data_entries[substance.to_s] ||= {}
data_entries[substance.to_s][feature.to_s] ||= []
- if value.is_a? Array
- data_entries[substance.to_s][feature.to_s] += value
- else
- data_entries[substance.to_s][feature.to_s] << value
- end
+ data_entries[substance.to_s][feature.to_s] << value
+ #data_entries[substance.to_s][feature.to_s].uniq! if value.numeric? # assuming that identical values come from the same source
end
# Dataset operations
@@ -75,6 +72,7 @@ module OpenTox
dataset = self.class.create(:source => self.id )
substances.each do |substance|
substance.dataset_ids << dataset.id
+ substance.dataset_ids.uniq!
substance.save
dataset.data_entries[substance.id.to_s] = data_entries[substance.id.to_s] ||= {}
end
@@ -200,7 +198,8 @@ module OpenTox
next
end
all_substances << substance
- substance.dataset_ids << self.id unless substance.dataset_ids.include? self.id
+ substance.dataset_ids << self.id
+ substance.dataset_ids.uniq!
substance.save
unless vals.size == features.size
diff --git a/lib/import.rb b/lib/import.rb
index 2dcc361..80d4579 100644
--- a/lib/import.rb
+++ b/lib/import.rb
@@ -10,15 +10,15 @@ module OpenTox
bundles = JSON.parse(RestClientWrapper.get('https://data.enanomapper.net/bundle?media=application%2Fjson'))["dataset"]
File.open(File.join(dir,"bundles.json"),"w+"){|f| f.puts JSON.pretty_generate(bundles)}
bundles.each do |bundle|
- p bundle["title"]
+ $logger.debug bundle["title"]
nanoparticles = JSON.parse(RestClientWrapper.get(bundle["dataset"]+"?media=application%2Fjson"))["dataEntry"]
- p nanoparticles.size
+ $logger.debug nanoparticles.size
nanoparticles.each do |nanoparticle|
uuid = nanoparticle["values"]["https://data.enanomapper.net/identifier/uuid"]
$logger.debug uuid
File.open(File.join(dir,"nanoparticle-#{uuid}.json"),"w+"){|f| f.puts JSON.pretty_generate(nanoparticle)}
studies = JSON.parse(RestClientWrapper.get(File.join(nanoparticle["compound"]["URI"],"study")))["study"]
- p uuid if studies.size < 1
+ $logger.debug uuid if studies.size < 1
studies.each do |study|
File.open(File.join(dir,"study-#{study["uuid"]}.json"),"w+"){|f| f.puts JSON.pretty_generate(study)}
end
@@ -27,35 +27,58 @@ module OpenTox
end
def self.import dir="."
+ start_time = Time.now
+ t1 = 0
+ t2 = 0
datasets = {}
JSON.parse(File.read(File.join(dir,"bundles.json"))).each do |bundle|
datasets[bundle["URI"]] = Dataset.find_or_create_by(:source => bundle["URI"],:name => bundle["title"])
end
Dir[File.join(dir,"study*.json")].each do |s|
+ t = Time.now
study = JSON.parse(File.read(s))
np = JSON.parse(File.read(File.join(dir,"nanoparticle-#{study['owner']['substance']['uuid']}.json")))
+ core = {}
+ coating = []
+ np["composition"].each do |c|
+ if c["relation"] == "HAS_CORE"
+ core = {
+ :uri => c["component"]["compound"]["URI"],
+ :name => c["component"]["values"]["https://data.enanomapper.net/feature/http%3A%2F%2Fwww.opentox.org%2Fapi%2F1.1%23ChemicalNameDefault"]
+ }
+ elsif c["relation"] == "HAS_COATING"
+ coating << {
+ :uri => c["component"]["compound"]["URI"],
+ :name => c["component"]["values"]["https://data.enanomapper.net/feature/http%3A%2F%2Fwww.opentox.org%2Fapi%2F1.1%23ChemicalNameDefault"]
+ }
+ end
+ end if np["composition"]
nanoparticle = Nanoparticle.find_or_create_by(
:name => np["values"]["https://data.enanomapper.net/identifier/name"],
:source => np["compound"]["URI"],
+ :core => core,
+ :coating => coating
)
np["bundles"].keys.each do |bundle_uri|
- nanoparticle["dataset_ids"] << datasets[bundle_uri].id
+ nanoparticle.dataset_ids << datasets[bundle_uri].id
end
- bundle = datasets[np["bundles"].keys.first].id if np["bundles"].size == 1
+ dataset = datasets[np["bundles"].keys.first]
+ proteomics_features = {}
study["effects"].each do |effect|
effect["result"]["textValue"] ? klass = NominalFeature : klass = NumericFeature
- # TODO parse core/coating
- #$logger.debug File.join(np["compound"]["URI"],"study")
effect["conditions"].delete_if { |k, v| v.nil? }
- # parse proteomics data
- if study["protocol"]["category"]["title"].match(/Proteomics/) and effect["result"]["textValue"] and effect["result"]["textValue"].length > 50
+ if study["protocol"]["category"]["title"].match(/Proteomics/) and effect["result"]["textValue"] and effect["result"]["textValue"].length > 50 # parse proteomics data
+=begin
JSON.parse(effect["result"]["textValue"]).each do |identifier, value|
- feature = klass.find_or_create_by(
- :name => identifier,
- :category => "Proteomics",
- )
- nanoparticle.parse_ambit_value feature, value, bundle
+ # time critical step
+ t = Time.now
+ proteomics_features[identifier] ||= klass.find_or_create_by(:name => identifier, :category => "Proteomics")
+ t1 += Time.now - t
+ t = Time.now
+ nanoparticle.parse_ambit_value proteomics_features[identifier], value, dataset
+ t2 += Time.now - t
end
+=end
else
feature = klass.find_or_create_by(
:name => effect["endpoint"],
@@ -63,10 +86,14 @@ module OpenTox
:category => study["protocol"]["topcategory"],
:conditions => effect["conditions"]
)
- nanoparticle.parse_ambit_value feature, effect["result"], bundle
+ nanoparticle.parse_ambit_value feature, effect["result"], dataset
end
end
nanoparticle.save
+ #p "Total time: #{Time.now - start_time}"
+ #p "Proteomics features: #{t1}"
+ #p "Proteomics values: #{t2}"
+ #p "Time2: #{t2}"
end
datasets.each { |u,d| d.save }
end
diff --git a/lib/leave-one-out-validation.rb b/lib/leave-one-out-validation.rb
index b8deae9..9698e05 100644
--- a/lib/leave-one-out-validation.rb
+++ b/lib/leave-one-out-validation.rb
@@ -86,7 +86,6 @@ module OpenTox
class RegressionLeaveOneOutValidation < LeaveOneOutValidation
- include Plot
field :rmse, type: Float, default: 0
field :mae, type: Float, default: 0
@@ -101,7 +100,7 @@ module OpenTox
def correlation_plot
unless correlation_plot_id
- #plot_id = correlation_plot
+ plot_id = ValidationStatistics.correlation_plot id, predictions
update(:correlation_plot_id => plot_id)
end
$gridfs.find_one(_id: correlation_plot_id).data
diff --git a/lib/model.rb b/lib/model.rb
index 3a178a1..18d621b 100644
--- a/lib/model.rb
+++ b/lib/model.rb
@@ -32,12 +32,13 @@ module OpenTox
self.neighbor_algorithm_parameters ||= {}
self.neighbor_algorithm_parameters[:dataset_id] = training_dataset.id
- Algorithm.run(feature_selection_algorithm, self) if feature_selection_algorithm
+ #send(feature_selection_algorithm.to_sym) if feature_selection_algorithm
save
self
end
def correlation_filter
+ self.relevant_features = {}
toxicities = []
substances = []
training_dataset.substances.each do |s|
@@ -49,23 +50,22 @@ module OpenTox
R.assign "tox", toxicities
feature_ids = training_dataset.substances.collect{ |s| s["physchem_descriptors"].keys}.flatten.uniq
feature_ids.each do |feature_id|
- feature_values = substances.collect{|s| s["physchem_descriptors"][feature_id]}
+ feature_values = substances.collect{|s| s["physchem_descriptors"][feature_id].first if s["physchem_descriptors"][feature_id]}
R.assign "feature", feature_values
begin
- #R.eval "cor <- cor.test(-log(tox),-log(feature),use='complete')"
- R.eval "cor <- cor.test(tox,feature,method = 'pearson',use='complete')"
+ R.eval "cor <- cor.test(tox,feature,method = 'pearson',use='pairwise')"
pvalue = R.eval("cor$p.value").to_ruby
if pvalue <= 0.05
r = R.eval("cor$estimate").to_ruby
- relevant_features[feature] = {}
- relevant_features[feature]["pvalue"] = pvalue
- relevant_features[feature]["r"] = r
+ self.relevant_features[feature_id] = {}
+ self.relevant_features[feature_id]["pvalue"] = pvalue
+ self.relevant_features[feature_id]["r"] = r
end
rescue
warn "Correlation of '#{Feature.find(feature_id).name}' (#{feature_values}) with '#{Feature.find(prediction_feature_id).name}' (#{toxicities}) failed."
end
end
- relevant_features.sort!{|a,b| a[1]["pvalue"] <=> b[1]["pvalue"]}.to_h
+ self.relevant_features = self.relevant_features.sort{|a,b| a[1]["pvalue"] <=> b[1]["pvalue"]}.to_h
end
def predict_substance substance
diff --git a/lib/nanoparticle.rb b/lib/nanoparticle.rb
index 7890a19..5c6d944 100644
--- a/lib/nanoparticle.rb
+++ b/lib/nanoparticle.rb
@@ -3,12 +3,11 @@ module OpenTox
class Nanoparticle < Substance
include OpenTox
- field :core, type: String
+ field :core, type: Hash, default: {}
field :coating, type: Array, default: []
- field :bundles, type: Array, default: []
field :proteomics, type: Hash, default: {}
- def nanoparticle_neighbors min_sim: 0.1, type:, dataset_id:, prediction_feature_id:
+ def nanoparticle_neighbors_old min_sim: 0.9, type:, dataset_id:, prediction_feature_id:
dataset = Dataset.find(dataset_id)
neighbors = []
dataset.nanoparticles.each do |np|
@@ -25,33 +24,96 @@ module OpenTox
neighbors.sort!{|a,b| b["similarity"] <=> a["similarity"]}
neighbors
end
-
- def add_feature feature, value, dataset_id
+
+ def nanoparticle_neighbors min_sim: 0.9, type:, dataset_id:, prediction_feature_id:
+ p self.name
+ #p self.physchem_descriptors.keys.size
dataset = Dataset.find(dataset_id)
- case feature.category
- when "P-CHEM"
- physchem_descriptors[feature.id.to_s] ||= []
- physchem_descriptors[feature.id.to_s] << value
- physchem_descriptors[feature.id.to_s].uniq!
- when "Proteomics"
- proteomics[feature.id.to_s] ||= []
- proteomics[feature.id.to_s] << value
- proteomics[feature.id.to_s].uniq!
- when "TOX"
- # TODO generic way of parsing TOX values
- if feature.name == "Net cell association" and feature.unit == "mL/ug(Mg)"
- dataset.add self, feature, -Math.log10(value)
+ relevant_features = {}
+ toxicities = []
+ substances = []
+ # TODO: exclude query activities!!!
+ dataset.substances.each do |s|
+ dataset.values(s,prediction_feature_id).each do |act|
+ toxicities << act
+ substances << s
+ end
+ end
+ R.assign "tox", toxicities
+ feature_ids = physchem_descriptors.keys.select{|fid| Feature.find(fid).is_a? NumericFeature}
+ # identify relevant features
+ feature_ids.each do |feature_id|
+ feature_values = substances.collect{|s| s["physchem_descriptors"][feature_id].first if s["physchem_descriptors"][feature_id]}
+ R.assign "feature", feature_values
+ begin
+ R.eval "cor <- cor.test(tox,feature,method = 'pearson',use='pairwise')"
+ pvalue = R.eval("cor$p.value").to_ruby
+ if pvalue <= 0.05
+ r = R.eval("cor$estimate").to_ruby
+ relevant_features[feature_id] = {}
+ relevant_features[feature_id]["pvalue"] = pvalue
+ relevant_features[feature_id]["r"] = r
+ relevant_features[feature_id]["mean"] = R.eval("mean(feature, na.rm=TRUE)").to_ruby
+ relevant_features[feature_id]["sd"] = R.eval("sd(feature, na.rm=TRUE)").to_ruby
+ end
+ rescue
+ warn "Correlation of '#{Feature.find(feature_id).name}' (#{feature_values}) with '#{Feature.find(prediction_feature_id).name}' (#{toxicities}) failed."
+ end
+ end
+ neighbors = []
+ substances.each do |substance|
+ values = dataset.values(substance,prediction_feature_id)
+ if values
+ common_descriptors = relevant_features.keys & substance.physchem_descriptors.keys
+ # scale values
+ query_descriptors = common_descriptors.collect{|d| (physchem_descriptors[d].median-relevant_features[d]["mean"])/relevant_features[d]["sd"]}
+ neighbor_descriptors = common_descriptors.collect{|d| (substance.physchem_descriptors[d].median-relevant_features[d]["mean"])/relevant_features[d]["sd"]}
+ #weights = common_descriptors.collect{|d| 1-relevant_features[d]["pvalue"]}
+ weights = common_descriptors.collect{|d| relevant_features[d]["r"]**2}
+ #p weights
+ sim = Algorithm::Similarity.weighted_cosine(query_descriptors,neighbor_descriptors,weights)
+ ##p "SIM"
+ #p [sim, Algorithm::Similarity.cosine(query_descriptors,neighbor_descriptors)]
+ neighbors << {"_id" => substance.id, "toxicities" => values, "similarity" => sim} if sim >= min_sim
+ end
+ end
+ p neighbors.size
+ neighbors.sort!{|a,b| b["similarity"] <=> a["similarity"]}
+ neighbors
+ end
+
+ def add_feature feature, value, dataset
+ unless feature.name == "ATOMIC COMPOSITION" or feature.name == "FUNCTIONAL GROUP" # redundand
+ case feature.category
+ when "P-CHEM"
+ physchem_descriptors[feature.id.to_s] ||= []
+ physchem_descriptors[feature.id.to_s] << value
+ physchem_descriptors[feature.id.to_s].uniq!
+ when "Proteomics"
+ proteomics[feature.id.to_s] ||= []
+ proteomics[feature.id.to_s] << value
+ proteomics[feature.id.to_s].uniq!
+ when "TOX"
+ # TODO generic way of parsing TOX values
+ if feature.name == "Net cell association" and feature.unit == "mL/ug(Mg)"
+ dataset.add self, feature, Math.log2(value)
+ elsif feature.name == "Total protein (BCA assay)"
+ physchem_descriptors[feature.id.to_s] ||= []
+ physchem_descriptors[feature.id.to_s] << value
+ physchem_descriptors[feature.id.to_s].uniq!
+ else
+ dataset.add self, feature, value
+ end
+ dataset.save
+ dataset_ids << dataset.id
+ dataset_ids.uniq!
else
- dataset.add self, feature, value
+ warn "Unknown feature type '#{feature.category}'. Value '#{value}' not inserted."
end
- dataset.save
- else
- warn "Unknown feature type '#{feature.category}'. Value '#{value}' not inserted."
end
end
- def parse_ambit_value feature, v, dataset_id
- dataset = Dataset.find(dataset_id)
+ def parse_ambit_value feature, v, dataset
v.delete "unit"
# TODO: ppm instead of weights
if v.keys == ["textValue"]
diff --git a/lib/regression.rb b/lib/regression.rb
index 9d305a6..6487557 100644
--- a/lib/regression.rb
+++ b/lib/regression.rb
@@ -71,7 +71,6 @@ module OpenTox
#def self.local_physchem_regression(substance:, neighbors:, feature_id:, dataset_id:, method: 'pls')#, method_params="ncomp = 4"
def self.local_physchem_regression substance, neighbors, method='pls' #, method_params="ncomp = 4"
- #dataset = Dataset.find dataset_id
activities = []
weights = []
pc_ids = neighbors.collect{|n| Substance.find(n["_id"]).physchem_descriptors.keys}.flatten.uniq
@@ -83,7 +82,6 @@ module OpenTox
activities = neighbor["toxicities"]
activities.each do |act|
data_frame[0][i] = act
- # TODO: update with cosine similarity for physchem
weights << n["similarity"]
neighbor.physchem_descriptors.each do |pid,values|
values = [values] unless values.is_a? Array
diff --git a/lib/similarity.rb b/lib/similarity.rb
index f25d4c3..00179c1 100644
--- a/lib/similarity.rb
+++ b/lib/similarity.rb
@@ -38,7 +38,7 @@ module OpenTox
magnitude_a += w[i].abs*a[i]**2
magnitude_b += w[i].abs*b[i]**2
end
- dot_product/Math.sqrt(magnitude_a*magnitude_b)
+ dot_product/(Math.sqrt(magnitude_a)*Math.sqrt(magnitude_b))
end
end
diff --git a/lib/validation-statistics.rb b/lib/validation-statistics.rb
index 156353a..e61543b 100644
--- a/lib/validation-statistics.rb
+++ b/lib/validation-statistics.rb
@@ -83,7 +83,7 @@ module OpenTox
end
R.assign "measurement", x
R.assign "prediction", y
- R.eval "r <- cor(measurement,prediction,use='complete')"
+ R.eval "r <- cor(measurement,prediction,use='pairwise')"
r = R.eval("r").to_ruby
mae = mae/predictions.size
@@ -99,11 +99,7 @@ module OpenTox
}
end
- end
-
- module Plot
-
- def plot_id
+ def self.correlation_plot id, predictions
tmpfile = "/tmp/#{id.to_s}_correlation.png"
x = []
y = []
@@ -115,10 +111,11 @@ module OpenTox
R.assign "prediction", y
R.eval "all = c(measurement,prediction)"
R.eval "range = c(min(all), max(all))"
- R.eval "image = qplot(prediction,measurement,main='',asp=1,xlim=range, ylim=range)"
+ # TODO units
+ R.eval "image = qplot(prediction,measurement,main='',xlab='Prediction',ylab='Measurement',asp=1,xlim=range, ylim=range)"
R.eval "image = image + geom_abline(intercept=0, slope=1)"
R.eval "ggsave(file='#{tmpfile}', plot=image)"
- file = Mongo::Grid::File.new(File.read(tmpfile), :filename => "#{self.id.to_s}_correlation_plot.png")
+ file = Mongo::Grid::File.new(File.read(tmpfile), :filename => "#{id.to_s}_correlation_plot.png")
plot_id = $gridfs.insert_one(file)
plot_id
end
diff --git a/test/dataset.rb b/test/dataset.rb
index 7ec9973..e59441b 100644
--- a/test/dataset.rb
+++ b/test/dataset.rb
@@ -154,6 +154,7 @@ class DatasetTest < MiniTest::Test
c = Compound.from_smiles row.shift
serialized[c.inchi] = row
end
+ #puts serialized.to_yaml
original.each do |inchi,row|
row.each_with_index do |v,i|
if v.numeric?
diff --git a/test/nanoparticles.rb b/test/nanoparticles.rb
index 897552d..1cd1ff0 100644
--- a/test/nanoparticles.rb
+++ b/test/nanoparticles.rb
@@ -9,10 +9,9 @@ class NanoparticleTest < MiniTest::Test
end
def test_create_model_with_feature_selection
- skip
training_dataset = Dataset.find_or_create_by(:name => "Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles")
- feature = Feature.find_or_create_by(name: "7.99 Toxicity (other) ICP-AES", category: "TOX", unit: "mL/ug(Mg)")
- model = Model::LazarRegression.create(feature, training_dataset, {:prediction_algorithm => "OpenTox::Algorithm::Regression.local_physchem_regression", :neighbor_algorithm => "nanoparticle_neighbors"})
+ feature = Feature.find_or_create_by(name: "Net cell association", category: "TOX", unit: "mL/ug(Mg)")
+ model = Model::LazarRegression.create(feature, training_dataset, {:prediction_algorithm => "OpenTox::Algorithm::Regression.local_weighted_average", :neighbor_algorithm => "nanoparticle_neighbors", :feature_selection_algorithm => "correlation_filter"})
nanoparticle = training_dataset.nanoparticles[-34]
#p nanoparticle.neighbors
prediction = model.predict nanoparticle
@@ -23,45 +22,55 @@ class NanoparticleTest < MiniTest::Test
def test_create_model
training_dataset = Dataset.find_or_create_by(:name => "Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles")
- #p training_dataset.nanoparticles.size
feature = Feature.find_or_create_by(name: "Net cell association", category: "TOX", unit: "mL/ug(Mg)")
model = Model::LazarRegression.create(feature, training_dataset, {:prediction_algorithm => "OpenTox::Algorithm::Regression.local_weighted_average", :neighbor_algorithm => "nanoparticle_neighbors"})
- #model = Model::LazarRegression.create(feature, training_dataset, {:prediction_algorithm => "OpenTox::Algorithm::Regression.local_physchem_regression", :neighbor_algorithm => "nanoparticle_neighbors"})
nanoparticle = training_dataset.nanoparticles[-34]
prediction = model.predict nanoparticle
- p prediction
refute_nil prediction[:value]
+ assert_includes nanoparticle.dataset_ids, training_dataset.id
+ model.delete
end
def test_validate_model
training_dataset = Dataset.find_or_create_by(:name => "Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles")
feature = Feature.find_or_create_by(name: "Net cell association", category: "TOX", unit: "mL/ug(Mg)")
- #feature = Feature.find_or_create_by(name: "7.99 Toxicity (other) ICP-AES", category: "TOX", unit: "mL/ug(Mg)")
- model = Model::LazarRegression.create(feature, training_dataset, {:prediction_algorithm => "OpenTox::Algorithm::Regression.local_weighted_average", :neighbor_algorithm => "nanoparticle_neighbors"})
- p model
+ model = Model::LazarRegression.create(feature, training_dataset, {:prediction_algorithm => "OpenTox::Algorithm::Regression.local_weighted_average", :neighbor_algorithm => "nanoparticle_neighbors", :neighbor_algorithm_parameters => {:min_sim => 0.5}})
cv = RegressionCrossValidation.create model
p cv
+ File.open("tmp.png","w+"){|f| f.puts cv.correlation_plot}
+ refute_nil cv.r_squared
+ refute_nil cv.rmse
end
def test_validate_pls_model
training_dataset = Dataset.find_or_create_by(:name => "Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles")
feature = Feature.find_or_create_by(name: "Net cell association", category: "TOX", unit: "mL/ug(Mg)")
- #feature = Feature.find_or_create_by(name: "7.99 Toxicity (other) ICP-AES", category: "TOX", unit: "mL/ug(Mg)")
model = Model::LazarRegression.create(feature, training_dataset, {:prediction_algorithm => "OpenTox::Algorithm::Regression.local_physchem_regression", :neighbor_algorithm => "nanoparticle_neighbors"})
- #model = Model::LazarRegression.create(feature, training_dataset, {:prediction_algorithm => "OpenTox::Algorithm::Regression.local_weighted_average", :neighbor_algorithm => "nanoparticle_neighbors"})
- p model
cv = RegressionCrossValidation.create model
p cv
+ File.open("tmp.png","w+"){|f| f.puts cv.correlation_plot}
+ refute_nil cv.r_squared
+ refute_nil cv.rmse
end
def test_export
- skip
Dataset.all.each do |d|
puts d.to_csv
end
end
def test_summaries
+ datasets = Dataset.all
+ datasets = datasets.select{|d| !d.name.nil?}
+ datasets.each do |d|
+ p d.name
+
+ #p d.features.select{|f| f.name.match (/Total/)}
+ #p d.features.collect{|f| "#{f.name} #{f.unit} #{f.conditions}"}
+ p d.features.uniq.collect{|f| f.name}
+ end
+ assert_equal 9, datasets.size
+=begin
skip
features = Feature.all.to_a
#p features.collect do |f|
@@ -83,6 +92,7 @@ class NanoparticleTest < MiniTest::Test
#pccounts.each{|e,n| p Feature.find(e),n if n > 100}
#p toxcounts.collect{|e,n| Feature.find(e).name if n > 1}.uniq
toxcounts.each{|e,n| p Feature.find(e),n if n > 100}
+=end
end
diff --git a/test/setup.rb b/test/setup.rb
index e7c32b4..6c97282 100644
--- a/test/setup.rb
+++ b/test/setup.rb
@@ -5,5 +5,5 @@ require_relative '../lib/lazar.rb'
include OpenTox
TEST_DIR ||= File.expand_path(File.dirname(__FILE__))
DATA_DIR ||= File.join(TEST_DIR,"data")
-$mongo.database.drop
-$gridfs = $mongo.database.fs
+#$mongo.database.drop
+#$gridfs = $mongo.database.fs