summary | refs | log | tree | commit | diff
path: root/lib
diff options
context:
space:
mode:
author Christoph Helma <helma@in-silico.ch> 2016-04-28 08:11:12 +0200
committer Christoph Helma <helma@in-silico.ch> 2016-04-28 08:11:12 +0200
commit 32d767ee7cfcc19337892551906950621f348174 (patch)
tree 20b1f34e933f4f4fec053db6daeeaa99feebf993 /lib
parent cfc64a2966ab38698e499f0b44f41208ee77a07f (diff)
nanoparticle crossvalidation technically working
Diffstat (limited to 'lib')
-rw-r--r-- lib/crossvalidation.rb 2
-rw-r--r-- lib/regression.rb 14
-rw-r--r-- lib/validation.rb 6
3 files changed, 11 insertions, 11 deletions
diff --git a/lib/crossvalidation.rb b/lib/crossvalidation.rb
index 50afb6f..0ae36c4 100644
--- a/lib/crossvalidation.rb
+++ b/lib/crossvalidation.rb
@@ -44,7 +44,7 @@ module OpenTox
$logger.debug "Dataset #{training_dataset.name}, Fold #{fold_nr}: #{Time.now-t} seconds"
#end
end
- Process.waitall
+ #Process.waitall
cv.validation_ids = Validation.where(:crossvalidation_id => cv.id).distinct(:_id)
cv.validations.each do |validation|
nr_instances += validation.nr_instances
diff --git a/lib/regression.rb b/lib/regression.rb
index 5610a77..3a59c14 100644
--- a/lib/regression.rb
+++ b/lib/regression.rb
@@ -9,6 +9,7 @@ module OpenTox
neighbors = params[:neighbors]
neighbors.each do |row|
sim = row["tanimoto"]
+ sim ||= 1 # TODO: sim f nanoparticles
if row["toxicities"][params[:prediction_feature_id].to_s]
row["toxicities"][params[:prediction_feature_id].to_s].each do |act|
weighted_sum += sim*Math.log10(act)
@@ -120,7 +121,7 @@ module OpenTox
result[:warning] = "No variables for regression model. Using weighted average of similar compounds."
return result
else
- query_descriptors = pc_ids.collect{|i| compound.physchem_descriptors[i].for_R}
+ query_descriptors = pc_ids.collect{|i| compound.physchem_descriptors[i].for_R if compound.physchem_descriptors[i]}.compact
remove_idx = []
query_descriptors.each_with_index do |v,i|
remove_idx << i if v == "NA"
@@ -172,13 +173,9 @@ rlib = File.expand_path(File.join(File.dirname(__FILE__),"..","R"))
R.eval "data <- #{r_data_frame}"
R.assign "features", training_features
- R.eval "names(data) <- append(c('activities'),features)" #
- #begin
+ begin
+ R.eval "names(data) <- append(c('activities'),features)" #
R.eval "model <- train(activities ~ ., data = data, method = '#{method}', na.action = na.pass)"
- #rescue
- #return nil
- #end
- p query_feature_values
R.eval "fingerprint <- data.frame(rbind(c(#{query_feature_values.join ','})))"
R.eval "names(fingerprint) <- features"
R.eval "prediction <- predict(model,fingerprint)"
@@ -187,6 +184,9 @@ rlib = File.expand_path(File.join(File.dirname(__FILE__),"..","R"))
:rmse => R.eval("getTrainPerf(model)$TrainRMSE").to_f,
:r_squared => R.eval("getTrainPerf(model)$TrainRsquared").to_f,
}
+ rescue
+ return nil
+ end
end
end
diff --git a/lib/validation.rb b/lib/validation.rb
index 6b515e4..68cb1a1 100644
--- a/lib/validation.rb
+++ b/lib/validation.rb
@@ -24,12 +24,12 @@ module OpenTox
def self.create model, training_set, test_set, crossvalidation=nil
- atts = model.attributes.dup # do not modify attributes from original model
+ atts = model.attributes.dup # do not modify attributes of the original model
atts["_id"] = BSON::ObjectId.new
atts[:training_dataset_id] = training_set.id
validation_model = model.class.create model.prediction_feature, training_set, atts
validation_model.save
- predictions = validation_model.predict test_set.compounds
+ predictions = validation_model.predict test_set.substances
predictions.each{|cid,p| p.delete(:neighbors)}
nr_unpredicted = 0
predictions.each do |cid,prediction|
@@ -43,7 +43,7 @@ module OpenTox
validation = self.new(
:model_id => validation_model.id,
:test_dataset_id => test_set.id,
- :nr_instances => test_set.compounds.size,
+ :nr_instances => test_set.substances.size,
:nr_unpredicted => nr_unpredicted,
:predictions => predictions#.sort{|a,b| p a; b[3] <=> a[3]} # sort according to confidence
)