summary | refs | log | tree | commit | diff
path: root/lib/regression.rb
diff options
context:
space:
mode:
author Christoph Helma <helma@in-silico.ch> 2016-02-13 13:15:29 +0100
committer Christoph Helma <helma@in-silico.ch> 2016-02-13 13:15:29 +0100
commit e778475c578f13f30af4437845716d7e781c2609 (patch)
tree 82c14dabc4cf29df1f097a9f8c5c4d8b0b406c4d /lib/regression.rb
parent f61b7d3c65d084747dc1bf87214e5ec0c57326be (diff)
improved handling of duplicates in validations
Diffstat (limited to 'lib/regression.rb')
-rw-r--r-- lib/regression.rb 62
1 files changed, 37 insertions, 25 deletions
diff --git a/lib/regression.rb b/lib/regression.rb
index 7c64d8f..2b41851 100644
--- a/lib/regression.rb
+++ b/lib/regression.rb
@@ -4,23 +4,19 @@ module OpenTox
class Regression
def self.weighted_average compound, params
- #p params.keys
weighted_sum = 0.0
sim_sum = 0.0
confidence = 0.0
neighbors = params[:neighbors]
- #activities = []
neighbors.each do |row|
- #if row["dataset_ids"].include? params[:training_dataset_id]
- sim = row["tanimoto"]
- confidence = sim if sim > confidence # distance to nearest neighbor
- # TODO add LOO errors
- row["features"][params[:prediction_feature_id].to_s].each do |act|
- weighted_sum += sim*Math.log10(act)
- #activities << act # TODO: Transformation??
- sim_sum += sim
- end
- #end
+ sim = row["tanimoto"]
+ confidence = sim if sim > confidence # distance to nearest neighbor
+ # TODO add LOO errors
+ row["features"][params[:prediction_feature_id].to_s].each do |act|
+ weighted_sum += sim*Math.log10(act)
+ #activities << act # TODO: Transformation??
+ sim_sum += sim
+ end
end
#R.assign "activities", activities
#R.eval "cv = cv(activities)"
@@ -35,7 +31,7 @@ module OpenTox
def self.local_pls_regression compound, params
neighbors = params[:neighbors]
- return {:value => nil, :confidence => nil} unless neighbors.size > 0
+ return {:value => nil, :confidence => nil, :warning => "No similar compounds in the training data"} unless neighbors.size > 0
activities = []
fingerprints = {}
weights = []
@@ -62,21 +58,37 @@ module OpenTox
fingerprints.each do |k,v|
unless v.uniq.size == 1
data_frame << "factor(c(#{v.collect{|m| m ? "T" : "F"}.join ","}))"
- variables << "'#{k}'"
+ variables << k
end
end
- begin
+ if variables.empty?
+ result = weighted_average(compound, params)
+ result[:warning] = "No variables for regression model. Using weighted average of similar compounds."
+ return result
+ return {:value => nil, :confidence => nil} # TODO confidence
+ else
R.eval "data <- data.frame(#{data_frame.join ","})"
- R.eval "names(data) <- c('activities',#{variables.join ','})"
- R.eval "model <- plsr(activities ~ .,data = data, ncomp = 3, weights = weights)"
- compound_features = fingerprint_ids.collect{|f| compound.fingerprint.include? f }
- R.eval "fingerprint <- rbind(c(#{compound_features.collect{|f| f ? "T" : "F"}.join ','}))"
- R.eval "names(fingerprint) <- c(#{variables.join ','})"
- R.eval "prediction <- predict(model,fingerprint)"
- prediction = 10**R.eval("prediction").to_f
- {:value => prediction, :confidence => 1} # TODO confidence
- rescue
- {:value => nil, :confidence => nil} # TODO confidence
+ R.assign "features", variables
+ R.eval "names(data) <- append(c('activities'),features)" #
+ begin
+ R.eval "model <- plsr(activities ~ .,data = data, ncomp = 4, weights = weights)"
+ rescue # fall back to weighted average
+ result = weighted_average(compound, params)
+ result[:warning] = "Could not create local PLS model. Using weighted average of similar compounds."
+ return result
+ end
+ #begin
+ #compound_features = fingerprint_ids.collect{|f| compound.fingerprint.include? f } # FIX
+ compound_features = variables.collect{|f| compound.fingerprint.include? f }
+ R.eval "fingerprint <- rbind(c(#{compound_features.collect{|f| f ? "T" : "F"}.join ','}))"
+ R.eval "names(fingerprint) <- features" #
+ R.eval "prediction <- predict(model,fingerprint)"
+ prediction = 10**R.eval("prediction").to_f
+ return {:value => prediction, :confidence => 1} # TODO confidence
+ #rescue
+ #p "Prediction failed"
+ #return {:value => nil, :confidence => nil} # TODO confidence
+ #end
end
end