summaryrefslogtreecommitdiff
path: root/lib/regression.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/regression.rb')
-rw-r--r--lib/regression.rb144
1 files changed, 7 insertions, 137 deletions
diff --git a/lib/regression.rb b/lib/regression.rb
index 5021fb3..3890987 100644
--- a/lib/regression.rb
+++ b/lib/regression.rb
@@ -3,148 +3,18 @@ module OpenTox
class Regression
- def self.local_weighted_average compound, params
+ def self.weighted_average dependent_variables:, independent_variables:nil, weights:, query_variables:nil
+ # TODO: prediction_interval
weighted_sum = 0.0
sim_sum = 0.0
- neighbors = params[:neighbors]
- neighbors.each do |row|
- sim = row["tanimoto"]
- if row["features"][params[:prediction_feature_id].to_s]
- row["features"][params[:prediction_feature_id].to_s].each do |act|
- weighted_sum += sim*Math.log10(act)
- sim_sum += sim
- end
- end
- end
- sim_sum == 0 ? prediction = nil : prediction = 10**(weighted_sum/sim_sum)
+ dependent_variables.each_with_index do |v,i|
+ weighted_sum += weights[i]*dependent_variables[i]
+ sim_sum += weights[i]
+ end if dependent_variables
+ sim_sum == 0 ? prediction = nil : prediction = weighted_sum/sim_sum
{:value => prediction}
end
- # TODO explicit neighbors, also for physchem
- def self.local_fingerprint_regression compound, params, method='pls'#, method_params="sigma=0.05"
- neighbors = params[:neighbors]
- return {:value => nil, :confidence => nil, :warning => "No similar compounds in the training data"} unless neighbors.size > 0
- activities = []
- fingerprints = {}
- weights = []
- fingerprint_ids = neighbors.collect{|row| Compound.find(row["_id"]).fingerprint}.flatten.uniq.sort
-
- neighbors.each_with_index do |row,i|
- neighbor = Compound.find row["_id"]
- fingerprint = neighbor.fingerprint
- if row["features"][params[:prediction_feature_id].to_s]
- row["features"][params[:prediction_feature_id].to_s].each do |act|
- activities << Math.log10(act)
- weights << row["tanimoto"]
- fingerprint_ids.each_with_index do |id,j|
- fingerprints[id] ||= []
- fingerprints[id] << fingerprint.include?(id)
- end
- end
- end
- end
-
- variables = []
- data_frame = [activities]
- fingerprints.each do |k,v|
- unless v.uniq.size == 1
- data_frame << v.collect{|m| m ? "T" : "F"}
- variables << k
- end
- end
-
- if variables.empty?
- result = local_weighted_average(compound, params)
- result[:warning] = "No variables for regression model. Using weighted average of similar compounds."
- return result
-
- else
- compound_features = variables.collect{|f| compound.fingerprint.include?(f) ? "T" : "F"}
- prediction = r_model_prediction method, data_frame, variables, weights, compound_features
- if prediction.nil? or prediction[:value].nil?
- prediction = local_weighted_average(compound, params)
- prediction[:warning] = "Could not create local PLS model. Using weighted average of similar compounds."
- return prediction
- else
- prediction[:prediction_interval] = [10**(prediction[:value]-1.96*prediction[:rmse]), 10**(prediction[:value]+1.96*prediction[:rmse])]
- prediction[:value] = 10**prediction[:value]
- prediction[:rmse] = 10**prediction[:rmse]
- prediction
- end
- end
-
- end
-
- def self.local_physchem_regression compound, params, method="plsr"#, method_params="ncomp = 4"
-
- neighbors = params[:neighbors]
- return {:value => nil, :confidence => nil, :warning => "No similar compounds in the training data"} unless neighbors.size > 0
- return {:value => neighbors.first["features"][params[:prediction_feature_id]], :confidence => nil, :warning => "Only one similar compound in the training set"} unless neighbors.size > 1
-
- activities = []
- weights = []
- physchem = {}
-
- neighbors.each_with_index do |row,i|
- neighbor = Compound.find row["_id"]
- if row["features"][params[:prediction_feature_id].to_s]
- row["features"][params[:prediction_feature_id].to_s].each do |act|
- activities << Math.log10(act)
- weights << row["tanimoto"] # TODO cosine ?
- neighbor.physchem.each do |pid,v| # insert physchem only if there is an activity
- physchem[pid] ||= []
- physchem[pid] << v
- end
- end
- end
- end
-
- # remove properties with a single value
- physchem.each do |pid,v|
- physchem.delete(pid) if v.uniq.size <= 1
- end
-
- if physchem.empty?
- result = local_weighted_average(compound, params)
- result[:warning] = "No variables for regression model. Using weighted average of similar compounds."
- return result
-
- else
- data_frame = [activities] + physchem.keys.collect { |pid| physchem[pid] }
- prediction = r_model_prediction method, data_frame, physchem.keys, weights, physchem.keys.collect{|pid| compound.physchem[pid]}
- if prediction.nil?
- prediction = local_weighted_average(compound, params)
- prediction[:warning] = "Could not create local PLS model. Using weighted average of similar compounds."
- return prediction
- else
- prediction[:value] = 10**prediction[:value]
- prediction
- end
- end
-
- end
-
- def self.r_model_prediction method, training_data, training_features, training_weights, query_feature_values
- R.assign "weights", training_weights
- r_data_frame = "data.frame(#{training_data.collect{|r| "c(#{r.join(',')})"}.join(', ')})"
- R.eval "data <- #{r_data_frame}"
- R.assign "features", training_features
- R.eval "names(data) <- append(c('activities'),features)" #
- begin
- R.eval "model <- train(activities ~ ., data = data, method = '#{method}')"
- rescue
- return nil
- end
- R.eval "fingerprint <- data.frame(rbind(c(#{query_feature_values.join ','})))"
- R.eval "names(fingerprint) <- features"
- R.eval "prediction <- predict(model,fingerprint)"
- {
- :value => R.eval("prediction").to_f,
- :rmse => R.eval("getTrainPerf(model)$TrainRMSE").to_f,
- :r_squared => R.eval("getTrainPerf(model)$TrainRsquared").to_f,
- }
- end
-
end
end
end