summary | refs | log | tree | commit | diff
path: root/lib/regression.rb
diff options
context:
space:
mode:
author: Christoph Helma <helma@in-silico.ch> 2016-06-02 17:54:48 +0200
committer: Christoph Helma <helma@in-silico.ch> 2016-06-02 17:54:48 +0200
commit: eec5bddbd35c9ecee8021128508d8718bccb4fe3 (patch)
tree: 93765b1f0b97415e7df5abdbcab7086f8c2fa7cf /lib/regression.rb
parent: 85f2308c101b4778508c2d767e08af4cfd671b7b (diff)
local pls regression for nanoparticle proteomics
Diffstat (limited to 'lib/regression.rb')
-rw-r--r-- lib/regression.rb 41
1 files changed, 25 insertions, 16 deletions
diff --git a/lib/regression.rb b/lib/regression.rb
index 5028c78..b9067c6 100644
--- a/lib/regression.rb
+++ b/lib/regression.rb
@@ -88,35 +88,42 @@ module OpenTox
data_frame[j][i] = d[:scaled_value]
end
end if activities
- (0..pc_ids.size+1).each do |j| # for R: fill empty values with NA
+ #(0..pc_ids.size+1).each do |j| # for R: fill empty values with NA
+ (0..pc_ids.size).each do |j| # for R: fill empty values with NA
data_frame[j] ||= []
data_frame[j][i] ||= "NA"
end
end
- remove_idx = []
- data_frame.each_with_index do |r,i|
- remove_idx << i if r.uniq.size == 1 # remove properties with a single value
- end
+ #remove_idx = []
+ #data_frame.each_with_index do |r,i|
+ #remove_idx << i if r.uniq.size == 1 # remove properties with a single value TODO: don't break R names assignment
+ #end
- remove_idx.reverse.each do |i|
- data_frame.delete_at i
- pc_ids.delete_at i
- end
+ #p data_frame.size
+ #p pc_ids.size
+ #data_frame.delete_if.with_index { |_, index| remove_idx.include? index }
+ #pc_ids.delete_if.with_index { |_, index| remove_idx.include? index-1 }
+ #remove_idx.sort.reverse.each do |i|
+ #p i
+ #data_frame.delete_at i
+ #pc_ids.delete_at i
+ #end
+ #p data_frame.size
+ #p pc_ids.size
if pc_ids.empty?
prediction = local_weighted_average substance, neighbors
prediction[:warning] = "No variables for regression model. Using weighted average of similar substances."
prediction
else
- query_descriptors = pc_ids.collect do |i|
- substance.scaled_values[i] ? substance.scaled_values[i] : "NA"
- end
+ query_descriptors = pc_ids.collect { |i| substance.scaled_values[i] }
remove_idx = []
query_descriptors.each_with_index do |v,i|
- remove_idx << i if v == "NA"
+ #remove_idx << i if v == "NA"
+ remove_idx << i unless v
end
- remove_idx.reverse.each do |i|
+ remove_idx.sort.reverse.each do |i|
data_frame.delete_at i
pc_ids.delete_at i
query_descriptors.delete_at i
@@ -135,8 +142,9 @@ module OpenTox
def self.r_model_prediction method, training_data, training_features, training_weights, query_feature_values
R.assign "weights", training_weights
r_data_frame = "data.frame(#{training_data.collect{|r| "c(#{r.join(',')})"}.join(', ')})"
-rlib = File.expand_path(File.join(File.dirname(__FILE__),"..","R"))
=begin
+=end
+rlib = File.expand_path(File.join(File.dirname(__FILE__),"..","R"))
File.open("tmp.R","w+"){|f|
f.puts "suppressPackageStartupMessages({
library(iterators,lib=\"#{rlib}\")
@@ -159,10 +167,11 @@ rlib = File.expand_path(File.join(File.dirname(__FILE__),"..","R"))
f.puts "names(fingerprint) <- features"
f.puts "prediction <- predict(model,fingerprint)"
}
-=end
R.eval "data <- #{r_data_frame}"
R.assign "features", training_features
+ p training_features.size
+ p R.eval("names(data)").to_ruby.size
begin
R.eval "names(data) <- append(c('activities'),features)" #
R.eval "model <- train(activities ~ ., data = data, method = '#{method}', na.action = na.pass)"