diff options
Diffstat (limited to 'lib/regression.rb')
-rw-r--r-- | lib/regression.rb | 41 |
1 files changed, 25 insertions, 16 deletions
diff --git a/lib/regression.rb b/lib/regression.rb index 5028c78..b9067c6 100644 --- a/lib/regression.rb +++ b/lib/regression.rb @@ -88,35 +88,42 @@ module OpenTox data_frame[j][i] = d[:scaled_value] end end if activities - (0..pc_ids.size+1).each do |j| # for R: fill empty values with NA + #(0..pc_ids.size+1).each do |j| # for R: fill empty values with NA + (0..pc_ids.size).each do |j| # for R: fill empty values with NA data_frame[j] ||= [] data_frame[j][i] ||= "NA" end end - remove_idx = [] - data_frame.each_with_index do |r,i| - remove_idx << i if r.uniq.size == 1 # remove properties with a single value - end + #remove_idx = [] + #data_frame.each_with_index do |r,i| + #remove_idx << i if r.uniq.size == 1 # remove properties with a single value TODO: don't break R names assignment + #end - remove_idx.reverse.each do |i| - data_frame.delete_at i - pc_ids.delete_at i - end + #p data_frame.size + #p pc_ids.size + #data_frame.delete_if.with_index { |_, index| remove_idx.include? index } + #pc_ids.delete_if.with_index { |_, index| remove_idx.include? index-1 } + #remove_idx.sort.reverse.each do |i| + #p i + #data_frame.delete_at i + #pc_ids.delete_at i + #end + #p data_frame.size + #p pc_ids.size if pc_ids.empty? prediction = local_weighted_average substance, neighbors prediction[:warning] = "No variables for regression model. Using weighted average of similar substances." prediction else - query_descriptors = pc_ids.collect do |i| - substance.scaled_values[i] ? substance.scaled_values[i] : "NA" - end + query_descriptors = pc_ids.collect { |i| substance.scaled_values[i] } remove_idx = [] query_descriptors.each_with_index do |v,i| - remove_idx << i if v == "NA" + #remove_idx << i if v == "NA" + remove_idx << i unless v end - remove_idx.reverse.each do |i| + remove_idx.sort.reverse.each do |i| data_frame.delete_at i pc_ids.delete_at i query_descriptors.delete_at i @@ -135,8 +142,9 @@ module OpenTox def self.r_model_prediction method, training_data, training_features, training_weights, query_feature_values R.assign "weights", training_weights r_data_frame = "data.frame(#{training_data.collect{|r| "c(#{r.join(',')})"}.join(', ')})" -rlib = File.expand_path(File.join(File.dirname(__FILE__),"..","R")) =begin +=end +rlib = File.expand_path(File.join(File.dirname(__FILE__),"..","R")) File.open("tmp.R","w+"){|f| f.puts "suppressPackageStartupMessages({ library(iterators,lib=\"#{rlib}\") @@ -159,10 +167,11 @@ rlib = File.expand_path(File.join(File.dirname(__FILE__),"..","R")) f.puts "names(fingerprint) <- features" f.puts "prediction <- predict(model,fingerprint)" } -=end R.eval "data <- #{r_data_frame}" R.assign "features", training_features + p training_features.size + p R.eval("names(data)").to_ruby.size begin R.eval "names(data) <- append(c('activities'),features)" # R.eval "model <- train(activities ~ ., data = data, method = '#{method}', na.action = na.pass)" |