summary | refs | log | tree | commit | diff
path: root/lib
diff options
context:
space:
mode:
author: Christoph Helma <helma@in-silico.ch> 2016-06-02 17:54:48 +0200
committer: Christoph Helma <helma@in-silico.ch> 2016-06-02 17:54:48 +0200
commit: eec5bddbd35c9ecee8021128508d8718bccb4fe3 (patch)
tree: 93765b1f0b97415e7df5abdbcab7086f8c2fa7cf /lib
parent: 85f2308c101b4778508c2d767e08af4cfd671b7b (diff)
local pls regression for nanoparticle proteomics
Diffstat (limited to 'lib')
-rw-r--r-- lib/import.rb 15
-rw-r--r-- lib/nanoparticle.rb 12
-rw-r--r-- lib/regression.rb 41
3 files changed, 36 insertions, 32 deletions
diff --git a/lib/import.rb b/lib/import.rb
index 80d4579..4c49e5e 100644
--- a/lib/import.rb
+++ b/lib/import.rb
@@ -68,17 +68,10 @@ module OpenTox
effect["result"]["textValue"] ? klass = NominalFeature : klass = NumericFeature
effect["conditions"].delete_if { |k, v| v.nil? }
if study["protocol"]["category"]["title"].match(/Proteomics/) and effect["result"]["textValue"] and effect["result"]["textValue"].length > 50 # parse proteomics data
-=begin
- JSON.parse(effect["result"]["textValue"]).each do |identifier, value|
- # time critical step
- t = Time.now
- proteomics_features[identifier] ||= klass.find_or_create_by(:name => identifier, :category => "Proteomics")
- t1 += Time.now - t
- t = Time.now
+ JSON.parse(effect["result"]["textValue"]).each do |identifier, value| # time critical step
+ proteomics_features[identifier] ||= NumericFeature.find_or_create_by(:name => identifier, :category => "Proteomics")
nanoparticle.parse_ambit_value proteomics_features[identifier], value, dataset
- t2 += Time.now - t
end
-=end
else
feature = klass.find_or_create_by(
:name => effect["endpoint"],
@@ -90,10 +83,6 @@ module OpenTox
end
end
nanoparticle.save
- #p "Total time: #{Time.now - start_time}"
- #p "Proteomics features: #{t1}"
- #p "Proteomics values: #{t2}"
- #p "Time2: #{t2}"
end
datasets.each { |u,d| d.save }
end
diff --git a/lib/nanoparticle.rb b/lib/nanoparticle.rb
index 65aab23..3e29ae1 100644
--- a/lib/nanoparticle.rb
+++ b/lib/nanoparticle.rb
@@ -10,6 +10,7 @@ module OpenTox
attr_accessor :scaled_values
def physchem_neighbors min_sim: 0.9, dataset_id:, prediction_feature_id:
+ p name
dataset = Dataset.find(dataset_id)
relevant_features = {}
measurements = []
@@ -46,6 +47,7 @@ module OpenTox
end
end
end
+ #p relevant_features.keys.collect{|i| Feature.find(i).name}
neighbors = []
substances.each do |substance|
values = dataset.values(substance,prediction_feature_id)
@@ -86,9 +88,12 @@ module OpenTox
physchem_descriptors[feature.id.to_s] << value
physchem_descriptors[feature.id.to_s].uniq!
when "Proteomics"
- proteomics[feature.id.to_s] ||= []
- proteomics[feature.id.to_s] << value
- proteomics[feature.id.to_s].uniq!
+ #proteomics[feature.id.to_s] ||= []
+ #proteomics[feature.id.to_s] << value
+ #proteomics[feature.id.to_s].uniq!
+ physchem_descriptors[feature.id.to_s] ||= []
+ physchem_descriptors[feature.id.to_s] << value
+ physchem_descriptors[feature.id.to_s].uniq!
when "TOX"
if feature.name == "Total protein (BCA assay)"
physchem_descriptors[feature.id.to_s] ||= []
@@ -109,6 +114,7 @@ module OpenTox
def parse_ambit_value feature, v, dataset
#p dataset
#p feature
+ # TODO add study id to warnings
v.delete "unit"
# TODO: ppm instead of weights
if v.keys == ["textValue"]
diff --git a/lib/regression.rb b/lib/regression.rb
index 5028c78..b9067c6 100644
--- a/lib/regression.rb
+++ b/lib/regression.rb
@@ -88,35 +88,42 @@ module OpenTox
data_frame[j][i] = d[:scaled_value]
end
end if activities
- (0..pc_ids.size+1).each do |j| # for R: fill empty values with NA
+ #(0..pc_ids.size+1).each do |j| # for R: fill empty values with NA
+ (0..pc_ids.size).each do |j| # for R: fill empty values with NA
data_frame[j] ||= []
data_frame[j][i] ||= "NA"
end
end
- remove_idx = []
- data_frame.each_with_index do |r,i|
- remove_idx << i if r.uniq.size == 1 # remove properties with a single value
- end
+ #remove_idx = []
+ #data_frame.each_with_index do |r,i|
+ #remove_idx << i if r.uniq.size == 1 # remove properties with a single value TODO: don't break R names assignment
+ #end
- remove_idx.reverse.each do |i|
- data_frame.delete_at i
- pc_ids.delete_at i
- end
+ #p data_frame.size
+ #p pc_ids.size
+ #data_frame.delete_if.with_index { |_, index| remove_idx.include? index }
+ #pc_ids.delete_if.with_index { |_, index| remove_idx.include? index-1 }
+ #remove_idx.sort.reverse.each do |i|
+ #p i
+ #data_frame.delete_at i
+ #pc_ids.delete_at i
+ #end
+ #p data_frame.size
+ #p pc_ids.size
if pc_ids.empty?
prediction = local_weighted_average substance, neighbors
prediction[:warning] = "No variables for regression model. Using weighted average of similar substances."
prediction
else
- query_descriptors = pc_ids.collect do |i|
- substance.scaled_values[i] ? substance.scaled_values[i] : "NA"
- end
+ query_descriptors = pc_ids.collect { |i| substance.scaled_values[i] }
remove_idx = []
query_descriptors.each_with_index do |v,i|
- remove_idx << i if v == "NA"
+ #remove_idx << i if v == "NA"
+ remove_idx << i unless v
end
- remove_idx.reverse.each do |i|
+ remove_idx.sort.reverse.each do |i|
data_frame.delete_at i
pc_ids.delete_at i
query_descriptors.delete_at i
@@ -135,8 +142,9 @@ module OpenTox
def self.r_model_prediction method, training_data, training_features, training_weights, query_feature_values
R.assign "weights", training_weights
r_data_frame = "data.frame(#{training_data.collect{|r| "c(#{r.join(',')})"}.join(', ')})"
-rlib = File.expand_path(File.join(File.dirname(__FILE__),"..","R"))
=begin
+=end
+rlib = File.expand_path(File.join(File.dirname(__FILE__),"..","R"))
File.open("tmp.R","w+"){|f|
f.puts "suppressPackageStartupMessages({
library(iterators,lib=\"#{rlib}\")
@@ -159,10 +167,11 @@ rlib = File.expand_path(File.join(File.dirname(__FILE__),"..","R"))
f.puts "names(fingerprint) <- features"
f.puts "prediction <- predict(model,fingerprint)"
}
-=end
R.eval "data <- #{r_data_frame}"
R.assign "features", training_features
+ p training_features.size
+ p R.eval("names(data)").to_ruby.size
begin
R.eval "names(data) <- append(c('activities'),features)" #
R.eval "model <- train(activities ~ ., data = data, method = '#{method}', na.action = na.pass)"