diff options
author | mguetlein <martin.guetlein@gmail.com> | 2013-02-04 12:05:25 +0100 |
---|---|---|
committer | mguetlein <martin.guetlein@gmail.com> | 2013-02-04 12:05:25 +0100 |
commit | 63f6733956edbfa9326dcff0ba60587afd2410ed (patch) | |
tree | c744c7ba9aeb7370dcdc6a9cd3677387175d168e | |
parent | 7a75d09664033fccaabe8e58d2e17af2abd595f3 (diff) | |
parent | 424c542130ef5b8ebd8f95403d21fd8df515bcd0 (diff) |
resolved merge conflicts
-rw-r--r-- | .gitignore | 1 | ||||
-rw-r--r-- | Gemfile.lock | 126 | ||||
-rw-r--r-- | VERSION | 1 | ||||
-rw-r--r-- | algorithm.gemspec | 30 | ||||
-rw-r--r-- | application.rb | 18 | ||||
-rw-r--r-- | lib/algorithm/fminer.rb | 17 | ||||
-rw-r--r-- | lib/algorithm/neighbors.rb | 80 | ||||
-rw-r--r-- | lib/algorithm/transform.rb | 282 | ||||
-rw-r--r-- | lib/model.rb | 1 | ||||
m--------- | libfminer | 0 | ||||
-rw-r--r-- | webapp/fminer.rb | 447 | ||||
-rw-r--r-- | webapp/fs.rb | 4 | ||||
-rw-r--r-- | webapp/lazar.rb | 175 | ||||
-rw-r--r-- | webapp/sinatra.rb | 3 |
14 files changed, 574 insertions, 611 deletions
@@ -2,3 +2,4 @@ tmp/* log/* public/*.owl *.*~ +Gemfile.lock diff --git a/Gemfile.lock b/Gemfile.lock deleted file mode 100644 index 49acd91..0000000 --- a/Gemfile.lock +++ /dev/null @@ -1,126 +0,0 @@ -PATH - remote: ~/opentox-ruby/opentox-client - specs: - opentox-client (1.0.0pre1) - bundler - open4 - rdf - rdf-n3 - rdf-raptor - rest-client - -PATH - remote: ~/opentox-ruby/opentox-server - specs: - opentox-server (1.0.0pre1) - opentox-client - rack - rack-contrib - sinatra - sinatra-contrib - unicorn - -GEM - remote: http://rubygems.org/ - specs: - addressable (2.3.2) - backports (2.6.4) - clbustos-rtf (0.4.2) - dirty-memoize (0.0.4) - distribution (0.7.0) - emk-sinatra-url-for (0.2.1) - sinatra (>= 0.9.1.1) - eventmachine (0.12.10) - extendmatrix (0.3.1) - fastercsv (1.5.5) - ffi (1.1.5) - gsl (1.14.7) - narray (>= 0.5.9) - kgio (2.7.4) - mime-types (1.19) - minimization (0.2.1) - text-table (~> 1.2) - narray (0.6.0.1) - nokogiri (1.4.7) - open4 (1.3.0) - openbabel (2.3.1.6) - prawn (0.8.4) - prawn-core (>= 0.8.4, < 0.9) - prawn-layout (>= 0.8.4, < 0.9) - prawn-security (>= 0.8.4, < 0.9) - prawn-core (0.8.4) - prawn-layout (0.8.4) - prawn-security (0.8.4) - prawn-svg (0.9.1.11) - prawn (>= 0.8.4) - rack (1.4.1) - rack-contrib (1.1.0) - rack (>= 0.9.1) - rack-protection (1.2.0) - rack - rack-test (0.6.1) - rack (>= 1.0) - raindrops (0.10.0) - rdf (0.3.8) - addressable (>= 2.2.6) - rdf-n3 (0.3.7) - rdf (>= 0.3.4) - rdf-raptor (0.4.1) - ffi (>= 1.0) - rdf (~> 0.3.0) - reportbuilder (1.4.2) - clbustos-rtf (~> 0.4.0) - prawn (~> 0.8.4) - prawn-svg (~> 0.9.1) - text-table (~> 1.2) - rest-client (1.6.7) - mime-types (>= 1.16) - rinruby (2.0.3) - rserve-client (0.2.5) - ruby-ole (1.2.11.4) - rubyvis (0.5.2) - sinatra (1.3.3) - rack (~> 1.3, >= 1.3.6) - rack-protection (~> 1.2) - tilt (~> 1.3, >= 1.3.3) - sinatra-contrib (1.3.1) - backports (>= 2.0) - eventmachine - rack-protection - rack-test - sinatra (~> 1.3.0) - tilt (~> 1.3) - spreadsheet (0.6.9) - ruby-ole (>= 1.0) - statsample (1.2.0) - dirty-memoize (~> 0.0) - distribution (~> 0.6) - extendmatrix (~> 0.3.1) - fastercsv (> 0) - minimization (~> 0.2.0) - reportbuilder (~> 1.4) - rserve-client (~> 0.2.5) - rubyvis (~> 0.5) - spreadsheet (~> 0.6.5) - statsample-bivariate-extension (> 0) - statsample-bivariate-extension (1.1.0) - distribution (~> 0.6) - text-table (1.2.2) - tilt (1.3.3) - unicorn (4.4.0) - kgio (~> 2.6) - rack - raindrops (~> 0.7) - -PLATFORMS - ruby - -DEPENDENCIES - emk-sinatra-url-for (~> 0.2.1) - gsl (~> 1.14) - nokogiri (~> 1.4.4) - openbabel (~> 2.3.1.5) - opentox-client! - opentox-server! - rinruby (~> 2.0.2) - statsample (~> 1.1) @@ -0,0 +1 @@ +5.0.0pre2 diff --git a/algorithm.gemspec b/algorithm.gemspec new file mode 100644 index 0000000..925c397 --- /dev/null +++ b/algorithm.gemspec @@ -0,0 +1,30 @@ +# -*- encoding: utf-8 -*- +$:.push File.expand_path("../lib", __FILE__) + +Gem::Specification.new do |s| + s.name = "opentox-algorithm" + s.version = File.read("./VERSION") + s.authors = ["Christoph Helma"] + s.email = ["helma@in-silico.ch"] + s.homepage = "http://github.com/opentox/algorithm" + s.summary = %q{OpenTox Algorithm Service} + s.description = %q{OpenTox Algorithm Service} + s.license = 'GPL-3' + + s.rubyforge_project = "algorithm" + + s.files = `git ls-files`.split("\n") + s.required_ruby_version = '>= 1.9.2' + + # specify any dependencies here; for example: + s.add_runtime_dependency "opentox-server" + s.add_runtime_dependency "opentox-client" + s.add_runtime_dependency 'rinruby', "~>2.0.2" + s.add_runtime_dependency 'nokogiri', "~>1.4.4" + s.add_runtime_dependency 'statsample', "~>1.1" + s.add_runtime_dependency 'gsl', "~>1.14" + s.add_runtime_dependency 'emk-sinatra-url-for', "~>0.2.1" +# s.add_runtime_dependency '', "~>" + s.add_runtime_dependency "openbabel", "~>2.3.1.5" + s.post_install_message = "Please configure your service in ~/.opentox/config/algorithm.rb" +end diff --git a/application.rb b/application.rb index 09b90f6..1b9c776 100644 --- a/application.rb +++ b/application.rb @@ -2,8 +2,6 @@ # Loads sub-repositories, library code, and webapps. # Author: Andreas Maunz -require 'sinatra/url_for' - # Require sub-Repositories require File.join(File.expand_path(File.dirname(__FILE__)), 'libfminer/libbbrc/bbrc') # include before openbabel require File.join(File.expand_path(File.dirname(__FILE__)), 'libfminer/liblast/last') # @@ -19,14 +17,14 @@ Dir['./webapp/*.rb'].each { |f| require f } # Webapps module OpenTox class Application < Service get '/?' do - list = [ url_for('/lazar', :full), - url_for('/fminer/bbrc', :full), - url_for('/fminer/bbrc/sample', :full), - url_for('/fminer/last', :full), - url_for('/fminer/bbrc/match', :full), - url_for('/fminer/last/match', :full), - url_for('/fs/rfe', :full), - url_for('/pc', :full) ].join("\n") + "\n" + list = [ to('/lazar', :full), + to('/fminer/bbrc', :full), + to('/fminer/bbrc/sample', :full), + to('/fminer/last', :full), + to('/fminer/bbrc/match', :full), + to('/fminer/last/match', :full), + to('/fs/rfe', :full), + to('/pc', :full) ].join("\n") + "\n" format_output (list) end end diff --git a/lib/algorithm/fminer.rb b/lib/algorithm/fminer.rb index dd341db..071cb05 100644 --- a/lib/algorithm/fminer.rb +++ b/lib/algorithm/fminer.rb @@ -122,17 +122,12 @@ module OpenTox elsif @prediction_feature.feature_type == "regression" activity= compound_activities.to_f end - begin - fminer_instance.AddCompound(compound.smiles,id) if fminer_instance - fminer_instance.AddActivity(activity, id) if fminer_instance - @all_activities[id]=activity # DV: insert global information - @compounds[id] = compound - @smi[id] = compound.smiles - id += 1 - rescue Exception => e - LOGGER.warn "Could not add " + smiles + "\t" + values[i].to_s + " to fminer" - LOGGER.warn e.backtrace - end + fminer_instance.AddCompound(compound.smiles,id) if fminer_instance + fminer_instance.AddActivity(activity, id) if fminer_instance + @all_activities[id]=activity # DV: insert global information + @compounds[id] = compound + @smi[id] = compound.smiles + id += 1 end end end diff --git a/lib/algorithm/neighbors.rb b/lib/algorithm/neighbors.rb index cdfc5b7..1a82dc8 100644 --- a/lib/algorithm/neighbors.rb +++ b/lib/algorithm/neighbors.rb @@ -69,29 +69,24 @@ module OpenTox # @return [Numeric] A prediction value. def self.local_svm_regression(params) - begin - confidence = 0.0 - prediction = nil - - $logger.debug "Local SVM." - if params[:acts].size>0 - if params[:props] - n_prop = params[:props][0].collect.to_a - q_prop = params[:props][1].collect.to_a - props = [ n_prop, q_prop ] - end - acts = params[:acts].collect.to_a - prediction = local_svm_prop( props, acts, params[:min_train_performance]) # params[:props].nil? signals non-prop setting - prediction = nil if (!prediction.nil? && prediction.infinite?) - #$logger.debug "Prediction: '" + prediction.to_s + "' ('#{prediction.class}')." - confidence = get_confidence({:sims => params[:sims][1], :acts => params[:acts]}) - confidence = 0.0 if prediction.nil? + confidence = 0.0 + prediction = nil + + $logger.debug "Local SVM." + if params[:acts].size>0 + if params[:props] + n_prop = params[:props][0].collect.to_a + q_prop = params[:props][1].collect.to_a + props = [ n_prop, q_prop ] end - {:prediction => prediction, :confidence => confidence} - rescue Exception => e - $logger.debug "#{e.class}: #{e.message}" - $logger.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}" + acts = params[:acts].collect.to_a + prediction = local_svm_prop( props, acts, params[:min_train_performance]) # params[:props].nil? signals non-prop setting + prediction = nil if (!prediction.nil? && prediction.infinite?) + #$logger.debug "Prediction: '" + prediction.to_s + "' ('#{prediction.class}')." + confidence = get_confidence({:sims => params[:sims][1], :acts => params[:acts]}) + confidence = 0.0 if prediction.nil? end + {:prediction => prediction, :confidence => confidence} end @@ -101,30 +96,25 @@ module OpenTox # @return [Numeric] A prediction value. def self.local_svm_classification(params) - begin - confidence = 0.0 - prediction = nil + confidence = 0.0 + prediction = nil - $logger.debug "Local SVM." - if params[:acts].size>0 - if params[:props] - n_prop = params[:props][0].collect.to_a - q_prop = params[:props][1].collect.to_a - props = [ n_prop, q_prop ] - end - acts = params[:acts].collect.to_a - acts = acts.collect{|v| "Val" + v.to_s} # Convert to string for R to recognize classification - prediction = local_svm_prop( props, acts, params[:min_train_performance]) # params[:props].nil? signals non-prop setting - prediction = prediction.sub(/Val/,"") if prediction # Convert back - confidence = 0.0 if prediction.nil? - #$logger.debug "Prediction: '" + prediction.to_s + "' ('#{prediction.class}')." - confidence = get_confidence({:sims => params[:sims][1], :acts => params[:acts]}) + $logger.debug "Local SVM." + if params[:acts].size>0 + if params[:props] + n_prop = params[:props][0].collect.to_a + q_prop = params[:props][1].collect.to_a + props = [ n_prop, q_prop ] end - {:prediction => prediction, :confidence => confidence} - rescue Exception => e - $logger.debug "#{e.class}: #{e.message}" - $logger.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}" + acts = params[:acts].collect.to_a + acts = acts.collect{|v| "Val" + v.to_s} # Convert to string for R to recognize classification + prediction = local_svm_prop( props, acts, params[:min_train_performance]) # params[:props].nil? signals non-prop setting + prediction = prediction.sub(/Val/,"") if prediction # Convert back + confidence = 0.0 if prediction.nil? + #$logger.debug "Prediction: '" + prediction.to_s + "' ('#{prediction.class}')." + confidence = get_confidence({:sims => params[:sims][1], :acts => params[:acts]}) end + {:prediction => prediction, :confidence => confidence} end @@ -216,9 +206,9 @@ module OpenTox prediction = nil if prediction =~ /NA/ prediction = nil unless train_success $logger.debug "Performance: '#{sprintf("%.2f", @r.perf)}'" - rescue Exception => e - $logger.debug "#{e.class}: #{e.message}" - $logger.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}" + #rescue Exception => e + #$logger.debug "#{e.class}: #{e.message}" + #$logger.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}" ensure @r.quit # free R end diff --git a/lib/algorithm/transform.rb b/lib/algorithm/transform.rb index afc76f9..ec25526 100644 --- a/lib/algorithm/transform.rb +++ b/lib/algorithm/transform.rb @@ -18,42 +18,27 @@ module OpenTox # @param [GSL::Vector] values Values to transform using AutoScaling. def initialize values - begin - raise "Cannot transform, values empty." if values.size==0 - vs = values.clone - @mean = vs.to_scale.mean - @stdev = vs.to_scale.standard_deviation_population - @stdev = 0.0 if @stdev.nan? - @vs = transform vs - rescue Exception => e - $logger.debug "#{e.class}: #{e.message}" - $logger.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}" - end + bad_request_error "Cannot transform, values empty." if values.size==0 + vs = values.clone + @mean = vs.to_scale.mean + @stdev = vs.to_scale.standard_deviation_population + @stdev = 0.0 if @stdev.nan? + @vs = transform vs end # @param [GSL::Vector] values Values to transform. # @return [GSL::Vector] transformed values. def transform values - begin - raise "Cannot transform, values empty." if values.size==0 - autoscale values.clone - rescue Exception => e - $logger.debug "#{e.class}: #{e.message}" - $logger.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}" - end + bad_request_error "Cannot transform, values empty." if values.size==0 + autoscale values.clone end # @param [GSL::Vector] values Values to restore. # @return [GSL::Vector] transformed values. def restore values - begin - raise "Cannot transform, values empty." if values.size==0 - rv_ss = values.clone.to_scale * @stdev unless @stdev == 0.0 - (rv_ss + @mean).to_gsl - rescue Exception => e - $logger.debug "#{e.class}: #{e.message}" - $logger.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}" - end + bad_request_error "Cannot transform, values empty." if values.size==0 + rv_ss = values.clone.to_scale * @stdev unless @stdev == 0.0 + (rv_ss + @mean).to_gsl end # @param [GSL::Vector] values to transform. @@ -77,68 +62,63 @@ module OpenTox # @return [GSL::Matrix] Data transformed matrix. def initialize data_matrix, compression=0.05, maxcols=(1.0/0.0) - begin - @data_matrix = data_matrix.clone - @compression = compression.to_f - @mean = Array.new - @autoscaler = Array.new - @cols = Array.new - @maxcols = maxcols - - # Objective Feature Selection - raise "Error! PCA needs at least two dimensions." if data_matrix.size2 < 2 - @data_matrix_selected = nil - (0..@data_matrix.size2-1).each { |i| - if !@data_matrix.col(i).to_a.zero_variance? - if @data_matrix_selected.nil? - @data_matrix_selected = GSL::Matrix.alloc(@data_matrix.size1, 1) - @data_matrix_selected.col(0)[0..@data_matrix.size1-1] = @data_matrix.col(i) - else - @data_matrix_selected = @data_matrix_selected.horzcat(GSL::Matrix.alloc(@data_matrix.col(i).to_a,@data_matrix.size1, 1)) - end - @cols << i - end - } - raise "Error! PCA needs at least two dimensions." if (@data_matrix_selected.nil? || @data_matrix_selected.size2 < 2) - - # PCA uses internal centering on 0 - @data_matrix_scaled = GSL::Matrix.alloc(@data_matrix_selected.size1, @cols.size) - (0..@cols.size-1).each { |i| - as = OpenTox::Algorithm::Transform::AutoScale.new(@data_matrix_selected.col(i)) - @data_matrix_scaled.col(i)[0..@data_matrix.size1-1] = as.vs * as.stdev # re-adjust by stdev - @mean << as.mean - @autoscaler << as - } - - # PCA - data_matrix_hash = Hash.new - (0..@cols.size-1).each { |i| - column_view = @data_matrix_scaled.col(i) - data_matrix_hash[i] = column_view.to_scale - } - dataset_hash = data_matrix_hash.to_dataset # see http://goo.gl/7XcW9 - cor_matrix=Statsample::Bivariate.correlation_matrix(dataset_hash) - pca=Statsample::Factor::PCA.new(cor_matrix) - - # Select best eigenvectors - pca.eigenvalues.each { |ev| raise "PCA failed!" unless !ev.nan? } - @eigenvalue_sums = Array.new - (0..@cols.size-1).each { |i| - @eigenvalue_sums << pca.eigenvalues[0..i].inject{ |sum, ev| sum + ev } - } - eigenvectors_selected = Array.new - pca.eigenvectors.each_with_index { |ev, i| - if (@eigenvalue_sums[i] <= ((1.0-@compression)*@cols.size)) || (eigenvectors_selected.size == 0) - eigenvectors_selected << ev.to_a unless @maxcols <= eigenvectors_selected.size + @data_matrix = data_matrix.clone + @compression = compression.to_f + @mean = Array.new + @autoscaler = Array.new + @cols = Array.new + @maxcols = maxcols + + # Objective Feature Selection + bad_request_error "Error! PCA needs at least two dimensions." if data_matrix.size2 < 2 + @data_matrix_selected = nil + (0..@data_matrix.size2-1).each { |i| + if !@data_matrix.col(i).to_a.zero_variance? + if @data_matrix_selected.nil? + @data_matrix_selected = GSL::Matrix.alloc(@data_matrix.size1, 1) + @data_matrix_selected.col(0)[0..@data_matrix.size1-1] = @data_matrix.col(i) + else + @data_matrix_selected = @data_matrix_selected.horzcat(GSL::Matrix.alloc(@data_matrix.col(i).to_a,@data_matrix.size1, 1)) end - } - @eigenvector_matrix = GSL::Matrix.alloc(eigenvectors_selected.flatten, eigenvectors_selected.size, @cols.size).transpose - @data_transformed_matrix = (@eigenvector_matrix.transpose * @data_matrix_scaled.transpose).transpose + @cols << i + end + } + bad_request_error "Error! PCA needs at least two dimensions." if (@data_matrix_selected.nil? || @data_matrix_selected.size2 < 2) + + # PCA uses internal centering on 0 + @data_matrix_scaled = GSL::Matrix.alloc(@data_matrix_selected.size1, @cols.size) + (0..@cols.size-1).each { |i| + as = OpenTox::Algorithm::Transform::AutoScale.new(@data_matrix_selected.col(i)) + @data_matrix_scaled.col(i)[0..@data_matrix.size1-1] = as.vs * as.stdev # re-adjust by stdev + @mean << as.mean + @autoscaler << as + } + + # PCA + data_matrix_hash = Hash.new + (0..@cols.size-1).each { |i| + column_view = @data_matrix_scaled.col(i) + data_matrix_hash[i] = column_view.to_scale + } + dataset_hash = data_matrix_hash.to_dataset # see http://goo.gl/7XcW9 + cor_matrix=Statsample::Bivariate.correlation_matrix(dataset_hash) + pca=Statsample::Factor::PCA.new(cor_matrix) + + # Select best eigenvectors + pca.eigenvalues.each { |ev| bad_request_error "PCA failed!" unless !ev.nan? } + @eigenvalue_sums = Array.new + (0..@cols.size-1).each { |i| + @eigenvalue_sums << pca.eigenvalues[0..i].inject{ |sum, ev| sum + ev } + } + eigenvectors_selected = Array.new + pca.eigenvectors.each_with_index { |ev, i| + if (@eigenvalue_sums[i] <= ((1.0-@compression)*@cols.size)) || (eigenvectors_selected.size == 0) + eigenvectors_selected << ev.to_a unless @maxcols <= eigenvectors_selected.size + end + } + @eigenvector_matrix = GSL::Matrix.alloc(eigenvectors_selected.flatten, eigenvectors_selected.size, @cols.size).transpose + @data_transformed_matrix = (@eigenvector_matrix.transpose * @data_matrix_scaled.transpose).transpose - rescue Exception => e - $logger.debug "#{e.class}: #{e.message}" - $logger.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}" - end end # Transforms data to feature space found by PCA. @@ -146,35 +126,25 @@ module OpenTox # @param [GSL::Matrix] values Data matrix. # @return [GSL::Matrix] Transformed data matrix. def transform values - begin - vs = values.clone - raise "Error! Too few columns for transformation." if vs.size2 < @cols.max - data_matrix_scaled = GSL::Matrix.alloc(vs.size1, @cols.size) - @cols.each_with_index { |i,j| - data_matrix_scaled.col(j)[0..data_matrix_scaled.size1-1] = @autoscaler[j].transform(vs.col(i).to_a) * @autoscaler[j].stdev - } - (@eigenvector_matrix.transpose * data_matrix_scaled.transpose).transpose - rescue Exception => e - $logger.debug "#{e.class}: #{e.message}" - $logger.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}" - end + vs = values.clone + bad_request_error "Error! Too few columns for transformation." if vs.size2 < @cols.max + data_matrix_scaled = GSL::Matrix.alloc(vs.size1, @cols.size) + @cols.each_with_index { |i,j| + data_matrix_scaled.col(j)[0..data_matrix_scaled.size1-1] = @autoscaler[j].transform(vs.col(i).to_a) * @autoscaler[j].stdev + } + (@eigenvector_matrix.transpose * data_matrix_scaled.transpose).transpose end # Restores data in the original feature space (possibly with compression loss). # # @return [GSL::Matrix] Data matrix. def restore - begin - data_matrix_restored = (@eigenvector_matrix * @data_transformed_matrix.transpose).transpose # reverse pca - # reverse scaling - (0..@cols.size-1).each { |i| - data_matrix_restored.col(i)[0..data_matrix_restored.size1-1] += @mean[i] - } - data_matrix_restored - rescue Exception => e - $logger.debug "#{e.class}: #{e.message}" - $logger.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}" - end + data_matrix_restored = (@eigenvector_matrix * @data_transformed_matrix.transpose).transpose # reverse pca + # reverse scaling + (0..@cols.size-1).each { |i| + data_matrix_restored.col(i)[0..data_matrix_restored.size1-1] += @mean[i] + } + data_matrix_restored end end @@ -191,45 +161,40 @@ module OpenTox # @return [GSL::Matrix] Data transformed matrix def initialize data_matrix, compression=0.05 - begin - @data_matrix = data_matrix.clone - @compression = compression - - # Compute the SV Decomposition X=USV - # vt is *not* the transpose of V here, but V itself (see http://goo.gl/mm2xz)! - u, vt, s = data_matrix.SV_decomp - - # Determine cutoff index - s2 = s.mul(s) ; s2_sum = s2.sum - s2_run = 0 - k = s2.size - 1 - s2.to_a.reverse.each { |v| - s2_run += v - frac = s2_run / s2_sum - break if frac > compression - k -= 1 - } - k += 1 if k == 0 # avoid uni-dimensional (always cos sim of 1) - - # Take the k-rank approximation of the Matrix - # - Take first k columns of u - # - Take first k columns of vt - # - Take the first k eigenvalues - @uk = u.submatrix(nil, (0..k)) # used to transform column format data - @vk = vt.submatrix(nil, (0..k)) # used to transform row format data - s = GSL::Matrix.diagonal(s) - @eigk = s.submatrix((0..k), (0..k)) - @eigk_inv = @eigk.inv - - # Transform data - @data_transformed_matrix = @uk # = u for all SVs - # NOTE: @data_transformed_matrix is also equal to - # @data_matrix * @vk * @eigk_inv - - rescue Exception => e - $logger.debug "#{e.class}: #{e.message}" - $logger.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}" - end + @data_matrix = data_matrix.clone + @compression = compression + + # Compute the SV Decomposition X=USV + # vt is *not* the transpose of V here, but V itself (see http://goo.gl/mm2xz)! + u, vt, s = data_matrix.SV_decomp + + # Determine cutoff index + s2 = s.mul(s) ; s2_sum = s2.sum + s2_run = 0 + k = s2.size - 1 + s2.to_a.reverse.each { |v| + s2_run += v + frac = s2_run / s2_sum + break if frac > compression + k -= 1 + } + k += 1 if k == 0 # avoid uni-dimensional (always cos sim of 1) + + # Take the k-rank approximation of the Matrix + # - Take first k columns of u + # - Take first k columns of vt + # - Take the first k eigenvalues + @uk = u.submatrix(nil, (0..k)) # used to transform column format data + @vk = vt.submatrix(nil, (0..k)) # used to transform row format data + s = GSL::Matrix.diagonal(s) + @eigk = s.submatrix((0..k), (0..k)) + @eigk_inv = @eigk.inv + + # Transform data + @data_transformed_matrix = @uk # = u for all SVs + # NOTE: @data_transformed_matrix is also equal to + # @data_matrix * @vk * @eigk_inv + end @@ -238,12 +203,7 @@ module OpenTox # @param [GSL::Matrix] values Data matrix (1 x m). # @return [GSL::Matrix] Transformed data matrix. def transform_instance values - begin - values * @vk * @eigk_inv - rescue Exception => e - $logger.debug "#{e.class}: #{e.message}" - $logger.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}" - end + values * @vk * @eigk_inv end alias :transform :transform_instance # make this the default (see PCA interface) @@ -252,12 +212,7 @@ module OpenTox # @param [GSL::Matrix] values Data matrix (1 x n). # @return [GSL::Matrix] Transformed data matrix. def transform_feature values - begin - values * @uk * @eigk_inv - rescue Exception => e - $logger.debug "#{e.class}: #{e.message}" - $logger.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}" - end + values * @uk * @eigk_inv end @@ -265,12 +220,7 @@ module OpenTox # # @return [GSL::Matrix] Data matrix. def restore - begin - @data_transformed_matrix * @eigk * @vk.transpose # reverse svd - rescue Exception => e - $logger.debug "#{e.class}: #{e.message}" - $logger.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}" - end + @data_transformed_matrix * @eigk * @vk.transpose # reverse svd end diff --git a/lib/model.rb b/lib/model.rb index 1c10b35..a808aa7 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -20,6 +20,7 @@ module OpenTox # Internal use only def prepare_prediction_model(params) + puts params.inspect params.each {|k,v| self.class.class_eval { attr_accessor k.to_sym } instance_variable_set(eval(":@"+k), v) diff --git a/libfminer b/libfminer -Subproject 4577dcacb352af3fdca925714dc570de9e02582 +Subproject 9c7ca17efe84fa722ac7a9ba40fee92f51d0427 diff --git a/webapp/fminer.rb b/webapp/fminer.rb index 081a408..8f4b058 100644 --- a/webapp/fminer.rb +++ b/webapp/fminer.rb @@ -22,11 +22,11 @@ module OpenTox # Get list of fminer algorithms # @return [text/uri-list] URIs get '/fminer/?' do - list = [ url_for('/fminer/bbrc', :full), - url_for('/fminer/bbrc/sample', :full), - url_for('/fminer/last', :full), - url_for('/fminer/bbrc/match', :full), - url_for('/fminer/last/match', :full) + list = [ to('/fminer/bbrc', :full), + to('/fminer/bbrc/sample', :full), + to('/fminer/last', :full), + to('/fminer/bbrc/match', :full), + to('/fminer/last/match', :full) ].join("\n") + "\n" format_output(list) end @@ -34,7 +34,7 @@ module OpenTox # Get representation of BBRC algorithm # @return [String] Representation get "/fminer/bbrc/?" do - algorithm = OpenTox::Algorithm.new(url_for('/fminer/bbrc',:full)) + algorithm = OpenTox::Algorithm.new(to('/fminer/bbrc',:full)) algorithm.metadata = { DC.title => 'Backbone Refinement Class Representatives', DC.creator => "andreas@maunz.de", @@ -56,7 +56,7 @@ module OpenTox # Get representation of BBRC-sample algorithm # @return [String] Representation get "/fminer/bbrc/sample/?" do - algorithm = OpenTox::Algorithm.new(url_for('/fminer/bbrc/sample',:full)) + algorithm = OpenTox::Algorithm.new(to('/fminer/bbrc/sample',:full)) algorithm.metadata = { DC.title => 'Backbone Refinement Class Representatives, obtained from samples of a dataset', DC.creator => "andreas@maunz.de", @@ -78,7 +78,7 @@ module OpenTox # Get representation of fminer LAST-PM algorithm # @return [String] Representation get "/fminer/last/?" do - algorithm = OpenTox::Algorithm.new(url_for('/fminer/last',:full)) + algorithm = OpenTox::Algorithm.new(to('/fminer/last',:full)) algorithm.metadata = { DC.title => 'Latent Structure Pattern Mining descriptors', DC.creator => "andreas@maunz.de", @@ -99,7 +99,7 @@ module OpenTox # Get representation of matching algorithm # @return [String] Representation get "/fminer/:method/match?" do - algorithm = OpenTox::Algorithm.new(url_for("/fminer/#{params[:method]}/match",:full)) + algorithm = OpenTox::Algorithm.new(to("/fminer/#{params[:method]}/match",:full)) algorithm.metadata = { DC.title => 'fminer feature matching', DC.creator => "mguetlein@gmail.com, andreas@maunz.de", @@ -129,148 +129,145 @@ module OpenTox post '/fminer/bbrc/?' do - @@fminer=OpenTox::Algorithm::Fminer.new(url_for('/fminer/bbrc',:full)) + @@fminer=OpenTox::Algorithm::Fminer.new(to('/fminer/bbrc',:full)) @@fminer.check_params(params,5,@subjectid) task = OpenTox::Task.create( $task[:uri], @subjectid, { RDF::DC.description => "Mining BBRC features", - RDF::DC.creator => url_for('/fminer/bbrc',:full) + RDF::DC.creator => to('/fminer/bbrc',:full) } ) do |task| - begin - @@bbrc.Reset - if @@fminer.prediction_feature.feature_type == "regression" - @@bbrc.SetRegression(true) # AM: DO NOT MOVE DOWN! Must happen before the other Set... operations! - else - bad_request_error "No accept values for "\ - "dataset '#{@@fminer.training_dataset.uri}' and "\ - "feature '#{@@fminer.prediction_feature.uri}'" unless - @@fminer.prediction_feature.accept_values - value_map=@@fminer.training_dataset.value_map(@@fminer.prediction_feature) - end - @@bbrc.SetMinfreq(@@fminer.minfreq) - @@bbrc.SetType(1) if params[:feature_type] == "paths" - @@bbrc.SetBackbone(false) if params[:backbone] == "false" - @@bbrc.SetChisqSig(params[:min_chisq_significance].to_f) if params[:min_chisq_significance] - @@bbrc.SetConsoleOut(false) + @@bbrc.Reset + if @@fminer.prediction_feature.feature_type == "regression" + @@bbrc.SetRegression(true) # AM: DO NOT MOVE DOWN! Must happen before the other Set... operations! + else + bad_request_error "No accept values for "\ + "dataset '#{@@fminer.training_dataset.uri}' and "\ + "feature '#{@@fminer.prediction_feature.uri}'" unless + @@fminer.prediction_feature.accept_values + value_map=@@fminer.training_dataset.value_map(@@fminer.prediction_feature) + end + @@bbrc.SetMinfreq(@@fminer.minfreq) + @@bbrc.SetType(1) if params[:feature_type] == "paths" + @@bbrc.SetBackbone(false) if params[:backbone] == "false" + @@bbrc.SetChisqSig(params[:min_chisq_significance].to_f) if params[:min_chisq_significance] + @@bbrc.SetConsoleOut(false) - - feature_dataset = OpenTox::Dataset.new(nil, @subjectid) - feature_dataset.metadata = { - DC.title => "BBRC representatives", - DC.creator => url_for('/fminer/bbrc',:full), - OT.hasSource => url_for('/fminer/bbrc', :full), - } - feature_dataset.parameters = [ - { DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] }, - { DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] }, - { DC.title => "min_frequency", OT.paramValue => @@fminer.minfreq }, - { DC.title => "nr_hits", OT.paramValue => (params[:nr_hits] == "true" ? "true" : "false") }, - { DC.title => "backbone", OT.paramValue => (params[:backbone] == "false" ? "false" : "true") } - ] - - @@fminer.compounds = [] - @@fminer.db_class_sizes = Array.new # AM: effect - @@fminer.all_activities = Hash.new # DV: for effect calculation in regression part - @@fminer.smi = [] # AM LAST: needed for matching the patterns back - - # Add data to fminer - @@fminer.add_fminer_data(@@bbrc, value_map) - g_median=@@fminer.all_activities.values.to_scale.median + + feature_dataset = OpenTox::Dataset.new(nil, @subjectid) + feature_dataset.metadata = { + DC.title => "BBRC representatives", + DC.creator => to('/fminer/bbrc',:full), + OT.hasSource => to('/fminer/bbrc', :full), + } + feature_dataset.parameters = [ + { DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] }, + { DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] }, + { DC.title => "min_frequency", OT.paramValue => @@fminer.minfreq }, + { DC.title => "nr_hits", OT.paramValue => (params[:nr_hits] == "true" ? "true" : "false") }, + { DC.title => "backbone", OT.paramValue => (params[:backbone] == "false" ? "false" : "true") } + ] + + @@fminer.compounds = [] + @@fminer.db_class_sizes = Array.new # AM: effect + @@fminer.all_activities = Hash.new # DV: for effect calculation in regression part + @@fminer.smi = [] # AM LAST: needed for matching the patterns back + + # Add data to fminer + @@fminer.add_fminer_data(@@bbrc, value_map) + g_median=@@fminer.all_activities.values.to_scale.median - #task.progress 10 - step_width = 80 / @@bbrc.GetNoRootNodes().to_f - features_smarts = Set.new - features = Array.new - - # run @@bbrc - - # prepare to receive results as hash { c => [ [f,v], ... ] } - fminer_results = {} + #task.progress 10 + step_width = 80 / @@bbrc.GetNoRootNodes().to_f + features_smarts = Set.new + features = Array.new + + # run @@bbrc + + # prepare to receive results as hash { c => [ [f,v], ... ] } + fminer_results = {} - (0 .. @@bbrc.GetNoRootNodes()-1).each do |j| - results = @@bbrc.MineRoot(j) - #task.progress 10+step_width*(j+1) - results.each do |result| - f = YAML.load(result)[0] - smarts = f[0] - p_value = f[1] - - if (!@@bbrc.GetRegression) - id_arrs = f[2..-1].flatten - max = OpenTox::Algorithm::Fminer.effect(f[2..-1].reverse, @@fminer.db_class_sizes) # f needs reversal for bbrc - effect = max+1 - else #regression part - id_arrs = f[2] - # DV: effect calculation - f_arr=Array.new - f[2].each do |id| - id=id.keys[0] # extract id from hit count hash - f_arr.push(@@fminer.all_activities[id]) - end - f_median=f_arr.to_scale.median - if g_median >= f_median - effect = 'activating' - else - effect = 'deactivating' - end + (0 .. @@bbrc.GetNoRootNodes()-1).each do |j| + results = @@bbrc.MineRoot(j) + #task.progress 10+step_width*(j+1) + results.each do |result| + f = YAML.load(result)[0] + smarts = f[0] + p_value = f[1] + + if (!@@bbrc.GetRegression) + id_arrs = f[2..-1].flatten + max = OpenTox::Algorithm::Fminer.effect(f[2..-1].reverse, @@fminer.db_class_sizes) # f needs reversal for bbrc + effect = max+1 + else #regression part + id_arrs = f[2] + # DV: effect calculation + f_arr=Array.new + f[2].each do |id| + id=id.keys[0] # extract id from hit count hash + f_arr.push(@@fminer.all_activities[id]) end - - #feature_uri = File.join feature_dataset.uri,"feature","bbrc", features.size.to_s - unless features_smarts.include? smarts - features_smarts << smarts - metadata = { - OT.hasSource => url_for('/fminer/bbrc', :full), - RDF.type => [OT.Feature, OT.Substructure, OT.NumericFeature], - OT.smarts => smarts.dup, - OT.pValue => p_value.to_f.abs.round(5), - OT.effect => effect - } - feature = OpenTox::Feature.find_by_title(smarts.dup,metadata) - features << feature + f_median=f_arr.to_scale.median + if g_median >= f_median + effect = 'activating' + else + effect = 'deactivating' end - - id_arrs.each { |id_count_hash| - id=id_count_hash.keys[0].to_i - count=id_count_hash.values[0].to_i - fminer_results[@@fminer.compounds[id]] || fminer_results[@@fminer.compounds[id]] = {} - if params[:nr_hits] == "true" - fminer_results[@@fminer.compounds[id]][feature.uri] = count - else - fminer_results[@@fminer.compounds[id]][feature.uri] = 1 - end + end + + #feature_uri = File.join feature_dataset.uri,"feature","bbrc", features.size.to_s + unless features_smarts.include? smarts + features_smarts << smarts + metadata = { + OT.hasSource => to('/fminer/bbrc', :full), + RDF.type => [OT.Feature, OT.Substructure, OT.NumericFeature], + OT.smarts => smarts.dup, + OT.pValue => p_value.to_f.abs.round(5), + OT.effect => effect } - - end # end of - end # feature parsing + feature = OpenTox::Feature.find_by_title(smarts.dup,metadata) + features << feature + end - fminer_compounds = @@fminer.training_dataset.compounds.collect.to_a - @@fminer.training_dataset.build_feature_positions - prediction_feature_idx = @@fminer.training_dataset.feature_positions[@@fminer.prediction_feature.uri] - prediction_feature_all_acts = fminer_compounds.each_with_index.collect { |c,idx| - @@fminer.training_dataset.data_entries[idx][prediction_feature_idx] - } - fminer_noact_compounds = fminer_compounds - @@fminer.compounds + id_arrs.each { |id_count_hash| + id=id_count_hash.keys[0].to_i + count=id_count_hash.values[0].to_i + fminer_results[@@fminer.compounds[id]] || fminer_results[@@fminer.compounds[id]] = {} + if params[:nr_hits] == "true" + fminer_results[@@fminer.compounds[id]][feature.uri] = count + else + fminer_results[@@fminer.compounds[id]][feature.uri] = 1 + end + } + + end # end of + end # feature parsing - feature_dataset.features = features + fminer_compounds = @@fminer.training_dataset.compounds.collect.to_a + @@fminer.training_dataset.build_feature_positions + prediction_feature_idx = @@fminer.training_dataset.feature_positions[@@fminer.prediction_feature.uri] + prediction_feature_all_acts = fminer_compounds.each_with_index.collect { |c,idx| + @@fminer.training_dataset.data_entries[idx][prediction_feature_idx] + } + fminer_noact_compounds = fminer_compounds - @@fminer.compounds + + feature_dataset.features = features + if (params[:get_target] == "true") + feature_dataset.features = [ @@fminer.prediction_feature ] + feature_dataset.features + end + fminer_compounds.each_with_index { |c,idx| + row = [ c ] if (params[:get_target] == "true") - feature_dataset.features = [ @@fminer.prediction_feature ] + feature_dataset.features + row = row + [ prediction_feature_all_acts[idx] ] end - fminer_compounds.each_with_index { |c,idx| - row = [ c ] - if (params[:get_target] == "true") - row = row + [ prediction_feature_all_acts[idx] ] - end - features.each { |f| - row << (fminer_results[c] ? fminer_results[c][f.uri] : nil) - } - row.collect! { |v| v ? v : 0 } unless fminer_noact_compounds.include? c - feature_dataset << row + features.each { |f| + row << (fminer_results[c] ? fminer_results[c][f.uri] : nil) } +<<<<<<< HEAD $logger.debug "fminer found #{feature_dataset.features.size} features for #{feature_dataset.compounds.size} compounds" feature_dataset.put @subjectid @@ -281,6 +278,14 @@ module OpenTox $logger.debug "#{e.class}: #{e.message}" $logger.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}" end +======= + row.collect! { |v| v ? v : 0 } unless fminer_noact_compounds.include? c + feature_dataset << row + } + feature_dataset.put @subjectid + $logger.debug feature_dataset.uri + feature_dataset.uri +>>>>>>> 424c542130ef5b8ebd8f95403d21fd8df515bcd0 end response['Content-Type'] = 'text/uri-list' @@ -303,117 +308,109 @@ module OpenTox # @return [text/uri-list] Task URI post '/fminer/last/?' do - @@fminer=OpenTox::Algorithm::Fminer.new(url_for('/fminer/last',:full)) + @@fminer=OpenTox::Algorithm::Fminer.new(to('/fminer/last',:full)) @@fminer.check_params(params,80,@subjectid) task = OpenTox::Task.create( $task[:uri], @subjectid, { RDF::DC.description => "Mining LAST features", - RDF::DC.creator => url_for('/fminer/last',:full) + RDF::DC.creator => to('/fminer/last',:full) } ) do |task| - begin - - @@last.Reset - if @@fminer.prediction_feature.feature_type == "regression" - @@last.SetRegression(true) # AM: DO NOT MOVE DOWN! Must happen before the other Set... operations! - else - bad_request_error "No accept values for "\ - "dataset '#{fminer.training_dataset.uri}' and "\ - "feature '#{fminer.prediction_feature.uri}'" unless - @@fminer.prediction_feature.accept_values - value_map=@@fminer.training_dataset.value_map(@@fminer.prediction_feature) - end - @@last.SetMinfreq(@@fminer.minfreq) - @@last.SetType(1) if params[:feature_type] == "paths" - @@last.SetConsoleOut(false) - - - feature_dataset = OpenTox::Dataset.new(nil, @subjectid) - feature_dataset.metadata = { - DC.title => "LAST representatives for " + @@fminer.training_dataset.metadata[DC.title].to_s, - DC.creator => url_for('/fminer/last',:full), - OT.hasSource => url_for('/fminer/last', :full) - } - feature_dataset.parameters = [ - { DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] }, - { DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] }, - { DC.title => "min_frequency", OT.paramValue => @@fminer.minfreq }, - { DC.title => "nr_hits", OT.paramValue => (params[:nr_hits] == "true" ? "true" : "false") } - ] - - @@fminer.compounds = [] - @@fminer.db_class_sizes = Array.new # AM: effect - @@fminer.all_activities = Hash.new # DV: for effect calculation (class and regr) - @@fminer.smi = [] # needed for matching the patterns back - - # Add data to fminer - @@fminer.add_fminer_data(@@last, value_map) - #task.progress 10 - step_width = 80 / @@bbrc.GetNoRootNodes().to_f - # run @@last - xml = "" - (0 .. @@last.GetNoRootNodes()-1).each do |j| - results = @@last.MineRoot(j) - #task.progress 10+step_width*(j+1) - results.each do |result| - xml << result - end - end - - lu = LU.new # uses last-utils here - dom=lu.read(xml) # parse GraphML - smarts=lu.smarts_rb(dom,'nls') # converts patterns to LAST-SMARTS using msa variant (see last-pm.maunz.de) - params[:nr_hits] == "true" ? hit_count=true : hit_count=false - matches, counts = lu.match_rb(@@fminer.smi,smarts,hit_count,true) # creates instantiations - - features = [] - # prepare to receive results as hash { c => [ [f,v], ... ] } - fminer_results = {} - matches.each do |smarts, ids| - metadata, parameters = @@fminer.calc_metadata(smarts, ids, counts[smarts], @@last, nil, value_map, params) - feature = OpenTox::Feature.find_by_title(smarts.dup,metadata) - features << feature - ids.each_with_index { |id,idx| - fminer_results[@@fminer.compounds[id]] || fminer_results[@@fminer.compounds[id]] = {} - fminer_results[@@fminer.compounds[id]][feature.uri] = counts[smarts][idx] - } + @@last.Reset + if @@fminer.prediction_feature.feature_type == "regression" + @@last.SetRegression(true) # AM: DO NOT MOVE DOWN! Must happen before the other Set... operations! + else + bad_request_error "No accept values for "\ + "dataset '#{fminer.training_dataset.uri}' and "\ + "feature '#{fminer.prediction_feature.uri}'" unless + @@fminer.prediction_feature.accept_values + value_map=@@fminer.training_dataset.value_map(@@fminer.prediction_feature) + end + @@last.SetMinfreq(@@fminer.minfreq) + @@last.SetType(1) if params[:feature_type] == "paths" + @@last.SetConsoleOut(false) + + + feature_dataset = OpenTox::Dataset.new(nil, @subjectid) + feature_dataset.metadata = { + DC.title => "LAST representatives for " + @@fminer.training_dataset.metadata[DC.title].to_s, + DC.creator => to('/fminer/last',:full), + OT.hasSource => to('/fminer/last', :full) + } + feature_dataset.parameters = [ + { DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] }, + { DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] }, + { DC.title => "min_frequency", OT.paramValue => @@fminer.minfreq }, + { DC.title => "nr_hits", OT.paramValue => (params[:nr_hits] == "true" ? "true" : "false") } + ] + + @@fminer.compounds = [] + @@fminer.db_class_sizes = Array.new # AM: effect + @@fminer.all_activities = Hash.new # DV: for effect calculation (class and regr) + @@fminer.smi = [] # needed for matching the patterns back + + # Add data to fminer + @@fminer.add_fminer_data(@@last, value_map) + #task.progress 10 + step_width = 80 / @@bbrc.GetNoRootNodes().to_f + # run @@last + xml = "" + (0 .. @@last.GetNoRootNodes()-1).each do |j| + results = @@last.MineRoot(j) + #task.progress 10+step_width*(j+1) + results.each do |result| + xml << result end + end + + lu = LU.new # uses last-utils here + dom=lu.read(xml) # parse GraphML + smarts=lu.smarts_rb(dom,'nls') # converts patterns to LAST-SMARTS using msa variant (see last-pm.maunz.de) + params[:nr_hits] == "true" ? hit_count=true : hit_count=false + matches, counts = lu.match_rb(@@fminer.smi,smarts,hit_count,true) # creates instantiations - fminer_compounds = @@fminer.training_dataset.compounds.collect.to_a - @@fminer.training_dataset.build_feature_positions - prediction_feature_idx = @@fminer.training_dataset.feature_positions[@@fminer.prediction_feature.uri] - prediction_feature_all_acts = fminer_compounds.each_with_index.collect { |c,idx| - @@fminer.training_dataset.data_entries[idx][prediction_feature_idx] + features = [] + # prepare to receive results as hash { c => [ [f,v], ... ] } + fminer_results = {} + matches.each do |smarts, ids| + metadata, parameters = @@fminer.calc_metadata(smarts, ids, counts[smarts], @@last, nil, value_map, params) + feature = OpenTox::Feature.find_by_title(smarts.dup,metadata) + features << feature + ids.each_with_index { |id,idx| + fminer_results[@@fminer.compounds[id]] || fminer_results[@@fminer.compounds[id]] = {} + fminer_results[@@fminer.compounds[id]][feature.uri] = counts[smarts][idx] } - fminer_noact_compounds = fminer_compounds - @@fminer.compounds + end + + fminer_compounds = @@fminer.training_dataset.compounds.collect.to_a + @@fminer.training_dataset.build_feature_positions + prediction_feature_idx = @@fminer.training_dataset.feature_positions[@@fminer.prediction_feature.uri] + prediction_feature_all_acts = fminer_compounds.each_with_index.collect { |c,idx| + @@fminer.training_dataset.data_entries[idx][prediction_feature_idx] + } + fminer_noact_compounds = fminer_compounds - @@fminer.compounds - feature_dataset.features = features + feature_dataset.features = features + if (params[:get_target] == "true") + feature_dataset.features = [ @@fminer.prediction_feature ] + feature_dataset.features + end + fminer_compounds.each_with_index { |c,idx| + row = [ c ] if (params[:get_target] == "true") - feature_dataset.features = [ @@fminer.prediction_feature ] + feature_dataset.features + row = row + [ prediction_feature_all_acts[idx] ] end - fminer_compounds.each_with_index { |c,idx| - row = [ c ] - if (params[:get_target] == "true") - row = row + [ prediction_feature_all_acts[idx] ] - end - features.each { |f| - row << (fminer_results[c] ? fminer_results[c][f.uri] : nil) - } - row.collect! { |v| v ? v : 0 } unless fminer_noact_compounds.include? c - feature_dataset << row + features.each { |f| + row << (fminer_results[c] ? fminer_results[c][f.uri] : nil) } - feature_dataset.put @subjectid - $logger.debug feature_dataset.uri - feature_dataset.uri - - rescue => e - $logger.debug "#{e.class}: #{e.message}" - $logger.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}" - end - + row.collect! { |v| v ? v : 0 } unless fminer_noact_compounds.include? c + feature_dataset << row + } + feature_dataset.put @subjectid + $logger.debug feature_dataset.uri + feature_dataset.uri end response['Content-Type'] = 'text/uri-list' diff --git a/webapp/fs.rb b/webapp/fs.rb index 670885f..6286629 100644 --- a/webapp/fs.rb +++ b/webapp/fs.rb @@ -11,14 +11,14 @@ module OpenTox # Get list of feature selection algorithms # @return [text/uri-list] URIs get '/fs/?' do - list = [ url_for('/fs/rfe', :full) ].join("\n") + "\n" + list = [ to('/fs/rfe', :full) ].join("\n") + "\n" format_output(list) end # Get representation of Recursive Feature Elimination algorithm # @return [String] Representation get "/fs/rfe/?" do - algorithm = OpenTox::Algorithm.new(url_for('/fs/rfe',:full)) + algorithm = OpenTox::Algorithm.new(to('/fs/rfe',:full)) algorithm.metadata = { DC.title => 'Recursive Feature Elimination', DC.creator => "andreas@maunz.de", diff --git a/webapp/lazar.rb b/webapp/lazar.rb index f761dce..ed4d623 100644 --- a/webapp/lazar.rb +++ b/webapp/lazar.rb @@ -29,7 +29,7 @@ module OpenTox # Get representation of lazar algorithm # @return [String] Representation get '/lazar/?' do - algorithm = OpenTox::Algorithm.new(url_for('/lazar',:full)) + algorithm = OpenTox::Algorithm.new(to('/lazar',:full)) algorithm.metadata = { DC.title => 'lazar', DC.creator => 'helma@in-silico.ch, andreas@maunz.de', @@ -67,9 +67,10 @@ module OpenTox $task[:uri], @subjectid, { RDF::DC.description => "Create lazar model", - RDF::DC.creator => url_for('/lazar',:full) + RDF::DC.creator => to('/lazar',:full) } ) do |task| +<<<<<<< HEAD begin lazar = OpenTox::Model.new(nil, @subjectid) lazar.parameters = lazar.check_params($lazar_params, params) @@ -93,9 +94,27 @@ module OpenTox $logger.debug "#{e.class}: #{e.message}" $logger.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}" end +======= + + lazar = OpenTox::Model.new(nil, @subjectid) + lazar.parameters = lazar.check_params($lazar_params, params) + lazar.metadata = { + DC.title => "lazar model", + OT.dependentVariables => lazar.find_parameter_value("prediction_feature_uri"), + OT.trainingDataset => lazar.find_parameter_value("training_dataset_uri"), + OT.featureDataset => lazar.find_parameter_value("feature_dataset_uri"), + RDF.type => ( OpenTox::Feature.find(lazar.find_parameter_value("prediction_feature_uri")).feature_type == "classification" ? + [OT.Model, OTA.ClassificationLazySingleTarget] : + [OT.Model, OTA.RegressionLazySingleTarget] + ) + } + # task.progress 10 + lazar.put @subjectid + lazar.uri +>>>>>>> 424c542130ef5b8ebd8f95403d21fd8df515bcd0 end response['Content-Type'] = 'text/uri-list' - service_unavailable_error "Service unavailable" if task.cancelled? + #service_unavailable_error "Service unavailable" if task.cancelled? halt 202,task.uri.to_s+"\n" end @@ -126,24 +145,59 @@ module OpenTox $task[:uri], @subjectid, { +<<<<<<< HEAD RDF::DC.description => "Apply lazar model", RDF::DC.creator => url_for('/lazar/predict',:full) +======= + RDF::DC.description => "Create lazar model", + RDF::DC.creator => to('/lazar/predict',:full) +>>>>>>> 424c542130ef5b8ebd8f95403d21fd8df515bcd0 } ) do |task| - begin - if params[:dataset_uri] - compounds = OpenTox::Dataset.find(params[:dataset_uri]).compounds - else - compounds = [ OpenTox::Compound.new(params[:compound_uri]) ] + if params[:dataset_uri] + compounds = OpenTox::Dataset.find(params[:dataset_uri]).compounds + else + compounds = [ OpenTox::Compound.new(params[:compound_uri]) ] + end + + compounds.each { |query_compound| + params[:compound_uri] = query_compound.uri # AM: store compound in params hash + unless @prediction_dataset # AM: only once for dataset predictions + @prediction_dataset = OpenTox::Dataset.new(nil, @subjectid) + + @model_params_hash = $lazar_params.inject({}){ |h,p| + h[p] = params[p].to_s unless params[p].nil? + h + } + @model = OpenTox::Model.new(@model_params_hash) + + @prediction_dataset.metadata = { + DC.title => "Lazar prediction", + DC.creator => @uri.to_s, + OT.hasSource => @uri.to_s, + OT.dependentVariables => @model_params_hash["prediction_feature_uri"], + OT.predictedVariables => @model_params_hash["prediction_feature_uri"] + } + + puts "Loading t dataset" + @training_dataset = OpenTox::Dataset.find(params[:training_dataset_uri], @subjectid) + @prediction_feature = OpenTox::Feature.find(params[:prediction_feature_uri],@subjectid) + #@training_dataset = OpenTox::Dataset.find(params[:training_dataset], @subjectid) + #@prediction_feature = OpenTox::Feature.find(params[:prediction_feature],@subjectid) + @confidence_feature = OpenTox::Feature.find_by_title("confidence", {RDF.type => [RDF::OT.NumericFeature]}) + @similarity_feature = OpenTox::Feature.find_by_title("similarity", {RDF.type => [RDF::OT.NumericFeature]}) + @prediction_dataset.features = [ @prediction_feature, @confidence_feature, @similarity_feature ] end + + database_activity = @training_dataset.database_activity(params) + if database_activity - compounds.each { |query_compound| - params[:compound_uri] = query_compound.uri # AM: store compound in params hash - unless @prediction_dataset # AM: only once for dataset predictions - @prediction_dataset = OpenTox::Dataset.new(nil, @subjectid) + prediction_value = database_activity.to_f + confidence_value = 1.0 +<<<<<<< HEAD @model_params_hash = $lazar_params.inject({}){ |h,p| h[p] = params[p].to_s unless params[p].nil? h @@ -173,8 +227,47 @@ module OpenTox orig_value = database_activity.to_f predicted_value = orig_value confidence_value = 1.0 +======= + else + puts "Creating prediction" + @model = OpenTox::Model.new(@model_params_hash) + + unless @feature_dataset + puts "Loading f dataset" + @feature_dataset = OpenTox::Dataset.find(params[:feature_dataset_uri], @subjectid) + #@feature_dataset = OpenTox::Dataset.find(params[:feature_dataset], @subjectid) + end +>>>>>>> 424c542130ef5b8ebd8f95403d21fd8df515bcd0 + case @feature_dataset.find_parameter_value("nr_hits") + when "true" then @model.feature_calculation_algorithm = "match_hits" + when "false" then @model.feature_calculation_algorithm = "match" + end + puts @model.feature_calculation_algorithm + pc_type = @feature_dataset.find_parameter_value("pc_type") + @model.pc_type = pc_type unless pc_type.nil? + lib = @feature_dataset.find_parameter_value("lib") + @model.lib = lib unless lib.nil? + + print "cosine transformation ..." + # AM: transform to cosine space + @model.min_sim = (@model.min_sim.to_f*2.0-1.0).to_s if @model.similarity_algorithm =~ /cosine/ + puts "finished" + + #puts @model.pc_type + puts @feature_dataset.features.size + if @feature_dataset.features.size > 0 + compound_params = { + :compound => query_compound, + :feature_dataset => @feature_dataset, + :pc_type => @model.pc_type, + :lib => @model.lib + } + # use send, not eval, for calling the method (good backtrace) + $logger.debug "Calculating q fps" + compound_fingerprints = OpenTox::Algorithm::FeatureValues.send( @model.feature_calculation_algorithm, compound_params, @subjectid ) else +<<<<<<< HEAD @model = OpenTox::Model.new(@model_params_hash) unless @feature_dataset @@ -230,9 +323,41 @@ module OpenTox $logger.debug "Prediction: '#{predicted_value}'" $logger.debug "Confidence: '#{confidence_value}'" +======= + bad_request_error "No features found" +>>>>>>> 424c542130ef5b8ebd8f95403d21fd8df515bcd0 end + @model.add_data(@training_dataset, @feature_dataset, @prediction_feature, compound_fingerprints, @subjectid) + mtf = OpenTox::Algorithm::Transform::ModelTransformer.new(@model) + mtf.transform + $logger.debug "Predicting q" + prediction = OpenTox::Algorithm::Neighbors.send(@model.prediction_algorithm, { :props => mtf.props, + :acts => mtf.acts, + :sims => mtf.sims, + :value_map => @training_dataset.value_map(@prediction_feature), + :min_train_performance => @model.min_train_performance + } ) + prediction_value = prediction[:prediction].to_f + confidence_value = prediction[:confidence].to_f + + # AM: transform to original space + confidence_value = ((confidence_value+1.0)/2.0).abs if @model.similarity_algorithm =~ /cosine/ + prediction_value = @training_dataset.value_map(@prediction_feature)[prediction[:prediction].to_i] if @prediction_feature.feature_type == "classification" + + $logger.debug "Prediction: '#{prediction_value}'" + $logger.debug "Confidence: '#{confidence_value}'" + end + + @prediction_dataset << [ + query_compound, + prediction_value, + confidence_value, + nil + ] + @model.neighbors.each { |neighbor| @prediction_dataset << [ +<<<<<<< HEAD query_compound, orig_value, predicted_value, @@ -249,24 +374,28 @@ module OpenTox ] } +======= + OpenTox::Compound.new(neighbor[:compound]), + @training_dataset.value_map(@prediction_feature)[neighbor[:activity]], + nil, + neighbor[:similarity] + ] +>>>>>>> 424c542130ef5b8ebd8f95403d21fd8df515bcd0 } - @prediction_dataset.parameters = $lazar_params.collect { |p| - {DC.title => p, OT.paramValue => @model.instance_variable_get("@#{p}")} unless @model.instance_variable_get("@#{p}").nil? - } + } - @prediction_dataset.put - $logger.debug @prediction_dataset.uri - @prediction_dataset.uri + @prediction_dataset.parameters = $lazar_params.collect { |p| + {DC.title => p, OT.paramValue => @model.instance_variable_get("@#{p}")} unless @model.instance_variable_get("@#{p}").nil? + } - rescue => e - $logger.debug "#{e.class}: #{e.message}" - $logger.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}" - end + @prediction_dataset.put + $logger.debug @prediction_dataset.uri + @prediction_dataset.uri end response['Content-Type'] = 'text/uri-list' - service_unavailable_error "Service unavailable" if task.cancelled? + #service_unavailable_error "Service unavailable" if task.cancelled? halt 202,task.uri.to_s+"\n" end diff --git a/webapp/sinatra.rb b/webapp/sinatra.rb index d40633f..cf0e7c5 100644 --- a/webapp/sinatra.rb +++ b/webapp/sinatra.rb @@ -8,9 +8,6 @@ module OpenTox class Application < Service - # Get url_for support - helpers Sinatra::UrlForHelper - # Put any code here that should be executed immediately before # request is processed before { |