diff options
author | mr <mr@mrautenberg.de> | 2011-08-04 18:37:33 +0200 |
---|---|---|
committer | mr <mr@mrautenberg.de> | 2011-08-04 18:37:33 +0200 |
commit | 6f26ea70b05b69fb69a102fb4cec688338c1f7ff (patch) | |
tree | ce05acc8adb8c64ae8cc1ea997d35744b062e35e | |
parent | 6b9e012576857fbc6c51cd86581cca792f367cdf (diff) | |
parent | 7a13c2da03220ad6716fe7da5bfa3403c873d7d1 (diff) |
Merge branch 'release/v2.1.0' (v2.1.0)
-rw-r--r-- | Rakefile | 85 | ||||
-rw-r--r-- | VERSION | 2 | ||||
-rw-r--r-- | lib/algorithm.rb | 877 | ||||
-rw-r--r-- | lib/compound.rb | 40 | ||||
-rw-r--r-- | lib/config/config_ru.rb | 2 | ||||
-rw-r--r-- | lib/dataset.rb | 75 | ||||
-rw-r--r-- | lib/environment.rb | 6 | ||||
-rw-r--r-- | lib/feature.rb | 28 | ||||
-rw-r--r-- | lib/helper.rb | 3 | ||||
-rw-r--r-- | lib/model.rb | 263 | ||||
-rw-r--r-- | lib/opentox-ruby.rb | 2 | ||||
-rw-r--r-- | lib/overwrite.rb | 3 | ||||
-rw-r--r-- | lib/parser.rb | 316 | ||||
-rw-r--r-- | lib/rest_client_wrapper.rb | 5 | ||||
-rw-r--r-- | lib/serializer.rb | 34 | ||||
-rw-r--r-- | lib/task.rb | 11 | ||||
-rw-r--r-- | lib/to-html.rb | 107 | ||||
-rw-r--r-- | lib/validation.rb | 122 |
18 files changed, 1581 insertions, 400 deletions
@@ -8,53 +8,46 @@ begin gem.summary = %Q{Ruby wrapper for the OpenTox REST API} gem.description = %Q{Ruby wrapper for the OpenTox REST API (http://www.opentox.org)} gem.email = "helma@in-silico.ch" - gem.homepage = "http://github.com/helma/opentox-ruby" + gem.homepage = "http://github.com/opentox/opentox-ruby" gem.authors = ["Christoph Helma, Martin Guetlein, Andreas Maunz, Micha Rautenberg, David Vorgrimmler"] - # dependencies - [ "sinatra", - "emk-sinatra-url-for", - "sinatra-respond_to", - "sinatra-static-assets", - "rest-client", - "rack", - "rack-contrib", - "rack-flash", - "nokogiri", - "rubyzip", - "roo", - "spreadsheet", - "google-spreadsheet-ruby", - "yajl-ruby", - "tmail", - "rinruby", - "ohm", - "ohm-contrib", - "SystemTimer", - "rjb", - #valiation-gems - "dm-core", - "dm-serializer", - "dm-timestamps", - "dm-types", - "dm-migrations", - "dm-validations", - "dm-sqlite-adapter" - ].each { |dep| gem.add_dependency dep } -=begin - [ "dm-core", - 'dm-serializer', - 'dm-timestamps', - 'dm-types', - 'dm-migrations', - "dm-mysql-adapter", - "dm-validations", - ].each {|dep| gem.add_dependency dep, ">= 1" } -=end - #valiation-gem - gem.add_dependency "haml", ">=3" - # validation-gems - gem.add_dependency "ruby-plot", "~>0.4.0" - ['jeweler'].each { |dep| gem.add_development_dependency dep } + # dependencies with versions + gem.add_dependency "sinatra", "=1.2.6" + gem.add_dependency "emk-sinatra-url-for", "=0.2.1" + gem.add_dependency "sinatra-respond_to", "=0.7.0" + gem.add_dependency "sinatra-static-assets", "=0.5.0" + gem.add_dependency "rest-client", "=1.6.1" + gem.add_dependency "rack", "=1.3.1" + gem.add_dependency "rack-contrib", "=1.1.0" + gem.add_dependency "rack-flash", "=0.1.1" + gem.add_dependency "nokogiri", "=1.4.4" + gem.add_dependency "rubyzip", "=0.9.4" + gem.add_dependency "roo", "=1.9.3" + gem.add_dependency "spreadsheet", "=0.6.5.4" + gem.add_dependency "google-spreadsheet-ruby", "=0.1.5" + gem.add_dependency "yajl-ruby", "=0.8.2" + 
#gem.add_dependency "mail", "=2.3.0" + gem.add_dependency "rinruby", "=2.0.2" + gem.add_dependency "ohm", "=0.1.3" + gem.add_dependency "ohm-contrib", "=0.1.1" + gem.add_dependency "SystemTimer", "=1.2.3" + gem.add_dependency "rjb", "=1.3.4" + gem.add_dependency "haml", "=3.1.1" + # for headless browser tests + gem.add_dependency "akephalos", "=0.2.5" + #valiation-gems + gem.add_dependency "dm-core", "=1.1.0" + gem.add_dependency "dm-serializer", "=1.1.0" + gem.add_dependency "dm-timestamps", "=1.1.0" + gem.add_dependency "dm-types", "=1.1.0" + gem.add_dependency "dm-migrations", "=1.1.0" + gem.add_dependency "dm-validations", "=1.1.0" + gem.add_dependency "dm-sqlite-adapter", "=1.1.0" + gem.add_dependency "ruby-plot", "=0.5.0" + gem.add_dependency "gsl", "=1.14.7" + gem.add_dependency "statsample", "=1.1.0" + #gem.add_dependency "statsample-optimization", "=2.1.0" + + gem.add_development_dependency 'jeweler' gem.files = FileList["[A-Z]*", "{bin,generators,lib,test}/**/*", 'lib/jeweler/templates/.gitignore'] end Jeweler::GemcutterTasks.new @@ -1 +1 @@ -2.0.1
\ No newline at end of file +2.1.0
\ No newline at end of file diff --git a/lib/algorithm.rb b/lib/algorithm.rb index 7fbe0dc..85b54ab 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -3,6 +3,8 @@ # avoids compiling R with X R = nil require "rinruby" +require "statsample" +require 'uri' module OpenTox @@ -16,6 +18,7 @@ module OpenTox # @param [optional,OpenTox::Task] waiting_task (can be a OpenTox::Subtask as well), progress is updated accordingly # @return [String] URI of new resource (dataset, model, ...) def run(params=nil, waiting_task=nil) + LOGGER.info "Running algorithm '"+@uri.to_s+"' with params: "+params.inspect RestClientWrapper.post(@uri, params, {:accept => 'text/uri-list'}, waiting_task).to_s end @@ -45,12 +48,75 @@ module OpenTox end # Fminer algorithms (https://github.com/amaunz/fminer2) - module Fminer + class Fminer include Algorithm + attr_accessor :prediction_feature, :training_dataset, :minfreq, :compounds, :db_class_sizes, :all_activities, :smi + + def check_params(params,per_mil,subjectid=nil) + raise OpenTox::NotFoundError.new "Please submit a dataset_uri." unless params[:dataset_uri] and !params[:dataset_uri].nil? + raise OpenTox::NotFoundError.new "Please submit a prediction_feature." unless params[:prediction_feature] and !params[:prediction_feature].nil? + @prediction_feature = OpenTox::Feature.find params[:prediction_feature], subjectid + @training_dataset = OpenTox::Dataset.find "#{params[:dataset_uri]}", subjectid + raise OpenTox::NotFoundError.new "No feature #{params[:prediction_feature]} in dataset #{params[:dataset_uri]}" unless @training_dataset.features and @training_dataset.features.include?(params[:prediction_feature]) + + unless params[:min_frequency].nil? + @minfreq=params[:min_frequency].to_i + raise "Minimum frequency must be a number >0!" unless @minfreq>0 + else + @minfreq=OpenTox::Algorithm.min_frequency(@training_dataset,per_mil) # AM sugg. 
8-10 per mil for BBRC, 50 per mil for LAST + end + end + + def add_fminer_data(fminer_instance, params, value_map) + + id = 1 # fminer start id is not 0 + @training_dataset.data_entries.each do |compound,entry| + begin + smiles = OpenTox::Compound.smiles(compound.to_s) + rescue + LOGGER.warn "No resource for #{compound.to_s}" + next + end + if smiles == '' or smiles.nil? + LOGGER.warn "Cannot find smiles for #{compound.to_s}." + next + end + + value_map=params[:value_map] unless params[:value_map].nil? + entry.each do |feature,values| + if feature == @prediction_feature.uri + values.each do |value| + if value.nil? + LOGGER.warn "No #{feature} activity for #{compound.to_s}." + else + if @prediction_feature.feature_type == "classification" + activity= value_map.invert[value].to_i # activities are mapped to 1..n + @db_class_sizes[activity-1].nil? ? @db_class_sizes[activity-1]=1 : @db_class_sizes[activity-1]+=1 # AM effect + elsif @prediction_feature.feature_type == "regression" + activity= value.to_f + end + begin + fminer_instance.AddCompound(smiles,id) + fminer_instance.AddActivity(activity, id) + @all_activities[id]=activity # DV: insert global information + @compounds[id] = compound + @smi[id] = smiles + id += 1 + rescue Exception => e + LOGGER.warn "Could not add " + smiles + "\t" + value.to_s + " to fminer" + LOGGER.warn e.backtrace + end + end + end + end + end + end + end + + end # Backbone Refinement Class mining (http://bbrc.maunz.de/) - class BBRC - include Fminer + class BBRC < Fminer # Initialize bbrc algorithm def initialize(subjectid=nil) super File.join(CONFIG[:services]["opentox-algorithm"], "fminer/bbrc") @@ -59,8 +125,7 @@ module OpenTox end # LAtent STructure Pattern Mining (http://last-pm.maunz.de) - class LAST - include Fminer + class LAST < Fminer # Initialize last algorithm def initialize(subjectid=nil) super File.join(CONFIG[:services]["opentox-algorithm"], "fminer/last") @@ -68,7 +133,6 @@ module OpenTox end end - end # Create lazar 
prediction model class Lazar @@ -90,19 +154,34 @@ module OpenTox # @param [Array] features_a Features of first compound # @param [Array] features_b Features of second compound # @param [optional, Hash] weights Weights for all features + # @param [optional, Hash] params Keys: `:training_compound, :compound, :training_compound_features_hits, :nr_hits, :compound_features_hits` are required # @return [Float] (Weighted) tanimoto similarity - def self.tanimoto(features_a,features_b,weights=nil) + def self.tanimoto(features_a,features_b,weights=nil,params=nil) common_features = features_a & features_b all_features = (features_a + features_b).uniq - common_p_sum = 0.0 + #LOGGER.debug "dv --------------- common: #{common_features}, all: #{all_features}" if common_features.size > 0 if weights - common_features.each{|f| common_p_sum += Algorithm.gauss(weights[f])} - all_p_sum = 0.0 - all_features.each{|f| all_p_sum += Algorithm.gauss(weights[f])} + #LOGGER.debug "nr_hits: #{params[:nr_hits]}" + if !params.nil? 
&& params[:nr_hits] + params[:weights] = weights + params[:mode] = "min" + params[:features] = common_features + common_p_sum = Algorithm.p_sum_support(params) + params[:mode] = "max" + params[:features] = all_features + all_p_sum = Algorithm.p_sum_support(params) + else + common_p_sum = 0.0 + common_features.each{|f| common_p_sum += Algorithm.gauss(weights[f])} + all_p_sum = 0.0 + all_features.each{|f| all_p_sum += Algorithm.gauss(weights[f])} + end + #LOGGER.debug "common_p_sum: #{common_p_sum}, all_p_sum: #{all_p_sum}, c/a: #{common_p_sum/all_p_sum}" common_p_sum/all_p_sum else - common_features.to_f/all_features + #LOGGER.debug "common_features : #{common_features}, all_features: #{all_features}, c/a: #{(common_features.size/all_features.size).to_f}" + common_features.size.to_f/all_features.size.to_f end else 0.0 @@ -132,65 +211,300 @@ module OpenTox end end + # Structural Graph Clustering by TU Munich + # Finds clusters similar to a query structure in a given training dataset + # May be queried for cluster membership of an unknown compound + class StructuralClustering + attr_accessor :training_dataset_uri, :training_threshold, :query_dataset_uri, :query_threshold, :target_clusters_array + + # @params[String] Training dataset_uri + # @params[Float] Similarity threshold for training (optional) + # @params[String] Cluster service uri (no AA) + def initialize training_dataset_uri, training_threshold=0.8, cluster_service_uri = "http://opentox-dev.informatik.tu-muenchen.de:8080/OpenTox/algorithm/StructuralClustering" + + if (training_dataset_uri =~ URI::regexp).nil? || (cluster_service_uri =~ URI::regexp).nil? + raise "Invalid URI." + end + @training_dataset_uri = training_dataset_uri + if !OpenTox::Algorithm.numeric? training_threshold || training_threshold <0 || training_threshold >1 + raise "Training threshold out of bounds." 
+ end + @training_threshold = training_threshold.to_f + + # Train a cluster model + params = {:dataset_uri => @training_dataset_uri, :threshold => @training_threshold } + @cluster_model_uri = OpenTox::RestClientWrapper.post cluster_service_uri, params + cluster_model_rdf = OpenTox::RestClientWrapper.get @cluster_model_uri + @datasets = OpenTox::Parser::Owl.from_rdf cluster_model_rdf, OT.Dataset, true # must extract OT.Datasets from model + + # Process parsed OWL objects + @clusterid_dataset_map = Hash.new + @datasets.each { |d| + begin + d.metadata[OT.hasSource]["Structural Clustering cluster "] = "" # must parse in metadata for string (not elegant) + @clusterid_dataset_map[d.metadata[OT.hasSource].to_i] = d.uri + rescue Exception => e + # ignore other entries! + end + } + end + + # Whether a model has been trained + def trained? + !@cluster_model_uri.nil? + end + + # Instance query: clusters for a compound + # @params[String] Query compound + # @params[Float] Similarity threshold for query to clusters (optional) + def get_clusters query_compound_uri, query_threshold = 0.5 + + if !OpenTox::Algorithm.numeric? query_threshold || query_threshold <0 || query_threshold >1 + raise "Query threshold out of bounds." 
+ end + @query_threshold = query_threshold.to_f + + + # Preparing a query dataset + query_dataset = OpenTox::Dataset.new + @query_dataset_uri = query_dataset.save + query_dataset = OpenTox::Dataset.find @query_dataset_uri + query_dataset.add_compound query_compound_uri + @query_dataset_uri = query_dataset.save + + # Obtaining a clustering for query compound + params = { :dataset_uri => @query_dataset_uri, :threshold => @query_threshold } + cluster_query_dataset_uri = OpenTox::RestClientWrapper.post @cluster_model_uri, params + cluster_query_dataset = OpenTox::Dataset.new cluster_query_dataset_uri + cluster_query_dataset.load_all + + # Reading cluster ids for features from metadata + feature_clusterid_map = Hash.new + pattern="Prediction feature for cluster assignment " # must parse for string in metadata (not elegant) + cluster_query_dataset.features.each { |feature_uri,metadata| + metadata[DC.title][pattern]="" + feature_clusterid_map[feature_uri] = metadata[DC.title].to_i + } + + # Integrity check + unless cluster_query_dataset.compounds.size == 1 + raise "Number of predicted compounds is != 1." + end + + # Process data entry + query_compound_uri = cluster_query_dataset.compounds[0] + @target_clusters_array = Array.new + cluster_query_dataset.features.keys.each { |cluster_membership_feature| + + # Getting dataset URI for cluster + target_cluster = feature_clusterid_map[cluster_membership_feature] + dataset = @clusterid_dataset_map[target_cluster] + + # Finally look up presence + data_entry = cluster_query_dataset.data_entries[query_compound_uri] + present = data_entry[cluster_membership_feature][0] + + # Store result + @target_clusters_array << dataset if present > 0.5 # 0.0 for absence, 1.0 for presence + } + end + + end + module Neighbors + # Local multi-linear regression (MLR) prediction from neighbors. + # Uses propositionalized setting. 
+ # @param [Hash] params Keys `:neighbors,:compound,:features,:p_values,:similarity_algorithm,:prop_kernel,:value_map,:transform` are required + # @return [Numeric] A prediction value. + def self.local_mlr_prop(params) + + confidence=0.0 + prediction=nil + + if params[:neighbors].size>0 + props = params[:prop_kernel] ? get_props(params) : nil + acts = params[:neighbors].collect { |n| act = n[:activity].to_f } + sims = params[:neighbors].collect { |n| Algorithm.gauss(n[:similarity]) } + LOGGER.debug "Local MLR (Propositionalization / GSL)." + prediction = mlr( {:n_prop => props[0], :q_prop => props[1], :sims => sims, :acts => acts} ) + transformer = eval("OpenTox::Algorithm::Transform::#{params[:transform]["class"]}.new ([#{prediction}], #{params[:transform]["offset"]})") + prediction = transformer.values[0] + prediction = nil if prediction.infinite? || params[:prediction_min_max][1] < prediction || params[:prediction_min_max][0] > prediction + LOGGER.debug "Prediction is: '" + prediction.to_s + "'." + params[:conf_stdev] = false if params[:conf_stdev].nil? + confidence = get_confidence({:sims => sims, :acts => acts, :neighbors => params[:neighbors], :conf_stdev => params[:conf_stdev]}) + confidence = nil if prediction.nil? + end + {:prediction => prediction, :confidence => confidence} + + end + + # Multi-linear regression weighted by similarity. + # Objective Feature Selection, Principal Components Analysis, Scaling of Axes. + # @param [Hash] params Keys `:n_prop, :q_prop, :sims, :acts` are required + # @return [Numeric] A prediction value. 
+ def self.mlr(params) + + # GSL matrix operations: + # to_a : row-wise conversion to nested array + # + # Statsample operations (build on GSL): + # to_scale: convert into Statsample format + + begin + n_prop = params[:n_prop].collect { |v| v } + q_prop = params[:q_prop].collect { |v| v } + n_prop << q_prop # attach q_prop + nr_cases, nr_features = get_sizes n_prop + data_matrix = GSL::Matrix.alloc(n_prop.flatten, nr_cases, nr_features) + + # Principal Components Analysis + LOGGER.debug "PCA..." + pca = OpenTox::Algorithm::Transform::PCA.new(data_matrix) + data_matrix = pca.data_transformed_matrix + + # Attach intercept column to data + intercept = GSL::Matrix.alloc(Array.new(nr_cases,1.0),nr_cases,1) + data_matrix = data_matrix.horzcat(intercept) + (0..data_matrix.size2-2).each { |i| + autoscaler = OpenTox::Algorithm::Transform::AutoScale.new(data_matrix.col(i)) + data_matrix.col(i)[0..data_matrix.size1-1] = autoscaler.scaled_values + } + + # Detach query instance + n_prop = data_matrix.to_a + q_prop = n_prop.pop + nr_cases, nr_features = get_sizes n_prop + data_matrix = GSL::Matrix.alloc(n_prop.flatten, nr_cases, nr_features) + + # model + support vectors + LOGGER.debug "Creating MLR model ..." 
+ c, cov, chisq, status = GSL::MultiFit::wlinear(data_matrix, params[:sims].to_scale.to_gsl, params[:acts].to_scale.to_gsl) + GSL::MultiFit::linear_est(q_prop.to_scale.to_gsl, c, cov)[0] + rescue Exception => e + LOGGER.debug "#{e.class}: #{e.message}" + end + + end + # Classification with majority vote from neighbors weighted by similarity - # @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity` - # @param [optional] params Ignored (only for compatibility with local_svm_regression) - # @return [Hash] Hash with keys `:prediction, :confidence` - def self.weighted_majority_vote(neighbors,params={}) - conf = 0.0 + # @param [Hash] params Keys `:neighbors,:compound,:features,:p_values,:similarity_algorithm,:prop_kernel,:value_map,:transform` are required + # @return [Numeric] A prediction value. + def self.weighted_majority_vote(params) + + neighbor_contribution = 0.0 + confidence_sum = 0.0 confidence = 0.0 - neighbors.each do |neighbor| - case neighbor[:activity].to_s - when 'true' - conf += Algorithm.gauss(neighbor[:similarity]) - when 'false' - conf -= Algorithm.gauss(neighbor[:similarity]) + prediction = nil + + params[:neighbors].each do |neighbor| + neighbor_weight = Algorithm.gauss(neighbor[:similarity]).to_f + neighbor_contribution += neighbor[:activity].to_f * neighbor_weight + + if params[:value_map].size == 2 # AM: provide compat to binary classification: 1=>false 2=>true + case neighbor[:activity] + when 1 + confidence_sum -= neighbor_weight + when 2 + confidence_sum += neighbor_weight + end + else + confidence_sum += neighbor_weight end end - if conf > 0.0 - prediction = true - elsif conf < 0.0 - prediction = false - else - prediction = nil - end - confidence = conf/neighbors.size if neighbors.size > 0 - {:prediction => prediction, :confidence => confidence.abs} + + if params[:value_map].size == 2 + if confidence_sum >= 0.0 + prediction = 2 unless params[:neighbors].size==0 + elsif confidence_sum < 0.0 + prediction = 1 unless 
params[:neighbors].size==0 + end + else + prediction = (neighbor_contribution/confidence_sum).round unless params[:neighbors].size==0 # AM: new multinomial prediction + end + LOGGER.debug "Prediction is: '" + prediction.to_s + "'." unless prediction.nil? + confidence = confidence_sum/params[:neighbors].size if params[:neighbors].size > 0 + LOGGER.debug "Confidence is: '" + confidence.to_s + "'." unless prediction.nil? + return {:prediction => prediction, :confidence => confidence.abs} end # Local support vector regression from neighbors - # @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity, :features` - # @param [Hash] params Keys `:similarity_algorithm,:p_values` are required - # @return [Hash] Hash with keys `:prediction, :confidence` - def self.local_svm_regression(neighbors,params ) - sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values between query and neighbors - conf = sims.inject{|sum,x| sum + x } - - # AM: Control log taking - take_logs=true - neighbors.each do |n| - if (! n[:activity].nil?) && (n[:activity].to_f < 0.0) - take_logs = false - end + # @param [Hash] params Keys `:neighbors,:compound,:features,:p_values,:similarity_algorithm,:prop_kernel,:value_map,:transform` are required + # @return [Numeric] A prediction value. + def self.local_svm_regression(params) + + confidence = 0.0 + prediction = nil + if params[:neighbors].size>0 + props = params[:prop_kernel] ? get_props(params) : nil + acts = params[:neighbors].collect{ |n| n[:activity].to_f } + sims = params[:neighbors].collect{ |n| Algorithm.gauss(n[:similarity]) } + prediction = props.nil? ? local_svm(acts, sims, "nu-svr", params) : local_svm_prop(props, acts, "nu-svr") + transformer = eval("OpenTox::Algorithm::Transform::#{params[:transform]["class"]}.new ([#{prediction}], #{params[:transform]["offset"]})") + prediction = transformer.values[0] + prediction = nil if prediction.infinite? 
|| params[:prediction_min_max][1] < prediction || params[:prediction_min_max][0] > prediction + LOGGER.debug "Prediction is: '" + prediction.to_s + "'." + params[:conf_stdev] = false if params[:conf_stdev].nil? + confidence = get_confidence({:sims => sims, :acts => acts, :neighbors => params[:neighbors], :conf_stdev => params[:conf_stdev]}) + confidence = nil if prediction.nil? end - acts = neighbors.collect do |n| - act = n[:activity] - take_logs ? Math.log10(act.to_f) : act.to_f - end # activities of neighbors for supervised learning + {:prediction => prediction, :confidence => confidence} + + end + + # Local support vector classification from neighbors + # @param [Hash] params Keys `:neighbors,:compound,:features,:p_values,:similarity_algorithm,:prop_kernel,:value_map,:transform` are required + # @return [Numeric] A prediction value. + def self.local_svm_classification(params) - neighbor_matches = neighbors.collect{ |n| n[:features] } # as in classification: URIs of matches + confidence = 0.0 + prediction = nil + if params[:neighbors].size>0 + props = params[:prop_kernel] ? get_props(params) : nil + acts = params[:neighbors].collect { |n| act = n[:activity] } + sims = params[:neighbors].collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors + prediction = props.nil? ? local_svm(acts, sims, "C-bsvc", params) : local_svm_prop(props, acts, "C-bsvc") + LOGGER.debug "Prediction is: '" + prediction.to_s + "'." + params[:conf_stdev] = false if params[:conf_stdev].nil? + confidence = get_confidence({:sims => sims, :acts => acts, :neighbors => params[:neighbors], :conf_stdev => params[:conf_stdev]}) + end + {:prediction => prediction, :confidence => confidence} + + end + + + # Local support vector prediction from neighbors. + # Uses pre-defined Kernel Matrix. + # Not to be called directly (use local_svm_regression or local_svm_classification). + # @param [Array] acts, activities for neighbors. 
+ # @param [Array] sims, similarities for neighbors. + # @param [String] type, one of "nu-svr" (regression) or "C-bsvc" (classification). + # @param [Hash] params Keys `:neighbors,:compound,:features,:p_values,:similarity_algorithm,:prop_kernel,:value_map,:transform` are required + # @return [Numeric] A prediction value. + def self.local_svm(acts, sims, type, params) + LOGGER.debug "Local SVM (Weighted Tanimoto Kernel)." + neighbor_matches = params[:neighbors].collect{ |n| n[:features] } # URIs of matches gram_matrix = [] # square matrix of similarities between neighbors; implements weighted tanimoto kernel - if neighbor_matches.size == 0 - raise "No neighbors found" + + prediction = nil + if Algorithm::zero_variance? acts + prediction = acts[0] else # gram matrix (0..(neighbor_matches.length-1)).each do |i| + neighbor_i_hits = params[:fingerprints][params[:neighbors][i][:compound]] gram_matrix[i] = [] unless gram_matrix[i] # upper triangle ((i+1)..(neighbor_matches.length-1)).each do |j| - sim = eval("#{params[:similarity_algorithm]}(neighbor_matches[i], neighbor_matches[j], params[:p_values])") + neighbor_j_hits= params[:fingerprints][params[:neighbors][j][:compound]] + sim_params = {} + if params[:nr_hits] + sim_params[:nr_hits] = true + sim_params[:compound_features_hits] = neighbor_i_hits + sim_params[:training_compound_features_hits] = neighbor_j_hits + end + sim = eval("#{params[:similarity_algorithm]}(neighbor_matches[i], neighbor_matches[j], params[:p_values], sim_params)") gram_matrix[i][j] = Algorithm.gauss(sim) gram_matrix[j] = [] unless gram_matrix[j] gram_matrix[j][i] = gram_matrix[i][j] # lower triangle @@ -198,6 +512,7 @@ module OpenTox gram_matrix[i][i] = 1.0 end + #LOGGER.debug gram_matrix.to_yaml @r = RinRuby.new(false,false) # global R instance leads to Socket errors after a large number of requests @r.eval "library('kernlab')" # this requires R package "kernlab" to be installed @@ -208,27 +523,171 @@ module OpenTox @r.y = acts @r.sims = sims - 
LOGGER.debug "Preparing R data ..." - # prepare data - @r.eval "y<-as.vector(y)" - @r.eval "gram_matrix<-as.kernelMatrix(matrix(gram_matrix,n,n))" - @r.eval "sims<-as.vector(sims)" - - # model + support vectors - LOGGER.debug "Creating SVM model ..." - @r.eval "model<-ksvm(gram_matrix, y, kernel=matrix, type=\"nu-svr\", nu=0.8)" - @r.eval "sv<-as.vector(SVindex(model))" - @r.eval "sims<-sims[sv]" - @r.eval "sims<-as.kernelMatrix(matrix(sims,1))" - LOGGER.debug "Predicting ..." - @r.eval "p<-predict(model,sims)[1,1]" - prediction = 10**(@r.p.to_f) if take_logs - LOGGER.debug "Prediction is: '" + prediction.to_s + "'." - @r.quit # free R + begin + LOGGER.debug "Preparing R data ..." + # prepare data + @r.eval "y<-as.vector(y)" + @r.eval "gram_matrix<-as.kernelMatrix(matrix(gram_matrix,n,n))" + @r.eval "sims<-as.vector(sims)" + + # model + support vectors + LOGGER.debug "Creating SVM model ..." + @r.eval "model<-ksvm(gram_matrix, y, kernel=matrix, type=\"#{type}\", nu=0.5)" + @r.eval "sv<-as.vector(SVindex(model))" + @r.eval "sims<-sims[sv]" + @r.eval "sims<-as.kernelMatrix(matrix(sims,1))" + LOGGER.debug "Predicting ..." + if type == "nu-svr" + @r.eval "p<-predict(model,sims)[1,1]" + elsif type == "C-bsvc" + @r.eval "p<-predict(model,sims)" + end + if type == "nu-svr" + prediction = @r.p + elsif type == "C-bsvc" + #prediction = (@r.p.to_f == 1.0 ? true : false) + prediction = @r.p + end + @r.quit # free R + rescue Exception => e + LOGGER.debug "#{e.class}: #{e.message}" + LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}" + end + end - confidence = conf/neighbors.size if neighbors.size > 0 - {:prediction => prediction, :confidence => confidence} - + prediction + end + + # Local support vector prediction from neighbors. + # Uses propositionalized setting. + # Not to be called directly (use local_svm_regression or local_svm_classification). + # @param [Array] props, propositionalization of neighbors and query structure e.g. 
[ Array_for_q, two-nested-Arrays_for_n ] + # @param [Array] acts, activities for neighbors. + # @param [String] type, one of "nu-svr" (regression) or "C-bsvc" (classification). + # @return [Numeric] A prediction value. + def self.local_svm_prop(props, acts, type) + + LOGGER.debug "Local SVM (Propositionalization / Kernlab Kernel)." + n_prop = props[0] # is a matrix, i.e. two nested Arrays. + q_prop = props[1] # is an Array. + + prediction = nil + if Algorithm::zero_variance? acts + prediction = acts[0] + else + #LOGGER.debug gram_matrix.to_yaml + @r = RinRuby.new(false,false) # global R instance leads to Socket errors after a large number of requests + @r.eval "library('kernlab')" # this requires R package "kernlab" to be installed + LOGGER.debug "Setting R data ..." + # set data + @r.n_prop = n_prop.flatten + @r.n_prop_x_size = n_prop.size + @r.n_prop_y_size = n_prop[0].size + @r.y = acts + @r.q_prop = q_prop + + begin + LOGGER.debug "Preparing R data ..." + # prepare data + @r.eval "y<-matrix(y)" + @r.eval "prop_matrix<-matrix(n_prop, n_prop_x_size, n_prop_y_size, byrow=TRUE)" + @r.eval "q_prop<-matrix(q_prop, 1, n_prop_y_size, byrow=TRUE)" + + # model + support vectors + LOGGER.debug "Creating SVM model ..." + @r.eval "model<-ksvm(prop_matrix, y, type=\"#{type}\", nu=0.5)" + LOGGER.debug "Predicting ..." + if type == "nu-svr" + @r.eval "p<-predict(model,q_prop)[1,1]" + elsif type == "C-bsvc" + @r.eval "p<-predict(model,q_prop)" + end + if type == "nu-svr" + prediction = @r.p + elsif type == "C-bsvc" + #prediction = (@r.p.to_f == 1.0 ? true : false) + prediction = @r.p + end + @r.quit # free R + rescue Exception => e + LOGGER.debug "#{e.class}: #{e.message}" + LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}" + end + end + prediction + end + + # Get confidence for regression, with standard deviation of neighbor activity if conf_stdev is set. 
+ # @param[Hash] Required keys: :sims, :acts, :neighbors, :conf_stdev + # @return[Float] Confidence + def self.get_confidence(params) + if params[:conf_stdev] + sim_median = params[:sims].to_scale.median + if sim_median.nil? + confidence = nil + else + standard_deviation = params[:acts].to_scale.standard_deviation_sample + confidence = (sim_median*Math.exp(-1*standard_deviation)).abs + if confidence.nan? + confidence = nil + end + end + else + conf = params[:sims].inject{|sum,x| sum + x } + confidence = conf/params[:neighbors].size + end + LOGGER.debug "Confidence is: '" + confidence.to_s + "'." + return confidence + end + + # Get X and Y size of a nested Array (Matrix) + def self.get_sizes(matrix) + begin + nr_cases = matrix.size + nr_features = matrix[0].size + rescue Exception => e + LOGGER.debug "#{e.class}: #{e.message}" + LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}" + end + #puts "NRC: #{nr_cases}, NRF: #{nr_features}" + [ nr_cases, nr_features ] + end + + # Calculate the propositionalization matrix aka instantiation matrix (0/1 entries for features) + # Same for the vector describing the query compound + # @param[Array] neighbors. + # @param[OpenTox::Compound] query compound. + # @param[Array] Dataset Features. + # @param[Array] Fingerprints of neighbors. + # @param[Float] p-values of Features. + def self.get_props (params) + matrix = Array.new + begin + params[:neighbors].each do |n| + n = n[:compound] + row = [] + params[:features].each do |f| + if ! params[:fingerprints][n].nil? + row << (params[:fingerprints][n].include?(f) ? (params[:p_values][f] * params[:fingerprints][n][f]) : 0.0) + else + row << 0.0 + end + end + matrix << row + end + row = [] + params[:features].each do |f| + if params[:nr_hits] + compound_feature_hits = params[:compound].match_hits([f]) + row << (compound_feature_hits.size == 0 ? 0.0 : (params[:p_values][f] * compound_feature_hits[f])) + else + row << (params[:compound].match([f]).size == 0 ? 
0.0 : params[:p_values][f]) + end + end + rescue Exception => e + LOGGER.debug "get_props failed with '" + $! + "'" + end + [ matrix, row ] end end @@ -250,6 +709,195 @@ module OpenTox def features(dataset_uri,compound_uri) end end + + module Transform + include Algorithm + + # The transformer that inverts values. + # 1/x is used, after values have been moved >= 1. + class Inverter + attr_accessor :offset, :values + + # @params[Array] Values to transform. + # @params[Float] Offset for restore. + def initialize *args + case args.size + when 1 + begin + values=args[0] + raise "Cannot transform, values empty." if @values.size==0 + @values = values.collect { |v| -1.0 * v } + @offset = 1.0 - @values.minmax[0] + @offset = -1.0 * @offset if @offset>0.0 + @values.collect! { |v| v - @offset } # slide >1 + @values.collect! { |v| 1 / v } # invert to [0,1] + rescue Exception => e + LOGGER.debug "#{e.class}: #{e.message}" + LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}" + end + when 2 + @offset = args[1].to_f + @values = args[0].collect { |v| 1 / v } + @values.collect! { |v| v + @offset } + @values.collect! { |v| -1.0 * v } + end + end + end + + # The transformer that takes logs. + # Log10 is used, after values have been moved > 0. + class Log10 + attr_accessor :offset, :values + + # @params[Array] Values to transform / restore. + # @params[Float] Offset for restore. + def initialize *args + @distance_to_zero = 0.000000001 # 1 / 1 billion + case args.size + when 1 + begin + values=args[0] + raise "Cannot transform, values empty." if values.size==0 + @offset = values.minmax[0] + @offset = -1.0 * @offset if @offset>0.0 + @values = values.collect { |v| v - @offset } # slide > anchor + @values.collect! { |v| v + @distance_to_zero } # + @values.collect! 
{ |v| Math::log10 v } # log10 (can fail) + rescue Exception => e + LOGGER.debug "#{e.class}: #{e.message}" + LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}" + end + when 2 + @offset = args[1].to_f + @values = args[0].collect { |v| 10**v } + @values.collect! { |v| v - @distance_to_zero } + @values.collect! { |v| v + @offset } + end + end + end + + # The transformer that does nothing (No OPeration). + class NOP + attr_accessor :offset, :values + + # @params[Array] Values to transform / restore. + # @params[Float] Offset for restore. + def initialize *args + @offset = 0.0 + @distance_to_zero = 0.0 + case args.size + when 1 + @values = args[0] + when 2 + @values = args[0] + end + end + end + + + # Auto-Scaler for Arrays + # Center on mean and divide by standard deviation + class AutoScale + attr_accessor :scaled_values, :mean, :stdev + + # @params[Array] Values to transform. + def initialize values + @scaled_values = values + @mean = @scaled_values.to_scale.mean + @stdev = @scaled_values.to_scale.standard_deviation_sample + @scaled_values = @scaled_values.collect {|vi| vi - @mean } + @scaled_values.collect! {|vi| vi / @stdev } unless @stdev == 0.0 + end + end + + # Principal Components Analysis + # Statsample Library (http://ruby-statsample.rubyforge.org/) by C. Bustos + class PCA + attr_accessor :data_matrix, :data_transformed_matrix, :eigenvector_matrix, :eigenvalue_sums, :autoscaler + + # Creates a transformed dataset as GSL::Matrix. + # @param [GSL::Matrix] Data matrix. + # @param [Float] Compression ratio from [0,1]. + # @return [GSL::Matrix] Data transformed matrix. + def initialize data_matrix, compression=0.05 + begin + @data_matrix = data_matrix + @compression = compression.to_f + @stdev = Array.new + @mean = Array.new + + # Objective Feature Selection + raise "Error! PCA needs at least two dimensions." 
if data_matrix.size2 < 2 + @data_matrix_selected = nil + (0..@data_matrix.size2-1).each { |i| + if !Algorithm::zero_variance?(@data_matrix.col(i).to_a) + if @data_matrix_selected.nil? + @data_matrix_selected = GSL::Matrix.alloc(@data_matrix.size1, 1) + @data_matrix_selected.col(0)[0..@data_matrix.size1-1] = @data_matrix.col(i) + else + @data_matrix_selected = @data_matrix_selected.horzcat(GSL::Matrix.alloc(@data_matrix.col(i).to_a,@data_matrix.size1, 1)) + end + end + } + raise "Error! PCA needs at least two dimensions." if (@data_matrix_selected.nil? || @data_matrix_selected.size2 < 2) + + # Scaling of Axes + @data_matrix_scaled = GSL::Matrix.alloc(@data_matrix_selected.size1, @data_matrix_selected.size2) + (0..@data_matrix_selected.size2-1).each { |i| + @autoscaler = OpenTox::Algorithm::Transform::AutoScale.new(@data_matrix_selected.col(i)) + @data_matrix_scaled.col(i)[0..@data_matrix.size1-1] = @autoscaler.scaled_values + @stdev << @autoscaler.stdev + @mean << @autoscaler.mean + } + + data_matrix_hash = Hash.new + (0..@data_matrix_scaled.size2-1).each { |i| + column_view = @data_matrix_scaled.col(i) + data_matrix_hash[i] = column_view.to_scale + } + dataset_hash = data_matrix_hash.to_dataset # see http://goo.gl/7XcW9 + cor_matrix=Statsample::Bivariate.correlation_matrix(dataset_hash) + pca=Statsample::Factor::PCA.new(cor_matrix) + pca.eigenvalues.each { |ev| raise "PCA failed!" unless !ev.nan? 
} + @eigenvalue_sums = Array.new + (0..dataset_hash.fields.size-1).each { |i| + @eigenvalue_sums << pca.eigenvalues[0..i].inject{ |sum, ev| sum + ev } + } + eigenvectors_selected = Array.new + pca.eigenvectors.each_with_index { |ev, i| + if (@eigenvalue_sums[i] <= ((1.0-@compression)*dataset_hash.fields.size)) || (eigenvectors_selected.size == 0) + eigenvectors_selected << ev.to_a + end + } + @eigenvector_matrix = GSL::Matrix.alloc(eigenvectors_selected.flatten, eigenvectors_selected.size, dataset_hash.fields.size).transpose + dataset_matrix = dataset_hash.to_gsl.transpose + @data_transformed_matrix = (@eigenvector_matrix.transpose * dataset_matrix).transpose + rescue Exception => e + LOGGER.debug "#{e.class}: #{e.message}" + LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}" + end + end + + # Restores data in the original feature space (possibly with compression loss). + # @return [GSL::Matrix] Data matrix. + def restore + begin + data_matrix_restored = (@eigenvector_matrix * @data_transformed_matrix.transpose).transpose # reverse pca + # reverse scaling + (0..data_matrix_restored.size2-1).each { |i| + data_matrix_restored.col(i)[0..data_matrix_restored.size1-1] *= @stdev[i] unless @stdev[i] == 0.0 + data_matrix_restored.col(i)[0..data_matrix_restored.size1-1] += @mean[i] + } + data_matrix_restored + rescue Exception => e + LOGGER.debug "#{e.class}: #{e.message}" + LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}" + end + end + + end + + end # Gauss kernel # @return [Float] @@ -257,16 +905,85 @@ module OpenTox d = 1.0 - x.to_f Math.exp(-(d*d)/(2*sigma*sigma)) end + + # For symbolic features + # @param [Array] Array to test, must indicate non-occurrence with 0. + # @return [Boolean] Whether the feature is singular or non-occurring or present everywhere. 
+ def self.isnull_or_singular?(array) + nr_zeroes = array.count(0) + return (nr_zeroes == array.size) || # remove non-occurring feature + (nr_zeroes == array.size-1) || # remove singular feature + (nr_zeroes == 0) # also remove feature present everywhere + end + + # Numeric value test + # @param[Object] value + # @return [Boolean] Whether value is a number + def self.numeric?(value) + true if Float(value) rescue false + end + + # For symbolic features + # @param [Array] Array to test, must indicate non-occurrence with 0. + # @return [Boolean] Whether the feature has variance zero. + def self.zero_variance?(array) + return (array.to_scale.variance_sample == 0.0) + end - # Median of an array + # Sum of an array for Arrays. # @param [Array] Array with values - # @return [Float] Median - def self.median(array) - return nil if array.empty? - array.sort! - m_pos = array.size / 2 - return array.size % 2 == 1 ? array[m_pos] : (array[m_pos-1] + array[m_pos])/2 + # @return [Integer] Sum of size of values + def self.sum_size(array) + sum=0 + array.each { |e| sum += e.size } + return sum + end + + # Minimum Frequency + # @param [Integer] per-mil value + # return [Integer] min-frequency + def self.min_frequency(training_dataset,per_mil) + minfreq = per_mil * training_dataset.compounds.size.to_f / 1000.0 # AM sugg. 8-10 per mil for BBRC, 50 per mil for LAST + minfreq = 2 unless minfreq > 2 + Integer (minfreq) end + # Effect calculation for classification + # @param [Array] Array of occurrences per class in the form of Enumerables. + # @param [Array] Array of database instance counts per class. + def self.effect(occurrences, db_instances) + max=0 + max_value=0 + nr_o = self.sum_size(occurrences) + nr_db = db_instances.to_scale.sum + + occurrences.each_with_index { |o,i| # fminer outputs occurrences sorted reverse by activity. 
+ actual = o.size.to_f/nr_o + expected = db_instances[i].to_f/nr_db + if actual > expected + if ((actual - expected) / actual) > max_value + max_value = (actual - expected) / actual # 'Schleppzeiger' + max = i + end + end + } + max + end + + # Returns Support value of an fingerprint + # @param [Hash] params Keys: `:compound_features_hits, :weights, :training_compound_features_hits, :features, :nr_hits:, :mode` are required + # return [Numeric] Support value + def self.p_sum_support(params) + p_sum = 0.0 + params[:features].each{|f| + compound_hits = params[:compound_features_hits][f] + neighbor_hits = params[:training_compound_features_hits][f] + p_sum += eval("(Algorithm.gauss(params[:weights][f]) * ([compound_hits, neighbor_hits].compact.#{params[:mode]}))") + } + p_sum + end + end end + + diff --git a/lib/compound.rb b/lib/compound.rb index f631ca9..e7b4da0 100644 --- a/lib/compound.rb +++ b/lib/compound.rb @@ -21,6 +21,17 @@ module OpenTox else @inchi = RestClientWrapper.get(@uri, :accept => 'chemical/x-inchi').to_s.chomp if @uri end + + if @uri and @inchi.to_s.size==0 + LOGGER.warn "REMOVE ABMIT HACK: no inchi for compound "+@uri.to_s+", load via smiles" + @inchi = Compound.smiles2inchi(Compound.smiles(@uri)) + end + end + + # request smiles from compound service via accept header + # @return smiles as string + def self.smiles(uri) + RestClientWrapper.get(uri, :accept => 'chemical/x-daylight-smiles').to_s.chomp end # Create a compound from smiles string @@ -153,6 +164,35 @@ module OpenTox #smarts_array.collect { |s| s if match?(s)}.compact end + # Match_hits an array of smarts strings, returns hash with matching smarts as key and number of non-unique hits as value + # @example + # compound = OpenTox::Compound.from_name("Benzene") + # compound.match(['cc','cN']) # returns ['cc'] + # @param [Array] smarts_array Array with Smarts strings + # @return [Hash] Hash with matching smarts as key and number of non-unique hits as value + def match_hits(smarts_array) + # 
avoid recreation of OpenBabel objects + obconversion = OpenBabel::OBConversion.new + obmol = OpenBabel::OBMol.new + obconversion.set_in_format('inchi') + obconversion.read_string(obmol,@inchi) + smarts_pattern = OpenBabel::OBSmartsPattern.new + smarts_hits = {} + #LOGGER.debug "dv ----------- obmol #{Compound.new(@inchi).to_smiles}" + smarts_array.collect do |smarts| + #LOGGER.debug "dv ----------- all smarts #{smarts}" + smarts_pattern.init(smarts) + if smarts_pattern.match(obmol) + hits = smarts_pattern.get_map_list + smarts_hits[smarts] = hits.size + end + end + #LOGGER.debug "dv ----------- smarts => hits #{smarts_hits}" + return smarts_hits + #smarts_array.collect { |s| s if match?(s)}.compact + end + + # Get URI of compound image with highlighted fragments # # @param [Array] activating Array with activating Smarts strings diff --git a/lib/config/config_ru.rb b/lib/config/config_ru.rb index 93df867..dc04263 100644 --- a/lib/config/config_ru.rb +++ b/lib/config/config_ru.rb @@ -19,6 +19,7 @@ set :lock, true end use Rack::ShowExceptions +=begin if defined?(MAIL) # monkeypatch with the original method @@ -50,3 +51,4 @@ if defined?(MAIL) mail.smtp MAIL end end +=end diff --git a/lib/dataset.rb b/lib/dataset.rb index 4005c1c..5ebad0f 100644 --- a/lib/dataset.rb +++ b/lib/dataset.rb @@ -102,6 +102,13 @@ module OpenTox copy parser.load_uri(subjectid) end + def load_sdf(sdf,subjectid=nil) + save(subjectid) unless @uri # get a uri for creating features + parser = Parser::Sdf.new + parser.dataset = self + parser.load_sdf(sdf) + end + # Load CSV string (format specification: http://toxcreate.org/help) # - loads data_entries, compounds, features # - sets metadata (warnings) for parser errors @@ -149,7 +156,11 @@ module OpenTox # Load and return only compound URIs from the dataset service # @return [Array] Compound URIs in the dataset def load_compounds(subjectid=nil) - RestClientWrapper.get(File.join(uri,"compounds"),{:accept=> "text/uri-list", :subjectid => 
subjectid}).to_s.each_line do |compound_uri| + # fix for datasets like http://apps.ideaconsult.net:8080/ambit2/dataset/272?max=50 + u = URI::parse(uri) + u.path = File.join(u.path,"compounds") + u = u.to_s + RestClientWrapper.get(u,{:accept=> "text/uri-list", :subjectid => subjectid}).to_s.each_line do |compound_uri| @compounds << compound_uri.chomp end @compounds.uniq! @@ -167,19 +178,15 @@ module OpenTox @features end - def feature_classes(feature, subjectid=nil) - if Feature.find(feature, subjectid).feature_type == "classification" - classes = [] - @data_entries.each do |c,e| - e[feature].each { |v| classes << v.to_s } - end - classes.uniq.sort - else - nil - end + # returns the accept_values of a feature, i.e. the classification domain / all possible feature values + # @param [String] feature the URI of the feature + # @return [Array] return array with strings, nil if value is not set (e.g. when feature is numeric) + def accept_values(feature) + accept_values = features[feature][OT.acceptValue] + accept_values.sort if accept_values + accept_values end -=begin # Detect feature type(s) in the dataset # @return [String] `classification", "regression", "mixed" or unknown` def feature_type(subjectid=nil) @@ -193,6 +200,7 @@ module OpenTox "unknown" end end +=begin =end # Get Spreadsheet representation @@ -229,6 +237,30 @@ module OpenTox s.to_rdfxml end + # Get SDF representation of compounds + # @return [String] SDF representation + def to_sdf + sum="" + @compounds.each{ |c| + sum << OpenTox::Compound.new(c).to_inchi + sum << OpenTox::Compound.new(c).to_sdf.sub(/\n\$\$\$\$/,'') + @data_entries[c].each{ |f,v| + sum << "> <\"#{f}\">\n" + sum << v.join(", ") + sum << "\n\n" + } + sum << "$$$$\n" + } + sum + end + + def to_urilist + @compounds.inject { |sum, c| + sum << OpenTox::Compound.new(c).uri + sum + "\n" + } + end + # Get name (DC.title) of a feature # @param [String] feature Feature URI # @return [String] Feture title @@ -307,6 +339,12 @@ module OpenTox end end 
end + # set feature metadata in new dataset accordingly (including accept values) + features.each do |f| + self.features[f].each do |k,v| + dataset.features[f][k] = v + end + end dataset.add_metadata(metadata) dataset.save(subjectid) dataset @@ -369,12 +407,14 @@ module OpenTox end def value(compound) - @data_entries[compound.uri].collect{|f,v| v.first if f.match(/prediction/)}.compact.first + v = nil + v = @data_entries[compound.uri].collect{|f,v| v.first if f.match(/value/)}.compact.first if @data_entries[compound.uri] + v = nil if v.is_a? Array and v.empty? + v end def confidence(compound) - feature_uri = @data_entries[compound.uri].collect{|f,v| f if f.match(/prediction/)}.compact.first - @features[feature_uri][OT.confidence] + @data_entries[compound.uri].collect{|f,v| v.first if f.match(/confidence/)}.compact.first if @data_entries[compound.uri] end def descriptors(compound) @@ -382,12 +422,11 @@ module OpenTox end def measured_activities(compound) - source = @metadata[OT.hasSource] - @data_entries[compound.uri].collect{|f,v| v if f.match(/#{source}/)}.compact.flatten + @data_entries[compound.uri].collect{|f,v| v if f.match(/#{@metadata[OT.hasSource]}/)}.compact.flatten if @data_entries[compound.uri] end def neighbors(compound) - @data_entries[compound.uri].collect{|f,v| @features[f] if f.match(/neighbor/)}.compact + @data_entries[compound.uri].collect{|f,v| @features[f] if f.match(/neighbor/)}.compact if @data_entries[compound.uri] end # def errors(compound) diff --git a/lib/environment.rb b/lib/environment.rb index ffc4f60..6d1bb85 100644 --- a/lib/environment.rb +++ b/lib/environment.rb @@ -27,7 +27,7 @@ end Ohm.connect :thread_safe => true # load mail settings for error messages -load File.join config_dir,"mail.rb" if File.exists?(File.join config_dir,"mail.rb") +#load File.join config_dir,"mail.rb" if File.exists?(File.join config_dir,"mail.rb") logfile = "#{LOG_DIR}/#{ENV["RACK_ENV"]}.log" #LOGGER = OTLogger.new(logfile,'daily') # daily rotation @@ -40,8 
+40,8 @@ else end # Regular expressions for parsing classification data -TRUE_REGEXP = /^(true|active|1|1.0|tox)$/i -FALSE_REGEXP = /^(false|inactive|0|0.0|low tox)$/i +TRUE_REGEXP = /^(true|active|1|1.0|tox|activating)$/i +FALSE_REGEXP = /^(false|inactive|0|0.0|low tox|deactivating)$/i # Task durations DEFAULT_TASK_MAX_DURATION = 36000 diff --git a/lib/feature.rb b/lib/feature.rb index b631e46..4ba58ce 100644 --- a/lib/feature.rb +++ b/lib/feature.rb @@ -2,6 +2,8 @@ module OpenTox class Feature include OpenTox + attr_accessor :subjectid + # Find a feature # @param [String] uri Feature URI # @return [OpenTox::Task] Feature object @@ -13,31 +15,31 @@ module OpenTox else feature.add_metadata Parser::Owl::Dataset.new(uri).load_metadata end + feature.subjectid = subjectid feature end - + # provides feature type, possible types are "regression" or "classification" # @return [String] feature type, unknown if OT.isA property is unknown/ not set def feature_type + raise OpenTox::BadRequestError.new("rdf type of feature '"+uri.to_s+"' not set") unless metadata[RDF.type] if metadata[RDF.type].flatten.include?(OT.NominalFeature) "classification" elsif metadata[RDF.type].flatten.include?(OT.NumericFeature) "regression" - else - #"unknown" - metadata[RDF.type].inspect - end -=begin - case metadata[RDF.type] - when /NominalFeature/ - "classification" - when /NumericFeature/ - "regression" + elsif metadata[OWL.sameAs] + metadata[OWL.sameAs].each do |f| + begin + type = Feature.find(f, subjectid).feature_type + return type unless type=="unknown" + rescue => ex + LOGGER.warn "could not load same-as-feature '"+f.to_s+"' for feature '"+uri.to_s+"' : "+ex.message.to_s + end + end + "unknown" else "unknown" end -=end end - end end diff --git a/lib/helper.rb b/lib/helper.rb index 995f3e9..33774b4 100644 --- a/lib/helper.rb +++ b/lib/helper.rb @@ -81,7 +81,7 @@ helpers do when "css" @accept = 'text/css' else - # halt 404, "File format #{extension} not supported." 
+ # raise OpenTox::NotFoundError.new "File format #{extension} not supported." end end end @@ -94,4 +94,3 @@ before do protected!(@subjectid) end end - diff --git a/lib/model.rb b/lib/model.rb index 048de85..ff0ce0e 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -23,7 +23,7 @@ module OpenTox # Generic OpenTox model class for all API compliant services class Generic include Model - + # Find Generic Opentox Model via URI, and loads metadata, could raise NotFound/NotAuthorized error # @param [String] uri Model URI # @return [OpenTox::Model::Generic] Model instance @@ -34,42 +34,75 @@ module OpenTox raise "could not load model metadata '"+uri.to_s+"'" if model.metadata==nil or model.metadata.size==0 model end - - # provides feature type, possible types are "regression" or "classification" - # @return [String] feature type, "unknown" if type could not be estimated + + # provides feature type, possible types are "regression" or "classification" + # @return [String] feature type, "unknown" if type could not be estimated def feature_type(subjectid=nil) - return @feature_type if @feature_type - - # dynamically perform restcalls if necessary - load_metadata(subjectid) if @metadata==nil or @metadata.size==0 or (@metadata.size==1 && @metadata.values[0]==@uri) - algorithm = OpenTox::Algorithm::Generic.find(@metadata[OT.algorithm], subjectid) - algorithm_title = algorithm ? algorithm.metadata[DC.title] : nil - algorithm_type = algorithm ? algorithm.metadata[RDF.type] : nil - dependent_variable = OpenTox::Feature.find( @metadata[OT.dependentVariables],subjectid ) - dependent_variable_type = dependent_variable ? 
dependent_variable.feature_type : nil - type_indicators = [dependent_variable_type, @metadata[RDF.type], @metadata[DC.title], @uri, algorithm_type, algorithm_title].flatten - type_indicators.each do |type| - case type - when /(?i)classification/ - @feature_type = "classification" - break - when /(?i)regression/ - @feature_type = "regression" - end + unless @feature_type + load_predicted_variables( subjectid ) unless @predicted_variable + @feature_type = OpenTox::Feature.find( @predicted_variable, subjectid ).feature_type end - raise "unknown model "+type_indicators.inspect unless @feature_type @feature_type end - - end + def predicted_variable( subjectid ) + load_predicted_variables( subjectid ) unless @predicted_variable + @predicted_variable + end + + def predicted_variables( subjectid ) + load_predicted_variables( subjectid, false ) unless @predicted_variables + @predicted_variables + end + + def predicted_confidence( subjectid ) + load_predicted_variables( subjectid ) unless @predicted_confidence + @predicted_confidence + end + + private + def load_predicted_variables( subjectid=nil, use_confidence=true ) + load_metadata(subjectid) if @metadata==nil or @metadata.size==0 or (@metadata.size==1 && @metadata.values[0]==@uri) + if @metadata[OT.predictedVariables] + predictedVariables = @metadata[OT.predictedVariables] + if predictedVariables.is_a?(Array) + if (predictedVariables.size==1) + @predicted_variable = predictedVariables[0] + elsif (predictedVariables.size>=2) + # PENDING identify confidence + if use_confidence + conf_index = -1 + predictedVariables.size.times do |i| + f = OpenTox::Feature.find(predictedVariables[i], subjectid) + conf_index = i if f.metadata[DC.title]=~/(?i)confidence/ + end + raise "could not estimate predicted variable from model: '"+uri.to_s+ + "', number of predicted-variables==2, but no confidence found" if conf_index==-1 + end + if (predictedVariables.size==2) && use_confidence + @predicted_variable = predictedVariables[1-conf_index] 
+ @predicted_confidence = predictedVariables[conf_index] + else + @predicted_variables = predictedVariables + end + else + raise "could not estimate predicted variable from model: '"+uri.to_s+"', number of predicted-variables == 0" + end + else + raise "could not estimate predicted variable from model: '"+uri.to_s+"', predicted-variables is no array" + end + end + raise "could not estimate predicted variable from model: '"+uri.to_s+"'" unless (@predicted_variable || @predicted_variables) + end + end + # Lazy Structure Activity Relationship class class Lazar - include Model include Algorithm + include Model - attr_accessor :compound, :prediction_dataset, :features, :effects, :activities, :p_values, :fingerprints, :feature_calculation_algorithm, :similarity_algorithm, :prediction_algorithm, :min_sim, :subjectid + attr_accessor :compound, :prediction_dataset, :features, :effects, :activities, :p_values, :fingerprints, :feature_calculation_algorithm, :similarity_algorithm, :prediction_algorithm, :min_sim, :subjectid, :prop_kernel, :value_map, :nr_hits, :transform, :conf_stdev, :prediction_min_max def initialize(uri=nil) @@ -78,7 +111,7 @@ module OpenTox else super CONFIG[:services]["opentox-model"] end - + @metadata[OT.algorithm] = File.join(CONFIG[:services]["opentox-algorithm"],"lazar") @features = [] @@ -86,12 +119,18 @@ module OpenTox @activities = {} @p_values = {} @fingerprints = {} + @value_map = {} + @prediction_min_max = [] @feature_calculation_algorithm = "Substructure.match" @similarity_algorithm = "Similarity.tanimoto" @prediction_algorithm = "Neighbors.weighted_majority_vote" - + + @nr_hits = false @min_sim = 0.3 + @prop_kernel = false + @transform = { "class" => "NOP" } + @conf_stdev = false end @@ -111,13 +150,25 @@ module OpenTox # Create a new lazar model # @param [optional,Hash] params Parameters for the lazar algorithm (OpenTox::Algorithm::Lazar) # @return [OpenTox::Model::Lazar] lazar model - def self.create(params) + def self.create(params, 
waiting_task=nil ) subjectid = params[:subjectid] lazar_algorithm = OpenTox::Algorithm::Generic.new File.join( CONFIG[:services]["opentox-algorithm"],"lazar") - model_uri = lazar_algorithm.run(params) + model_uri = lazar_algorithm.run(params, waiting_task) OpenTox::Model::Lazar.find(model_uri, subjectid) end + def run( params, accept_header=nil, waiting_task=nil ) + unless accept_header + if CONFIG[:yaml_hosts].include?(URI.parse(@uri).host) + accept_header = 'application/x-yaml' + else + accept_header = 'application/rdf+xml' + end + end + LOGGER.info "running model "+@uri.to_s+", params: "+params.inspect+", accept: "+accept_header.to_s + RestClientWrapper.post(@uri,params,{:accept => accept_header},waiting_task).to_s + end + # Get a parameter value # @param [String] param Parameter name # @return [String] Parameter value @@ -131,6 +182,7 @@ module OpenTox # @param [optional,OpenTox::Task] waiting_task (can be a OpenTox::Subtask as well), progress is updated accordingly # @return [OpenTox::Dataset] Dataset with predictions def predict_dataset(dataset_uri, subjectid=nil, waiting_task=nil) + @prediction_dataset = Dataset.create(CONFIG[:services]["opentox-dataset"], subjectid) @prediction_dataset.add_metadata({ OT.hasSource => @uri, @@ -150,7 +202,7 @@ module OpenTox LOGGER.warn "prediction for compound "+compound_uri.to_s+" failed: "+ex.message end end - @prediction_dataset.save(subjectid) + #@prediction_dataset.save(subjectid) @prediction_dataset end @@ -164,49 +216,52 @@ module OpenTox features = {} unless @prediction_dataset - #@prediction_dataset = cached_prediction - #return @prediction_dataset if cached_prediction @prediction_dataset = Dataset.create(CONFIG[:services]["opentox-dataset"], subjectid) @prediction_dataset.add_metadata( { OT.hasSource => @uri, DC.creator => @uri, - # TODO: fix dependentVariable DC.title => URI.decode(File.basename( @metadata[OT.dependentVariables] )), OT.parameters => [{DC.title => "compound_uri", OT.paramValue => compound_uri}] } ) 
end - return @prediction_dataset if database_activity(subjectid) - - neighbors - prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values})") - - prediction_feature_uri = File.join( @prediction_dataset.uri, "feature", "prediction", File.basename(@metadata[OT.dependentVariables]),@prediction_dataset.compounds.size.to_s) - # TODO: fix dependentVariable - @prediction_dataset.metadata[OT.dependentVariables] = prediction_feature_uri - - if @neighbors.size == 0 - @prediction_dataset.add_feature(prediction_feature_uri, { - RDF.type => [OT.MeasuredFeature], - OT.hasSource => @uri, - DC.creator => @uri, - DC.title => URI.decode(File.basename( @metadata[OT.dependentVariables] )), - OT.error => "No similar compounds in training dataset.", - OT.parameters => [{DC.title => "compound_uri", OT.paramValue => compound_uri}] - }) - @prediction_dataset.add @compound.uri, prediction_feature_uri, prediction[:prediction] + if OpenTox::Feature.find(metadata[OT.dependentVariables]).feature_type == "regression" + all_activities = [] + all_activities = @activities.values.flatten.collect! 
{ |i| i.to_f } + @prediction_min_max[0] = (all_activities.to_scale.min/2) + @prediction_min_max[1] = (all_activities.to_scale.max*2) + end - else - @prediction_dataset.add_feature(prediction_feature_uri, { - RDF.type => [OT.ModelPrediction], - OT.hasSource => @uri, - DC.creator => @uri, - DC.title => URI.decode(File.basename( @metadata[OT.dependentVariables] )), - OT.prediction => prediction[:prediction], - OT.confidence => prediction[:confidence], - OT.parameters => [{DC.title => "compound_uri", OT.paramValue => compound_uri}] - }) - @prediction_dataset.add @compound.uri, prediction_feature_uri, prediction[:prediction] + unless database_activity(subjectid) # adds database activity to @prediction_dataset + + neighbors + prediction = eval("#{@prediction_algorithm} ( { :neighbors => @neighbors, + :compound => @compound, + :features => @features, + :p_values => @p_values, + :fingerprints => @fingerprints, + :similarity_algorithm => @similarity_algorithm, + :prop_kernel => @prop_kernel, + :value_map => @value_map, + :nr_hits => @nr_hits, + :conf_stdev => @conf_stdev, + :prediction_min_max => @prediction_min_max, + :transform => @transform } ) ") + + value_feature_uri = File.join( @uri, "predicted", "value") + confidence_feature_uri = File.join( @uri, "predicted", "confidence") + + @prediction_dataset.metadata[OT.dependentVariables] = @metadata[OT.dependentVariables] unless @prediction_dataset.metadata[OT.dependentVariables] + @prediction_dataset.metadata[OT.predictedVariables] = [value_feature_uri, confidence_feature_uri] unless @prediction_dataset.metadata[OT.predictedVariables] + + if OpenTox::Feature.find(metadata[OT.dependentVariables], subjectid).feature_type == "classification" + @prediction_dataset.add @compound.uri, value_feature_uri, @value_map[prediction[:prediction]] + else + @prediction_dataset.add @compound.uri, value_feature_uri, prediction[:prediction] + end + @prediction_dataset.add @compound.uri, confidence_feature_uri, prediction[:confidence] + 
@prediction_dataset.features[value_feature_uri][DC.title] = @prediction_dataset.metadata[DC.title] + @prediction_dataset.features[confidence_feature_uri][DC.title] = "Confidence" if verbose if @feature_calculation_algorithm == "Substructure.match" @@ -260,7 +315,6 @@ module OpenTox end n+=1 end - # what happens with dataset predictions? end end @@ -268,33 +322,49 @@ module OpenTox @prediction_dataset end - # Find neighbors and store them as object variable - def neighbors + + # Find neighbors and store them as object variable, access all compounds for that. + def neighbors @compound_features = eval("#{@feature_calculation_algorithm}(@compound,@features)") if @feature_calculation_algorithm - @neighbors = [] - @fingerprints.each do |training_compound,training_features| - sim = eval("#{@similarity_algorithm}(@compound_features,training_features,@p_values)") - if sim > @min_sim - @activities[training_compound].each do |act| - @neighbors << { - :compound => training_compound, - :similarity => sim, - :features => training_features, - :activity => act - } - end - end + @fingerprints.keys.each do |training_compound| # AM: access all compounds + add_neighbor @fingerprints[training_compound].keys, training_compound end + end + # Adds a neighbor to @neighbors if it passes the similarity threshold. 
+ def add_neighbor(training_features, training_compound) + compound_features_hits = {} + training_compound_features_hits = {} + if @nr_hits + compound_features_hits = @compound.match_hits(@compound_features) + training_compound_features_hits = @fingerprints[training_compound] + #LOGGER.debug "dv ------------ training_compound_features_hits:#{training_compound_features_hits.class} #{training_compound_features_hits}" + end + params = {} + params[:nr_hits] = @nr_hits + params[:compound_features_hits] = compound_features_hits + params[:training_compound_features_hits] = training_compound_features_hits + + sim = eval("#{@similarity_algorithm}(training_features, @compound_features, @p_values, params)") + if sim > @min_sim + @activities[training_compound].each do |act| + @neighbors << { + :compound => training_compound, + :similarity => sim, + :features => training_features, + :activity => act + } + end + end end # Find database activities and store them in @prediction_dataset # @return [Boolean] true if compound has databasse activities, false if not def database_activity(subjectid) if @activities[@compound.uri] - @activities[@compound.uri].each { |act| @prediction_dataset.add @compound.uri, @metadata[OT.dependentVariables], act } + @activities[@compound.uri].each { |act| @prediction_dataset.add @compound.uri, @metadata[OT.dependentVariables], @value_map[act] } @prediction_dataset.add_metadata(OT.hasSource => @metadata[OT.trainingDataset]) @prediction_dataset.save(subjectid) true @@ -303,6 +373,35 @@ module OpenTox end end + def prediction_features + [prediction_value_feature,prediction_confidence_feature] + end + + def prediction_value_feature + dependent_uri = @metadata[OT.dependentVariables].first + feature = OpenTox::Feature.new File.join( @uri, "predicted", "value") + feature.add_metadata( { + RDF.type => [OT.ModelPrediction], + OT.hasSource => @uri, + DC.creator => @uri, + DC.title => URI.decode(File.basename( dependent_uri )), + OWL.sameAs => dependent_uri + }) + 
feature + end + + def prediction_confidence_feature + dependent_uri = @metadata[OT.dependentVariables].first + feature = OpenTox::Feature.new File.join( @uri, "predicted", "confidence") + feature.add_metadata( { + RDF.type => [OT.ModelPrediction], + OT.hasSource => @uri, + DC.creator => @uri, + DC.title => "#{URI.decode(File.basename( dependent_uri ))} confidence" + }) + feature + end + # Save model at model service def save(subjectid) self.uri = RestClientWrapper.post(@uri,self.to_yaml,{:content_type => "application/x-yaml", :subjectid => subjectid}) diff --git a/lib/opentox-ruby.rb b/lib/opentox-ruby.rb index ae05cb2..1fa2a86 100644 --- a/lib/opentox-ruby.rb +++ b/lib/opentox-ruby.rb @@ -1,4 +1,4 @@ -['rubygems', 'sinatra', 'sinatra/url_for', 'ohm', 'rest_client', 'yaml', 'cgi', 'spork', 'error', 'overwrite', 'environment'].each do |lib| +['rubygems', 'sinatra', 'sinatra/url_for', 'ohm', 'rest_client', 'yaml', 'cgi', 'spork', 'error', 'overwrite', 'environment', 'gsl'].each do |lib| require lib end diff --git a/lib/overwrite.rb b/lib/overwrite.rb index df4e1b7..393e8e7 100644 --- a/lib/overwrite.rb +++ b/lib/overwrite.rb @@ -50,7 +50,8 @@ class Sinatra::Base halt task.http_code,task.to_yaml # PENDING differs from task-webservice when /html/ response['Content-Type'] = "text/html" - halt task.http_code,OpenTox.text_to_html(task.to_yaml, @subjectid) + # html -> task created with html form -> redirect to task uri + redirect task.uri else # default /uri-list/ response['Content-Type'] = "text/uri-list" if task.completed? 
diff --git a/lib/parser.rb b/lib/parser.rb index 5f847c3..d0975af 100644 --- a/lib/parser.rb +++ b/lib/parser.rb @@ -40,8 +40,9 @@ module OpenTox else file = Tempfile.new("ot-rdfxml") if @dataset - # do not concat /metadata to uri string, this would not work for dataset/R401577?max=3 uri = URI::parse(@uri) + #remove params like dataset/<id>?max=3 from uri, not needed for metadata + uri.query = nil uri.path = File.join(uri.path,"metadata") uri = uri.to_s else @@ -56,7 +57,7 @@ module OpenTox `rapper -i rdfxml -o ntriples #{file.path} 2>/dev/null`.each_line do |line| triple = line.to_triple if triple[0] == @uri - if triple[1] == RDF.type # allow multiple types + if triple[1] == RDF.type || triple[1]==OT.predictedVariables # allow multiple types @metadata[triple[1]] = [] unless @metadata[triple[1]] @metadata[triple[1]] << triple[2].split('^^').first else @@ -75,6 +76,9 @@ module OpenTox @metadata[OT.parameters] << parameter end end + #@metadata.each do |k,v| + #v = v.first if v and v.size == 1 + #end @metadata end @@ -82,7 +86,11 @@ module OpenTox # @param [String] rdf # @param [String] type of the info (e.g. 
OT.Task, OT.ErrorReport) needed to get the subject-uri # @return [Owl] with uri and metadata set - def self.from_rdf( rdf, type ) + def self.from_rdf( rdf, type, allow_multiple = false ) + + uris = Array.new + owls = Array.new + # write to file and read convert with rapper into tripples file = Tempfile.new("ot-rdfxml") file.puts rdf @@ -95,20 +103,27 @@ module OpenTox triples.each_line do |line| triple = line.to_triple if triple[1] == RDF['type'] and triple[2]==type - raise "uri already set, two uris found with type: "+type.to_s if uri + if !allow_multiple + raise "uri already set, two uris found with type: "+type.to_s if uri + end uri = triple[0] + uris << uri end end File.delete(file.path) + # load metadata - metadata = {} - triples.each_line do |line| - triple = line.to_triple - metadata[triple[1]] = triple[2].split('^^').first if triple[0] == uri and triple[1] != RDF['type'] - end - owl = Owl::Generic.new(uri) - owl.metadata = metadata - owl + uris.each { |uri| + metadata = {} + triples.each_line do |line| + triple = line.to_triple + metadata[triple[1]] = triple[2].split('^^').first if triple[0] == uri and triple[1] != RDF['type'] + end + owl = Owl::Generic.new(uri) + owl.metadata = metadata + owls << owl + } + allow_multiple ? 
owls : owls[0] end # Generic parser for all OpenTox classes @@ -228,7 +243,12 @@ module OpenTox file = Tempfile.new("ot-rdfxml") # do not concat /features to uri string, this would not work for dataset/R401577?max=3 uri = URI::parse(@uri) - uri.path = File.join(uri.path,"features") + # PENDING + # ambit models return http://host/dataset/id?feature_uris[]=sth but + # amibt dataset services does not support http://host/dataset/id/features?feature_uris[]=sth + # and features are not inlcuded in http://host/dataset/id/features + # -> load features from complete dataset + uri.path = File.join(uri.path,"features") unless @uri=~/\?(feature_uris|page|pagesize)/ uri = uri.to_s file.puts OpenTox::RestClientWrapper.get uri,{:subjectid => subjectid,:accept => "application/rdf+xml"},nil,false file.close @@ -244,8 +264,13 @@ module OpenTox File.delete(to_delete) if to_delete statements.each do |triple| if features.include? triple[0] - @dataset.features[triple[0]] = {} unless @dataset.features[triple[0]] - @dataset.features[triple[0]][triple[1]] = triple[2].split('^^').first + @dataset.features[triple[0]] = {} unless @dataset.features[triple[0]] + if triple[1] == RDF.type + @dataset.features[triple[0]][triple[1]] = [] unless @dataset.features[triple[0]][triple[1]] + @dataset.features[triple[0]][triple[1]] << triple[2].split('^^').first + else + @dataset.features[triple[0]][triple[1]] = triple[2].split('^^').first + end end end @dataset.features @@ -271,22 +296,39 @@ module OpenTox @duplicates = {} end + def detect_new_values(row, value_maps) + row.shift + row.each_index do |i| + value = row[i] + value_maps[i] = Hash.new if value_maps[i].nil? + value_maps[i][value].nil? ? 
value_maps[i][value]=0 : value_maps[i][value] += 1 + end + value_maps + end + # Load Spreadsheet book (created with roo gem http://roo.rubyforge.org/, excel format specification: http://toxcreate.org/help) # @param [Excel] book Excel workbook object (created with roo gem) # @return [OpenTox::Dataset] Dataset object with Excel data def load_spreadsheet(book) book.default_sheet = 0 add_features book.row(1) + value_maps = Array.new + regression_features=Array.new - # AM: fix mixed read in - regression_features=false 2.upto(book.last_row) { |i| row = book.row(i) - regression_features = detect_regression_features row - break if regression_features==true + value_maps = detect_new_values(row, value_maps) + value_maps.each_with_index { |vm,j| + if vm.size > 5 # 5 is the maximum nr of classes supported by Fminer. + regression_features[j]=true + else + regression_features[j]=false + end + } + } + 2.upto(book.last_row) { |i| + add_values book.row(i), regression_features } - - 2.upto(book.last_row) { |i| add_values book.row(i),regression_features } warnings @dataset end @@ -298,21 +340,27 @@ module OpenTox row = 0 input = csv.split("\n") add_features split_row(input.shift) + value_maps = Array.new + regression_features=Array.new - - # AM: fix mixed read in - regression_features=false input.each { |row| row = split_row(row) - regression_features = detect_regression_features row - break if regression_features==true + value_maps = detect_new_values(row, value_maps) + value_maps.each_with_index { |vm,j| + if vm.size > 5 # 5 is the maximum nr of classes supported by Fminer. 
+ regression_features[j]=true + else + regression_features[j]=false + end + } + } + input.each { |row| + add_values split_row(row), regression_features } - input.each { |row| add_values split_row(row),regression_features } warnings @dataset end - private def warnings @@ -354,20 +402,10 @@ module OpenTox end end - def detect_regression_features row - row.shift - regression_features=false - row.each_index do |i| - value = row[i] - type = feature_type(value) - if type == OT.NumericFeature - regression_features=true - end - end - regression_features - end - - def add_values(row, regression_features=false) + # Adds a row to a dataset + # @param Array A row split up as an array + # @param Array Indicator for regression for each field + def add_values(row, regression_features) smiles = row.shift compound = Compound.from_smiles(smiles) @@ -381,27 +419,23 @@ module OpenTox row.each_index do |i| value = row[i] feature = @features[i] - type = feature_type(value) + type = nil + if (regression_features[i]) + type = feature_type(value) + if type != OT.NumericFeature + raise "Error! Expected numeric values." + end + else + type = OT.NominalFeature + end @feature_types[feature] << type - if (regression_features) + case type + when OT.NumericFeature val = value.to_f - else - case type - when OT.NominalFeature - case value.to_s - when TRUE_REGEXP - val = true - when FALSE_REGEXP - val = false - end - when OT.NumericFeature - val = value.to_f - when OT.StringFeature - val = value.to_s - @activity_errors << smiles+", "+row.join(", ") - end + when OT.NominalFeature + val = value.to_s end if val!=nil @dataset.add(compound.uri, feature, val) @@ -413,26 +447,170 @@ module OpenTox end end - def numeric?(value) - true if Float(value) rescue false + def feature_type(value) + if OpenTox::Algorithm::numeric? 
value + return OT.NumericFeature + else + return OT.NominalFeature + end + end + + def split_row(row) + row.chomp.gsub(/["']/,'').split(/\s*[,;\t]\s*/) # remove quotes + end + + end + + class Table + + attr_accessor :data, :features, :compounds + + def initialize + @data = {} + @activity_errors = [] + end + + def feature_values(feature) + @data.collect{|c, row| row[feature]}.uniq.compact + end + + def feature_types(feature) + @data.collect{|c, row| feature_type(row[feature])}.uniq.compact + end + + def features + @data.collect{|c,row| row.keys}.flatten.uniq + end + + def clean_features + ignored_features = [] + features.each do |feature| + if feature_values(feature).size > 5 + if feature_types(feature).size == 1 and feature_types(feature).first == OT.NumericFeature + # REGRESSION + elsif feature_types(feature).include? OT.NumericFeature + @data.each{|c,row| row[feature] = nil unless OpenTox::Algorithm::numeric?(row[feature]) } # delete nominal features + @activity_errors << "Nominal feature values of #{feature} ignored (using numeric features for regression models)." + else + @activity_errors << "Feature #{feature} ignored (more than 5 nominal feature values and no numeric values)." + ignored_features << feature + next + end + elsif feature_values(feature).size <= 1 + @activity_errors << "Feature #{feature} ignored (less than 2 feature values)." + ignored_features << feature + else + # CLASSIFICATION + end + end + ignored_features.each do |feature| + @data.each{ |c,row| row.delete feature } + end + @activity_errors end - def classification?(value) - !value.to_s.strip.match(TRUE_REGEXP).nil? or !value.to_s.strip.match(FALSE_REGEXP).nil? + def add_to_dataset(dataset) + features.each do |feature_name| + feature_uri = File.join(dataset.uri,"feature",URI.encode(feature_name)) + dataset.add_feature(feature_uri,{DC.title => feature_name}) + end + + @data.each do |compound,row| + unless row.empty? 
+ row.each do |feature,value| + if OpenTox::Algorithm::numeric?(value) + value = value.to_f + elsif value.nil? or value.empty? + value = nil + else + value = value.to_s + end + feature_uri = File.join(dataset.uri,"feature",URI.encode(feature)) + dataset.add(compound, feature_uri, value) + #dataset.features[feature_uri][RDF.type] = feature_types(feature) + #dataset.features[feature_uri][OT.acceptValue] = feature_values(feature) + if feature_types(feature).include? OT.NumericFeature + dataset.features[feature_uri][RDF.type] = [OT.NumericFeature] + else + dataset.features[feature_uri][RDF.type] = [OT.NominalFeature] + dataset.features[feature_uri][OT.acceptValue] = feature_values(feature) + end + end + end + end end + private + def feature_type(value) - if classification? value - return OT.NominalFeature - elsif numeric? value + if OpenTox::Algorithm::numeric? value return OT.NumericFeature else - return OT.StringFeature + return OT.NominalFeature end end + end + + # quick hack to enable sdf import via csv + # should be refactored + class Sdf + + attr_accessor :dataset + + def initialize + @data = {} + + @compound_errors = [] + @activity_errors = [] + @duplicates = {} + end + + def load_sdf(sdf) + + obconversion = OpenBabel::OBConversion.new + obmol = OpenBabel::OBMol.new + obconversion.set_in_and_out_formats "sdf", "inchi" + + table = Table.new + + properties = [] + sdf.each_line { |l| properties << l.to_s if l.match(/</) } + properties.uniq! + properties.sort! + properties.collect!{ |p| p.gsub(/<|>/,'').strip.chomp } + + rec = 0 + sdf.split(/\$\$\$\$\r*\n/).each do |s| + rec += 1 + obconversion.read_string obmol, s + begin + inchi = obconversion.write_string(obmol).gsub(/\s/,'').chomp + @duplicates[inchi] = [] unless @duplicates[inchi] + @duplicates[inchi] << rec #inchi#+", "+row.join(", ") + compound = Compound.from_inchi inchi + rescue + @compound_errors << "Could not convert structure to InChI, all entries for this compound (record #{rec} have been ignored! 
\n#{s}" + next + end + row = {} + obmol.get_data.each { |d| row[d.get_attribute] = d.get_value if properties.include?(d.get_attribute) } + table.data[compound.uri] = row + end + + # finda and remove ignored_features + @activity_errors = table.clean_features + table.add_to_dataset @dataset + + warnings = '' + warnings += "<p>Incorrect Smiles structures (ignored):</p>" + @compound_errors.join("<br/>") unless @compound_errors.empty? + warnings += "<p>Irregular activities (ignored):</p>" + @activity_errors.join("<br/>") unless @activity_errors.empty? + duplicate_warnings = '' + @duplicates.each {|inchi,lines| duplicate_warnings << "<p>#{lines.join('<br/>')}</p>" if lines.size > 1 } + warnings += "<p>Duplicated structures (all structures/activities used for model building, please make sure, that the results were obtained from <em>independent</em> experiments):</p>" + duplicate_warnings unless duplicate_warnings.empty? + + @dataset.metadata[OT.Warnings] = warnings + @dataset - def split_row(row) - row.chomp.gsub(/["']/,'').split(/\s*[,;]\s*/) # remove quotes end end diff --git a/lib/rest_client_wrapper.rb b/lib/rest_client_wrapper.rb index 747a353..6d25bb3 100644 --- a/lib/rest_client_wrapper.rb +++ b/lib/rest_client_wrapper.rb @@ -131,13 +131,14 @@ module OpenTox raise "unknown content-type for task : '"+res.content_type.to_s+"'"+" base-uri: "+base_uri.to_s+" content: "+res[0..200].to_s end - LOGGER.debug "result is a task '"+task.uri.to_s+"', wait for completion" + #LOGGER.debug "result is a task '"+task.uri.to_s+"', wait for completion" task.wait_for_completion waiting_task unless task.completed? 
# maybe task was cancelled / error if task.errorReport received_error task.errorReport, task.http_code, nil, {:rest_uri => task.uri, :rest_code => task.http_code} else - raise "task status: '"+task.status.to_s+"' but errorReport nil" + raise "status of task '"+task.uri.to_s+"' is no longer running (hasStatus is '"+task.status+ + "'), but it is neither completed nor has an errorReport" end end diff --git a/lib/serializer.rb b/lib/serializer.rb index e4cb541..3921784 100644 --- a/lib/serializer.rb +++ b/lib/serializer.rb @@ -17,6 +17,7 @@ module OpenTox # this should come from opentox.owl OT.Compound => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } , OT.Feature => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } , + OT.Model => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } , OT.NominalFeature => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } , OT.NumericFeature => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } , OT.StringFeature => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } , @@ -27,6 +28,8 @@ module OpenTox OT.Parameter => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } , OT.Task => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } , OTA.PatternMiningSupervised => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } , + OTA.ClassificationLazySingleTarget => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } , + OTA.RegressionLazySingleTarget => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } , #classes for validation OT.Validation => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } , @@ -45,6 +48,10 @@ module OpenTox OT.values => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } , OT.algorithm => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } , OT.parameters => { RDF["type"] => [{ "type" => "uri", "value" => 
OWL.ObjectProperty }] } , + OT.featureDataset => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } , + OT.dependentVariables => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } , + OT.predictedVariables => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } , + OT.paramValue => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } , #object props for validation# OT.model => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } , @@ -103,6 +110,7 @@ module OpenTox OT.precision => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } , OT.areaUnderRoc => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } , OT.weightedAreaUnderRoc => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } , + OT.weightedAccuracy => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } , OT.fMeasure => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } , OT.percentIncorrect => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } , OT.validationType => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } , @@ -126,7 +134,7 @@ module OpenTox OT.hasSource => { RDF["type"] => [{ "type" => "uri", "value" => OWL.DatatypeProperty }] } , OT.value => { RDF["type"] => [{ "type" => "uri", "value" => OWL.DatatypeProperty }] } , OT.paramScope => { RDF["type"] => [{ "type" => "uri", "value" => OWL.DatatypeProperty }] } , - OT.paramValue => { RDF["type"] => [{ "type" => "uri", "value" => OWL.DatatypeProperty }] } , + #OT.paramValue => { RDF["type"] => [{ "type" => "uri", "value" => OWL.DatatypeProperty }] } , } @data_entries = {} @@ -157,23 +165,16 @@ module OpenTox # Add a dataset # @param [String] uri Dataset URI def add_dataset(dataset) - @dataset = dataset.uri - @object[dataset.uri] = { RDF["type"] => [{ "type" => "uri", "value" => 
OT.Dataset }] } - add_metadata dataset.uri, dataset.metadata - dataset.compounds.each { |compound| add_compound compound } - dataset.features.each { |feature,metadata| add_feature feature,metadata } - dataset.data_entries.each do |compound,entry| entry.each do |feature,values| values.each { |value| add_data_entry compound,feature,value } end end - end # Add a algorithm @@ -188,6 +189,14 @@ module OpenTox def add_model(uri,metadata) @object[uri] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Model }] } add_metadata uri, metadata + @object[metadata[OT.featureDataset]] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Dataset }] } + @object[metadata[OT.trainingDataset]] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Dataset }] } + @object[metadata[OT.dependentVariables]] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Feature }] } + metadata[OT.predictedVariables].each{|feature| @object[feature] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Feature }]}} #unless metadata[OT.predictedVariables].nil? + # TODO: add algorithms from parameters + @object["http://ot-dev.in-silico.ch/algorithm/fminer/bbrc"] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Algorithm }] } + @object["http://ot-dev.in-silico.ch/algorithm/fminer/last"] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Algorithm }] } + @object["http://ot-dev.in-silico.ch/algorithm/lazar"] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Algorithm }] } end # Add a task @@ -272,7 +281,7 @@ module OpenTox @object[genid][name] = [{"type" => type(entry), "value" => entry }] end end - elsif v.is_a? Array and u == RDF.type + elsif v.is_a? 
Array #and u == RDF.type @object[uri] = {} unless @object[uri] v.each do |value| @object[uri][u] = [] unless @object[uri][u] @@ -354,7 +363,8 @@ module OpenTox # @return [text/plain] Object OWL-DL in RDF/XML format def to_rdfxml Tempfile.open("owl-serializer"){|f| f.write(self.to_ntriples); @path = f.path} - `rapper -i ntriples -f 'xmlns:ot="#{OT.uri}"' -f 'xmlns:dc="#{DC.uri}"' -f 'xmlns:rdf="#{RDF.uri}"' -f 'xmlns:owl="#{OWL.uri}"' -o rdfxml #{@path} 2>/dev/null` + # TODO: add base uri for ist services + `rapper -i ntriples -f 'xmlns:ot="#{OT.uri}"' -f 'xmlns:ota="#{OTA.uri}"' -f 'xmlns:dc="#{DC.uri}"' -f 'xmlns:rdf="#{RDF.uri}"' -f 'xmlns:owl="#{OWL.uri}"' -o rdfxml #{@path} 2>/dev/null` end # Convert to JSON as specified in http://n2.talis.com/wiki/RDF_JSON_Specification @@ -373,6 +383,8 @@ module OpenTox XSD.boolean elsif value.is_a? Float XSD.float + elsif value.is_a? Integer + XSD.integer else XSD.string end @@ -383,6 +395,8 @@ module OpenTox datatype = OT.NominalFeature elsif value.is_a? Float datatype = OT.NumericFeature + elsif value.is_a? Integer + datatype = OT.NumericFeature else datatype = OT.StringFeature end diff --git a/lib/task.rb b/lib/task.rb index 19f42d6..00499fa 100644 --- a/lib/task.rb +++ b/lib/task.rb @@ -38,6 +38,7 @@ module OpenTox task = Task.new(task_uri.chomp) # measure current memory consumption +=begin memory = `free -m|sed -n '2p'`.split free_memory = memory[3].to_i + memory[6].to_i # include cache if free_memory < 20 # require at least 200 M free memory @@ -56,6 +57,7 @@ module OpenTox # return task # #raise "Server too busy to start a new task" #end +=end task_pid = Spork.spork(:logger => LOGGER) do LOGGER.debug "Task #{task.uri} started #{Time.now}" @@ -167,6 +169,10 @@ module OpenTox @metadata[OT.hasStatus] == 'Running' end + def queued? + @metadata[OT.hasStatus] == 'Queued' + end + def completed? 
@metadata[OT.hasStatus] == 'Completed' end @@ -284,9 +290,10 @@ module OpenTox raise "illegal task state, task is completed, resultURI is no URI: '"+@metadata[OT.resultURI].to_s+ "'" unless @metadata[OT.resultURI] and @metadata[OT.resultURI].to_s.uri? if completed? if @http_code == 202 - raise "#{@uri}: illegal task state, code is 202, but hasStatus is not Running: '"+@metadata[OT.hasStatus]+"'" unless running? + raise "#{@uri}: illegal task state, code is 202, but hasStatus is not Running or Queued: '"+@metadata[OT.hasStatus]+"'" unless running? or queued? elsif @http_code == 201 - raise "#{@uri}: illegal task state, code is 201, but hasStatus is not Completed: '"+@metadata[OT.hasStatus]+"'" unless completed? + # ignore hasStatus + # raise "#{@uri}: illegal task state, code is 201, but hasStatus is not Completed: '"+@metadata[OT.hasStatus]+"'" unless completed? raise "#{@uri}: illegal task state, code is 201, resultURI is no task-URI: '"+@metadata[OT.resultURI].to_s+ "'" unless @metadata[OT.resultURI] and @metadata[OT.resultURI].to_s.uri? end diff --git a/lib/to-html.rb b/lib/to-html.rb index 6785974..2979062 100644 --- a/lib/to-html.rb +++ b/lib/to-html.rb @@ -1,12 +1,12 @@ -OT_LOGO = "http://opentox.informatik.uni-freiburg.de/ot-logo.png" +OT_LOGO = File.join(CONFIG[:services]["opentox-validation"],"resources/ot-logo.png") class String # encloses URI in text with with link tag # @return [String] new text with marked links def link_urls - self.gsub(/(?i)http(s?):\/\/[^\r\n\s']*/, '<a href=\0>\0</a>') + self.gsub(/(?i)http(s?):\/\/[^\r\n\s']*/, '<a href="\0">\0</a>') end end @@ -15,98 +15,123 @@ module OpenTox # produces a html page for making web services browser friendly # format of text (=string params) is preserved (e.g. 
line breaks) # urls are marked as links - # @example post params: - # [ [ [:mandatory_param_1], [:mandatory_param_2], [:optional_param,"default_value"] ], - # [ [:alteranative_mandatory_param_1], [:alteranative_mandatory_param_2] ] - # ] + # # @param [String] text this is the actual content, # @param [optional,String] related_links info on related resources # @param [optional,String] description general info - # @param [optional,Array] post_params, array of arrays containing info on POST operation, see example + # @param [optional,Array] post_command, infos for the post operation, object defined below # @return [String] html page - def self.text_to_html( text, subjectid=nil, related_links=nil, description=nil, post_params=nil ) + def self.text_to_html( text, subjectid=nil, related_links=nil, description=nil, post_command=nil ) # TODO add title as parameter title = nil #$sinatra.url_for($sinatra.request.env['PATH_INFO'], :full) if $sinatra html = "<html>" html += "<title>"+title+"</title>" if title - html += "<img src="+OT_LOGO+"><body>" + html += "<img src=\""+OT_LOGO+"\"><\/img><body>" if AA_SERVER user = OpenTox::Authorization.get_user(subjectid) if subjectid html += "<pre><p align=\"right\">" unless user - html += "You are currently not logged in to "+$url_provider.url_for("",:full)+ - ", <a href="+$url_provider.url_for("/login",:full)+">login</a>" + html += "You are currently not signed in to "+$url_provider.url_for("",:full)+ + ", <a href="+$url_provider.url_for("/sign_in",:full)+">sign in</a>" else - html += "You are logged in as '#{user}' to "+$url_provider.url_for("",:full)+ - ", <a href="+$url_provider.url_for("/logout",:full)+">logout</a>" + html += "You are signed in as '#{user}' to "+$url_provider.url_for("",:full)+ + ", <a href="+$url_provider.url_for("/sign_out",:full)+">sign out</a>" end html += " </p></pre>" end html += "<h3>Description</h3><pre><p>"+description.link_urls+"</p></pre>" if description html += "<h3>Related 
links</h3><pre><p>"+related_links.link_urls+"</p></pre>" if related_links - if post_params - html += "<h3>POST parameters</h3>" - count = 0 - post_params.each do |p| - html += "<pre><p>alternatively:</p></pre>" if count > 0 - html += "<pre><p><table><thead><tr><th>param</th><th>default_value</th></tr></thead>" - p.each do |k,v| - html += "<tr><th>"+k.to_s+"</th><th>"+(v!=nil ? v.to_s : "<i>mandatory</i>")+"</th></tr>" - end - html += "</table></p></pre>" - count += 1 - end + if post_command + raise "not a post command" unless post_command.is_a?(OpenTox::PostCommand) + html += "<h3>POST command</h3>" + html += post_command.to_html end - html += "<h3>Content</h3>" if description || related_links + html += "<h3>Content</h3>" if description || related_links || post_command html += "<pre><p style=\"padding:15px; border:10px solid \#5D308A\">" html += text.link_urls - html += "</p></pre></body><html>" + html += "</p></pre></body></html>" html end - def self.login( msg=nil ) + def self.sign_in( msg=nil ) html = "<html><title>Login</title><img src="+OT_LOGO+"><body>" - html += "<form method='POST' action='"+$url_provider.url_for("/login",:full)+"'>" + html += "<form method='POST' action='"+$url_provider.url_for("/sign_in",:full)+"'>" html += "<pre><p style=\"padding:15px; border:10px solid \#5D308A\">" html += msg+"\n\n" if msg - html += "Please login to "+$url_provider.url_for("",:full)+"\n\n" + html += "Please sign in to "+$url_provider.url_for("",:full)+"\n\n" html += "<table border=0>" html += "<tr><td>user:</td><td><input type='text' name='user' size='15' /></td></tr>"+ "<tr><td>password:</td><td><input type='password' name='password' size='15' /></td></tr>"+ #"<input type=hidden name=back_to value="+back_to.to_s+">"+ - "<tr><td><input type='submit' value='Login' /></td></tr>" - html += "</table></p></pre></form></body><html>" + "<tr><td><input type='submit' value='Sign in' /></td></tr>" + html += "</table></p></pre></form></body></html>" html end + + class 
PostAttribute + attr_accessor :name, :is_mandatory, :default, :description + + def initialize(name, is_mandatory=true, default=nil, description=nil) + @name = name + @is_mandatory = is_mandatory + @default = default + @description = description + end + end + + class PostCommand + attr_accessor :attributes, :uri, :name + + def initialize( uri, name="Send" ) + @uri = uri + @name = name + @attributes = [] + end + + def to_html + html = "<form method='POST' action='"+@uri.to_s+"'>" + html << "<pre><p>" + html << "<table border=0>" + #html << "<tr><td colspan='3'><i><sup>Mandatory params are marked with *.</sup></i></td></tr>" + attributes.each do |a| + mandatory_string = a.is_mandatory ? "*" : "" + html << "<tr><td>"+a.name.to_s+":"+mandatory_string+"</td>" + html << "<td><input type='text' name='"+a.name.to_s+ + "' size='50' value='"+a.default.to_s+"'/></td>" + html << "<td><i><sup>"+a.description.to_s+"</sup></i></td></tr>" + end + html << "<tr><td colspan='3'><input type='submit' value='"+@name.to_s+"' /></td></tr>" + html << "</table></p></pre></form>" + html + end + end end -=begin -get '/logout/?' do +get '/sign_out/?' do response.set_cookie("subjectid",{:value=>nil}) content_type "text/html" - content = "Sucessfully logged out from "+$url_provider.url_for("",:full) + content = "Sucessfully signed out from "+$url_provider.url_for("",:full) OpenTox.text_to_html(content) end -get '/login/?' do +get '/sign_in/?' do content_type "text/html" - OpenTox.login + OpenTox.sign_in end -post '/login/?' do +post '/sign_in/?' 
do subjectid = OpenTox::Authorization.authenticate(params[:user], params[:password]) if (subjectid) response.set_cookie("subjectid",{:value=>subjectid}) content_type "text/html" - content = "Sucessfully logged in as '"+params[:user]+"' to "+$url_provider.url_for("",:full) + content = "Sucessfully signed in as '"+params[:user]+"' to "+$url_provider.url_for("",:full) OpenTox.text_to_html(content,subjectid) else content_type "text/html" - OpenTox.login("Login failed, please try again") + OpenTox.sign_in("Login failed, please try again") end end -=end diff --git a/lib/validation.rb b/lib/validation.rb index d58d36e..646b076 100644 --- a/lib/validation.rb +++ b/lib/validation.rb @@ -36,6 +36,30 @@ module OpenTox Validation.new(uri) end + # creates a training test validation, waits until it finishes, may take some time + # @param [Hash] params (required:algorithm_uri,training_dataset_uri,prediction_feature,test_dataset_uri,optional:algorithm_params) + # @param [String,optional] subjectid + # @param [OpenTox::Task,optional] waiting_task (can be a OpenTox::Subtask as well), progress is updated accordingly + # @return [OpenTox::Validation] + def self.create_training_test_validation( params, subjectid=nil, waiting_task=nil ) + params[:subjectid] = subjectid if subjectid + uri = OpenTox::RestClientWrapper.post( File.join(CONFIG[:services]["opentox-validation"],"training_test_validation"), + params,{:content_type => "text/uri-list"},waiting_task ) + Validation.new(uri) + end + + # creates a bootstrapping validation, waits until it finishes, may take some time + # @param [Hash] params (required:algorithm_uri,dataset_uri,prediction_feature, optional:algorithm_params,random_seed(1)) + # @param [String,optional] subjectid + # @param [OpenTox::Task,optional] waiting_task (can be a OpenTox::Subtask as well), progress is updated accordingly + # @return [OpenTox::Validation] + def self.create_bootstrapping_validation( params, subjectid=nil, waiting_task=nil ) + params[:subjectid] = 
subjectid if subjectid + uri = OpenTox::RestClientWrapper.post( File.join(CONFIG[:services]["opentox-validation"],"bootstrapping"), + params,{:content_type => "text/uri-list"},waiting_task ) + Validation.new(uri) + end + # looks for report for this validation, creates a report if no report is found # @param [String,optional] subjectid # @param [OpenTox::Task,optional] waiting_task (can be a OpenTox::Subtask as well), progress is updated accordingly @@ -61,34 +85,27 @@ module OpenTox @metadata = YAML.load(OpenTox::RestClientWrapper.get(uri,{:subjectid => subjectid, :accept => "application/x-yaml"})) end - # PENDING: creates summary as used for ToxCreate - def summary - if @metadata[OT.classificationStatistics] - res = { - :nr_predictions => @metadata[OT.numInstances] - @metadata[OT.numUnpredicted], - :correct_predictions => @metadata[OT.classificationStatistics][OT.percentCorrect], - :weighted_area_under_roc => @metadata[OT.classificationStatistics][OT.weightedAreaUnderRoc], - } - @metadata[OT.classificationStatistics][OT.classValueStatistics].each do |s| - if s[OT.classValue].to_s=="true" - res[:true_positives] = s[OT.numTruePositives] - res[:false_positives] = s[OT.numFalsePositives] - res[:true_negatives] = s[OT.numTrueNegatives] - res[:false_negatives] = s[OT.numFalseNegatives] - res[:sensitivity] = s[OT.truePositiveRate] - res[:specificity] = s[OT.trueNegativeRate] - break + # returns confusion matrix as array, predicted values are in rows + # example: + # [[nil,"active","moderate","inactive"],["active",1,3,99],["moderate",4,2,8],["inactive",3,8,6]] + # -> 99 inactive compounds have been predicted as active + def confusion_matrix + raise "no classification statistics, probably a regression valdiation" unless @metadata[OT.classificationStatistics] + matrix = @metadata[OT.classificationStatistics][OT.confusionMatrix][OT.confusionMatrixCell] + values = matrix.collect{|cell| cell[OT.confusionMatrixPredicted]}.uniq + table = [[nil]+values] + values.each do |c| + 
table << [c] + values.each do |r| + matrix.each do |cell| + if cell[OT.confusionMatrixPredicted]==c and cell[OT.confusionMatrixActual]==r + table[-1] << cell[OT.confusionMatrixValue].to_f + break + end end end - res - elsif @metadata[OT.regressionStatistics] - { - :nr_predictions => @metadata[OT.numInstances] - @metadata[OT.numUnpredicted], - :r_square => @metadata[OT.regressionStatistics][OT.rSquare], - :root_mean_squared_error => @metadata[OT.regressionStatistics][OT.rootMeanSquaredError], - :mean_absolute_error => @metadata[OT.regressionStatistics][OT.meanAbsoluteError], - } end + table end end @@ -147,9 +164,9 @@ module OpenTox @metadata = YAML.load(OpenTox::RestClientWrapper.get(uri,{:subjectid => subjectid, :accept => "application/x-yaml"})) end - # PENDING: creates summary as used for ToxCreate - def summary( subjectid=nil ) - Validation.from_cv_statistics( @uri, subjectid ).summary + # returns a Validation object containing the statistics of the crossavlidation + def statistics( subjectid=nil ) + Validation.from_cv_statistics( @uri, subjectid ) end end @@ -198,7 +215,6 @@ module OpenTox # @param [String,optional] subjectid # @return [OpenTox::CrossvalidationReport] def self.find( uri, subjectid=nil ) - # PENDING load report data? 
OpenTox::RestClientWrapper.get(uri,{:subjectid => subjectid}) rep = CrossvalidationReport.new(uri) rep.load_metadata( subjectid ) @@ -227,6 +243,54 @@ module OpenTox end end + + class AlgorithmComparisonReport + include OpenTox + + # finds AlgorithmComparisonReport via uri, raises error if not found + # @param [String] uri + # @param [String,optional] subjectid + # @return [OpenTox::CrossvalidationReport] + def self.find( uri, subjectid=nil ) + OpenTox::RestClientWrapper.get(uri,{:subjectid => subjectid}) + rep = AlgorithmComparisonReport.new(uri) + rep.load_metadata( subjectid ) + rep + end + + # finds AlgorithmComparisonReport for a particular crossvalidation + # @param [String] crossvalidation uri + # @param [String,optional] subjectid + # @return [OpenTox::AlgorithmComparisonReport] nil if no report found + def self.find_for_crossvalidation( crossvalidation_uri, subjectid=nil ) + uris = RestClientWrapper.get(File.join(CONFIG[:services]["opentox-validation"], + "/report/algorithm_comparison?crossvalidation="+crossvalidation_uri), {:subjectid => subjectid}).chomp.split("\n") + uris.size==0 ? 
nil : AlgorithmComparisonReport.new(uris[-1]) + end + + # creates a crossvalidation report via crossvalidation + # @param [Hash] crossvalidation uri_hash, see example + # @param [String,optional] subjectid + # @param [OpenTox::Task,optional] waiting_task (can be a OpenTox::Subtask as well), progress is updated accordingly + # @return [OpenTox::AlgorithmComparisonReport] + # example for hash: + # { :lazar-bbrc => [ http://host/validation/crossvalidation/x1, http://host/validation/crossvalidation/x2 ], + # :lazar-last => [ http://host/validation/crossvalidation/xy, http://host/validation/crossvalidation/xy ] } + def self.create( crossvalidation_uri_hash, subjectid=nil, waiting_task=nil ) + identifier = [] + validation_uris = [] + crossvalidation_uri_hash.each do |id, uris| + uris.each do |uri| + identifier << id + validation_uris << uri + end + end + uri = RestClientWrapper.post(File.join(CONFIG[:services]["opentox-validation"],"/report/algorithm_comparison"), + { :validation_uris => validation_uris.join(","), :identifier => identifier.join(","), :subjectid => subjectid }, {}, waiting_task ) + AlgorithmComparisonReport.new(uri) + end + end + class QMRFReport include OpenTox |