diff options
-rw-r--r-- | 5x_cv/5x_crossvalidation.rb | 58 | ||||
-rwxr-xr-x | 5x_cv/comparealgs_dv.sh | 14 | ||||
-rw-r--r-- | 5x_cv/dataset_config | 13 | ||||
-rw-r--r-- | 5x_cv/exceptions_config.yaml | 6 | ||||
-rw-r--r-- | 5x_cv/factors_config | 1 | ||||
-rw-r--r-- | 5x_cv/lib/cv_am.rb | 161 | ||||
-rwxr-xr-x | 5x_cv/wrapper5cv.sh | 38 | ||||
-rw-r--r-- | all.rb | 1 | ||||
-rw-r--r-- | dataset.rb | 67 | ||||
-rw-r--r-- | fminer.rb | 34 | ||||
-rw-r--r-- | r-util.rb | 222 | ||||
-rw-r--r-- | test-util.rb | 36 | ||||
-rw-r--r-- | toxcreate.rb | 198 | ||||
-rw-r--r-- | validation.rb | 66 |
14 files changed, 744 insertions, 171 deletions
diff --git a/5x_cv/5x_crossvalidation.rb b/5x_cv/5x_crossvalidation.rb new file mode 100644 index 0000000..bdde2dc --- /dev/null +++ b/5x_cv/5x_crossvalidation.rb @@ -0,0 +1,58 @@ +# Do a five times 10-fold crossvalidation +# Author: Andreas Maunz, David Vorgrimmler +# @params: one string of algorithm parameters "p1=v1;p2=v2;...;pn=vn" (valid keys: see position_mapper) + +require 'rubygems' +require 'opentox-ruby' +require File.expand_path('lib/cv_am.rb', File.dirname(__FILE__)) + +subjectid = nil + +if ARGV.size != 1 + puts + puts "Error! Arguments: <algorithm_params> in the form p1=v1;p2=v2;...;pn=vn" + exit 1 +end + +# Arguments for lib/cv_am.rb: file_or_dataset_uri feature_generation min_frequency min_chisq_significance backbone stratified random_seed prediction_algorithm local_svm_kernel nr_hits conf_stdev +position_mapper={ + "dataset_uri" => 0, + "feature_generation_uri" => 1, + "min_frequency" => 2, + "min_chisq_significance" => 3, + "backbone" => 4, + "stratified" => 5, + "random_seed" => 6, + "prediction_algorithm" => 7, + "local_svm_kernel" => 8, + "nr_hits" => 9, + "conf_stdev" => 10 +} + +param_str=ARGV[0] # ARGV, not $ARGV: the global is nil unless 'English' is required +puts param_str +params = Array.new(position_mapper.size,"") +param_str.split(";").each { |param| + k,v = param.split("=",2) # limit 2: values (URIs) may themselves contain "=" + params[position_mapper.fetch(k)] = v.to_s # unknown key fails loudly; "k=" yields "" instead of nil +} +params[5]="false" # stratified + +exception_config = YAML.load_file("exceptions_config.yaml") +if ! exception_config[params[0]].nil? + exception_config[params[0]].each { |k,v| + puts "Setting exception: #{k} => #{v}" + params[position_mapper.fetch(k)] = v.to_s + } +end + +(1..5).each do |i| + begin + puts + puts "Round #{i.to_s}." 
+ params[6]=i # random seed + cv(params) + rescue StandardError => e # not Exception: let `exit 1` from cv (SystemExit) and Ctrl-C end the run + puts "Error in 5xCV: #{e.message}: #{e.backtrace}" + end +end diff --git a/5x_cv/comparealgs_dv.sh b/5x_cv/comparealgs_dv.sh new file mode 100755 index 0000000..1b7a7b4 --- /dev/null +++ b/5x_cv/comparealgs_dv.sh @@ -0,0 +1,14 @@ +#!/bin/bash + +if [ $# -ne 4 ]; then + echo "\"validation_uri1,validation_uri2,...\" \"identifier1,identifier2,...\" \"significance [0.95-0.6]\" \"attributes: weighted_r_square,weighted_root_mean_squared_error,weighted_mean_absolute_error,r_square,root_mean_squared_error,sample_correlation_coefficient\"" + exit 1 +fi + +uris="$1" +iden="$2" +signi="$3" #default 0.9; 0.95 - 0.6 +attri="$4" #weighted_r_square,weighted_root_mean_squared_error,weighted_mean_absolute_error,r_square,root_mean_squared_error,sample_correlation_coefficient +host="toxcreate3.in-silico.ch:8080" + +curl -X POST --data-urlencode "validation_uris=$uris" --data-urlencode "identifier=$iden" --data-urlencode "ttest_significance=$signi" --data-urlencode "ttest_attributes=$attri" "http://$host/validation/report/algorithm_comparison" diff --git a/5x_cv/dataset_config b/5x_cv/dataset_config new file mode 100644 index 0000000..a52649c --- /dev/null +++ b/5x_cv/dataset_config @@ -0,0 +1,13 @@ +#EPA v4b Fathead Minnow Acute Toxicity LC50_mmol +http://toxcreate3.in-silico.ch:8080/dataset/2133 +#CPDBAS_v5d_20Nov2008_rat_TD50 +#http://toxcreate3.in-silico.ch:8080/dataset/1408 +#CPDBAS_v5d_20Nov2008_mouse_TD50 +#http://toxcreate3.in-silico.ch:8080/dataset/1384 +#MultiCellCall: DSSTox Carcinogenic Potency DBS MultiCellCall_no_duplicates.csv +#http://toxcreate3.in-silico.ch:8080/dataset/130 +#Bloodbarr: bloodbarr_no_duplicate.csv +#http://toxcreate3.in-silico.ch:8080/dataset/271 +#Salmonella Mutagenicity: DSSTox Carcinogenic Potency DBS Mutagenicity_no_duplicates.csv +#http://toxcreate3.in-silico.ch:8080/dataset/233 + diff --git a/5x_cv/exceptions_config.yaml b/5x_cv/exceptions_config.yaml new file mode 100644 index 0000000..7124c62 --- /dev/null +++ 
b/5x_cv/exceptions_config.yaml @@ -0,0 +1,6 @@ +http://toxcreate3.in-silico.ch:8080/dataset/271: + min_frequency: 12 +http://x61s.fdm.uni-freiburg.de/dataset/3546: + min_frequency: 8 +http://x61s.fdm.uni-freiburg.de/dataset/3543: + min_frequency: 6 diff --git a/5x_cv/factors_config b/5x_cv/factors_config new file mode 100644 index 0000000..72dbb5f --- /dev/null +++ b/5x_cv/factors_config @@ -0,0 +1 @@ +feature_generation_uri=http://toxcreate3.in-silico.ch:8080/algorithm/fminer/bbrc diff --git a/5x_cv/lib/cv_am.rb b/5x_cv/lib/cv_am.rb new file mode 100644 index 0000000..965cd5b --- /dev/null +++ b/5x_cv/lib/cv_am.rb @@ -0,0 +1,161 @@ +# Do a 10-fold crossvalidation with multiple datasets +# Author: Andreas Maunz, David Vorgrimmler +# @params: args, an Array of 11 positional values in the order of the usage message below ("" = not set) + +def cv (args) + + subjectid = nil#OpenTox::Authorization.authenticate(guest,guest) + + if args.size != 11 + puts + puts "Error! Arguments: file_or_dataset_uri feature_generation min_frequency min_chisq_significance backbone stratified random_seed prediction_algorithm local_svm_kernel nr_hits conf_stdev" + exit 1 + end + + reg=/^(http|https):\/\/[a-z0-9]+([\-\.]{1}[a-z0-9]+)*\.[a-z]{2,5}(:[0-9]{1,5})?(\/.*)?$/ix + + file=args[0] + + + # dataset_is_uri=false + # if reg.match(file)? true : false + # #file.include? "http" + # puts "Uri is valid" + dataset_is_uri=true +# files = [ file ] + # elsif ! File.exists? file + # puts "File #{file} missing" + # exit 1 + # end + +# if args[1].to_s != "last" && args[1].to_s != "bbrc" + if !(args[1].to_s.include? "/algorithm/fminer/bbrc") && !(args[1].to_s.include? "/algorithm/fminer/last") + puts "feature_generation_uri must contain '/algorithm/fminer/last' or '/algorithm/fminer/bbrc'" +# puts "feature_generation must be 'last' or 'bbrc'" + exit 1 + end + + if args[2].to_s != "" # was `! args[2] == ""`, i.e. (!args[2]) == "", always false: the check never ran + if args[2].to_i < 2 + puts "min_frequency must be at least 2 or \"\"" + exit 1 + end + end + + if args[3].to_s != "" + if ! 
(args[3].to_f <= 1.0 && args[3].to_f >= 0.0) + puts "min_chisq_significance must be between 0 and 1 or \"\"" + exit 1 + end + end + + if args[4].to_s != "" + if args[4].to_s != "true" && args[4].to_s != "false" + puts "backbone must be 'true' or 'false'." + exit 1 + end + end + + + if args[5].to_s != "true" && args[5].to_s != "false" + puts "stratified must be 'true' or 'false'" + exit 1 + end + + if args[6].to_s != "" + if args[6].to_i < 1 # natural number means >= 1; the old `! (... <= 1)` would have rejected seeds 2..5 + puts "random_seed must be a natural number or \"\"" + exit 1 + end + end + + if args[7].to_s != "" # NOTE(review): this check was dead code before; confirm that other prediction algorithms should really be rejected + if ! (args[7] == "local_svm_classification") + puts "lazar_prediction_method must be \"local_svm_classification\"" + exit 1 + end + end + + if args[8].to_s != "" + if ! (args[8] == "weighted_tanimoto" || args[8] == "propositionalized") + puts "local_svm_kernel must be \"weighted_tanimoto\" or \"propositionalized\"" + exit 1 + end + end + + if args[9].to_s != "" + if ! (args[9] == "true") + puts "nr_hits must be \"true\"" + exit 1 + end + end + + if args[10].to_s != "" + if ! 
(args[10] == "true") + puts "conf_stdev must be \"true\"" + exit 1 + end + end + + + + #if !dataset_is_uri + # # Upload a dataset + # training_dataset = OpenTox::Dataset.create_from_csv_file(file, subjectid) + # prediction_feature = training_dataset.features.keys[0] + # training_dataset_uri=training_dataset.uri + # puts prediction_feature + #else + training_dataset_uri=file + puts training_dataset_uri + prediction_feature = OpenTox::Dataset.find(training_dataset_uri).features.keys.first + puts prediction_feature + # end + puts training_dataset_uri + + + # Crossvalidation + # @param [Hash] params (required:algorithm_uri,dataset_uri,prediction_feature, optional:algorithm_params,num_folds(10),random_seed(1),stratified(false)) + alg_params = "feature_generation_uri=#{args[1]}"; + alg_params = alg_params << ";min_frequency=#{args[2]}" unless args[2]=="" + alg_params = alg_params << ";min_chisq_significance=#{args[3]}" unless args[3]=="" + alg_params = alg_params << ";backbone=#{args[4]}" unless args[4]=="" + alg_params = alg_params << ";prediction_algorithm=#{args[7]}" unless args[7]=="" + alg_params = alg_params << ";local_svm_kernel=#{args[8]}" unless args[8]=="" + alg_params = alg_params << ";nr_hits=#{args[9]}" unless args[9]=="" + alg_params = alg_params << ";conf_stdev=#{args[10]}" unless args[10]=="" + + stratified_param = args[5] + random_seed_param = args[6] + + cv_args = {:dataset_uri => training_dataset_uri, :prediction_feature => prediction_feature, :algorithm_uri => args[1].split('fminer')[0] + "lazar", :algorithm_params => alg_params, :stratified => stratified_param } + cv_args[:random_seed] = random_seed_param unless random_seed_param == "" + puts file + puts cv_args.to_yaml + puts + begin + lazar_single_args = {} + lazar_single_args[:feature_generation_uri] = "#{args[1]}"; + lazar_single_args[:min_frequency] = args[2] unless args[2]=="" + lazar_single_args[:min_chisq_significance] = args[3] unless args[3]=="" + lazar_single_args[:backbone] = args[4] 
unless args[4]=="" + lazar_single_args[:prediction_algorithm] = args[7] unless args[7]=="" + lazar_single_args[:local_svm_kernel] = args[8] unless args[8]=="" + lazar_single_args[:nr_hits] = args[9] unless args[9]=="" + lazar_single_args[:conf_stdev] = args[10] unless args[10]=="" + #m = OpenTox::Algorithm::Lazar.new.run({:dataset_uri => training_dataset_uri, :subjectid => subjectid}.merge lazar_single_args ).to_s + #puts m + cv = OpenTox::Crossvalidation.create(cv_args).uri + puts cv + cvr = OpenTox::CrossvalidationReport.create( cv , subjectid).uri + puts cvr + #qmrfr = OpenTox::QMRFReport.create(m).uri + #puts qmrfr + #cv_stat = OpenTox::Validation.from_cv_statistics( cv, subjectid ) + #puts cv_stat.metadata.to_yaml + #[ cv_stat, training_dataset_uri ] + rescue StandardError => e # not Exception: do not swallow Interrupt/SystemExit + puts "cv failed: #{e.message} #{e.backtrace}" + end + +end diff --git a/5x_cv/wrapper5cv.sh b/5x_cv/wrapper5cv.sh new file mode 100755 index 0000000..2155635 --- /dev/null +++ b/5x_cv/wrapper5cv.sh @@ -0,0 +1,38 @@ +#!/bin/bash +# Wrapper Script for CV +# Set Factors, Datasets, Exceptions in the respective config_files +# AM, 2011 + +if [ $# -lt 2 ]; then + echo "Usage: $0 factors datasets" + exit 1 +fi + +# Configure basics +source $HOME/.bash_aliases +otconfig +THIS_DATE=`date +%Y%m%d_%H_` +FACTORS="$1" +DATASETS="$2" + +# Don't start while another crossvalidation run is active (match the ruby job, not any path containing "5x") +while ps x | grep "ruby 5x_crossvalidation.rb" | grep -v grep >/dev/null 2>&1; do sleep 3; done + +LOGFILE="$THIS_DATE""$USER""_wrapper5cv.log" +rm "$LOGFILE" >/dev/null 2>&1 + +cat "$DATASETS" | while read dataset_uri; do + if [[ -n "$dataset_uri" ]] && ! [[ "$dataset_uri" =~ "#" ]]; then # allow comments, skip blank lines + cat "$FACTORS" | while read factor; do + if ! 
[[ "$factor" =~ "#" ]] && [[ -n "$factor" ]]; then # allow comments, skip blank lines + echo "${THIS_DATE}: $factor" >> $LOGFILE 2>&1 + factor="$factor;dataset_uri=$dataset_uri" + echo "ruby 5x_crossvalidation.rb $factor" >> $LOGFILE 2>&1 + ruby 5x_crossvalidation.rb "$factor" >> $LOGFILE 2>&1 + fi + done + else + echo >> $LOGFILE 2>&1 + echo $dataset_uri >> $LOGFILE 2>&1 + fi +done @@ -13,3 +13,4 @@ require './lazar.rb' require './validation.rb' require './toxcreate.rb' require './transform.rb' +require './r-util.rb' @@ -4,7 +4,8 @@ require 'test/unit' require 'validate-owl' class DatasetTest < Test::Unit::TestCase - + include TestUtil + def setup @datasets = { @@regression_training_dataset.uri => nil, @@ -224,37 +225,47 @@ class DatasetTest < Test::Unit::TestCase end end - - def dataset_equal(d,d2) - assert d.compounds.sort==d2.compounds.sort, - d.compounds.sort.to_yaml+"\n!=\n"+d2.compounds.sort.to_yaml - assert d.features.keys.size==d2.features.keys.size, - d.features.keys.to_yaml+"\n!=\n"+d2.features.keys.to_yaml - assert d.features.keys.sort==d2.features.keys.sort, - d.features.keys.sort.to_yaml+"\n!=\n"+d2.features.keys.sort.to_yaml - d.compounds.each do |c| - d.features.keys.each do |f| - assert_array_about_equal d.data_entries[c][f],d2.data_entries[c][f] + + def test_merge() + #upload + dataset1 = OpenTox::Dataset.create_from_csv_file(File.new("data/hamster_carcinogenicity.csv").path, @@subjectid) + dataset2 = OpenTox::Dataset.create_from_csv_file(File.new("data/multi_cell_call.csv").path, @@subjectid) + #merge1 + title = "test merge" + dataset_merge1 = OpenTox::Dataset.merge(dataset1, dataset2, { DC.title => title,DC.creator => "testsuite"}, @@subjectid ) + dataset_reloaded1 = OpenTox::Dataset.find(dataset_merge1.uri, @@subjectid) + #test1 + [dataset_merge1, dataset_reloaded1].each do |d| + assert_equal d.metadata[DC.title],title + assert_equal d.features.size,(dataset1.features.size+dataset2.features.size) + assert_equal d.compounds.size,(dataset1.compounds+dataset2.compounds).uniq.size + 
[dataset1, dataset2].each do |d_i| + d_i.compounds.each{|c| assert d.compounds.include?(c)} + d_i.features.keys.each{|f| assert d.features.keys.include?(f)} + d_i.features.keys.each do |f| + assert_equal d_i.features[f],d.features[f] + d_i.compounds.each do |c| # .each was missing: the block was silently ignored and the entries were never compared + assert_equal d_i.data_entries[c][f],d.data_entries[c][f] + end + end end end - end - - def assert_array_about_equal(a,a2) - if (a!=nil || a2!=nil) - raise "no arrays #{a.class} #{a2.class}" unless a.is_a?(Array) and a2.is_a?(Array) - assert a.size==a2.size - a.sort! - a2.sort! - a.size.times do |i| - if (a[i].is_a?(Float) and a2[i].is_a?(Float)) - assert (a[i]-a2[i]).abs<0.0000001,"#{a[i]}(#{a[i].class}) != #{a2[i]}(#{a2[i].class})" - else - assert a[i]==a2[i],"#{a[i]}(#{a[i].class}) != #{a2[i]}(#{a2[i].class})" - end - end + #merge2 + compounds1 = dataset1.compounds[0..dataset1.compounds.size/2] + features1 = [] + dataset_merge2 = OpenTox::Dataset.merge(dataset1, dataset2, {}, @@subjectid, features1, nil, compounds1 ) + dataset_reloaded2 = OpenTox::Dataset.find(dataset_merge2.uri, @@subjectid) + #test2 + [dataset_merge2, dataset_reloaded2].each do |d| + assert_equal d.features.size,dataset2.features.size + assert_equal d.compounds.size,(compounds1+dataset2.compounds).uniq.size + end + #cleanup + [dataset_merge1, dataset_merge2, dataset1, dataset2].each do |d| + OpenTox::RestClientWrapper.delete(d.uri,{:subjectid => @@subjectid}) end end - + def validate(data) assert_kind_of OpenTox::Dataset, @dataset assert_equal @dataset.data_entries.size, data[:nr_data_entries] if data @@ -363,5 +363,39 @@ end #} cleanup end + + def test_match + feature = @@classification_training_dataset.features.keys.first + feature_dataset_uri = OpenTox::Algorithm::Fminer::BBRC.new.run({ + :dataset_uri => @@classification_training_dataset.uri, :prediction_feature => feature, :subjectid => @@subjectid}).to_s + feature_dataset = OpenTox::Dataset.find(feature_dataset_uri,@@subjectid) + tmp_resources = [ feature_dataset_uri ] + 
[true,false].each do |hits| + matched_dataset_uri = OpenTox::RestClientWrapper.post(File.join(CONFIG[:services]["opentox-algorithm"],"fminer","bbrc","match"), + {:feature_dataset_uri => feature_dataset_uri, :dataset_uri => @@multinomial_training_dataset.uri, + :nr_hits => hits, :subjectid => @@subjectid}).to_s + tmp_resources << matched_dataset_uri + matched_dataset = OpenTox::Dataset.find(matched_dataset_uri,@@subjectid) + # matched dataset should have same features as feature dataset + assert_equal feature_dataset.features.keys.sort,matched_dataset.features.keys.sort + # matched datset should have same compounds as input dataset for matching + assert_equal matched_dataset.compounds.sort,@@multinomial_training_dataset.compounds.sort + matched_dataset.compounds.each do |c| + matched_dataset.features.keys.each do |f| + if matched_dataset.data_entries[c] and matched_dataset.data_entries[c][f] + v = matched_dataset.data_entries[c][f] + if hits + assert_equal v.size,1 + assert v[0].is_a?(Integer) + assert v[0]>0 + else + assert_equal v,[1] + end + end + end + end + end + tmp_resources.each{|uri| OpenTox::RestClientWrapper.delete(uri,{:subjectid=>@@subjectid})} + end end diff --git a/r-util.rb b/r-util.rb new file mode 100644 index 0000000..8ff5b80 --- /dev/null +++ b/r-util.rb @@ -0,0 +1,222 @@ +require 'rubygems' +require 'opentox-ruby' +require 'test/unit' +require 'validate-owl' +require 'test-util' + +DELETE = true + +class RUtilTest < Test::Unit::TestCase + include TestUtil + + def global_setup + unless defined?(@@subjectid) + @@subjectid = OpenTox::Authorization.authenticate("guest","guest") + @@signout = true + end + @@rutil = OpenTox::RUtil.new + @@hamster = OpenTox::Dataset.create_from_csv_file(File.new("data/hamster_carcinogenicity.csv").path, @@subjectid) + pred_feature = @@hamster.features.keys[0] + fminer = File.join(CONFIG[:services]["opentox-algorithm"],"fminer/bbrc") + hamster_features_uri = OpenTox::RestClientWrapper.post(fminer, + 
{:dataset_uri=>@@hamster.uri,:prediction_feature=>pred_feature,:subjectid=>@@subjectid}).to_s + @@hamster_features = OpenTox::Dataset.find(hamster_features_uri,@@subjectid) + @@resources = [@@hamster.uri, hamster_features_uri] + end + + def global_teardown + @@rutil.quit_r + if DELETE + @@resources.each do |uri| + OpenTox::RestClientWrapper.delete(uri,{:subjectid=>@@subjectid}) + end + else + puts "Resources "+@@resources.to_yaml + end + OpenTox::Authorization.logout(@@subjectid) if defined?(@@signout) # log out last: the deletes above still use this subjectid + end + + def pre_files(files) + files.each{|f| File.delete(f) if File.exist?(f)} + end + + def post_files(files) + files.each{|f| assert File.exist?(f)} + if DELETE + files.each{|f| File.delete(f) if File.exist?(f)} + else + puts "Plotted to "+files.to_yaml + end + end + + def test_paired_ttest + puts "ttest" + x = Array.new(1000,0).collect{|e| rand()} + y = Array.new(1000,0).collect{|e| rand()} + res = @@rutil.paired_ttest(x,y) + puts "x >> y ? #{res}" + assert_equal res,0 + y = Array.new(1000,0).collect{|e| rand()-0.1} + res = @@rutil.paired_ttest(x,y) + puts "x >> y ? 
#{res}" + assert res>0 + end + + def test_boxplot + puts "boxplot" + files = ["/tmp/box.svg","/tmp/box.png"] + pre_files(files) + data = [ + [ :method, [4,4,5,5,4,3,2] ], + [ :method2, [1,2,3,4,5,4,6] ], + [ :asdf, [9,1,8,0,7,1,6] ] ] + @@rutil.boxplot(files, data, "comparison1" ) + post_files(files) + end + + def test_double_hist_plot + puts "double_hist_plot" + hist_num_log = "/tmp/hist_num_log.svg" + hist_num = "/tmp/hist_num.svg" + hist_cat = "/tmp/hist_cat.svg" + pre_files [hist_num_log,hist_num,hist_cat] + data1 = Array.new(1000,0).collect{|x| rand()*rand()} + data2 = Array.new(1000,0).collect{|x| rand()*rand()*rand()} + @@rutil.double_hist_plot([hist_num_log], data1, data2, true, true ) + @@rutil.double_hist_plot([hist_num], data1, data2, true, false ) + data1 = "a,a,a,a,b,b,b,b,b,b,b,b,b,b,c,c,c".split(",") + data2 = "a,a,a,a,a,a,b,b,b,b,b,b,c,c,c,c,c,d,d,d,d,d".split(",") + @@rutil.double_hist_plot([hist_cat], data1, data2, false ) + post_files [hist_num_log,hist_num,hist_cat] + end + + def test_dataset_to_dataframe + puts "dataset_to_dataframe" + dataframe = @@rutil.dataset_to_dataframe(@@hamster,"NA",@@subjectid) + dataset_conv = @@rutil.dataframe_to_dataset(dataframe,@@subjectid) + dataset_conv_reloaded = OpenTox::Dataset.find(dataset_conv.uri,@@subjectid) + @@resources << dataset_conv.uri + dataset_equal(@@hamster,dataset_conv) + dataset_equal(@@hamster,dataset_conv_reloaded) + + feats = @@hamster_features.features.keys[0..(@@hamster_features.features.keys.size/2)] + dataframe = @@rutil.dataset_to_dataframe(@@hamster_features,"NA",@@subjectid,feats) + dataset_conv = @@rutil.dataframe_to_dataset(dataframe,@@subjectid) + dataset_conv_reloaded = OpenTox::Dataset.find(dataset_conv.uri,@@subjectid) + @@resources << dataset_conv.uri + [dataset_conv, dataset_conv_reloaded].each do |d| + assert_equal d.compounds.sort,@@hamster_features.compounds.sort + assert_equal d.features.keys.sort,feats.sort + d.compounds.each do |c| + d.features.keys.each do |f| + if 
@@hamster_features.data_entries[c]==nil || @@hamster_features.data_entries[c][f]==nil + assert d.data_entries[c]==nil || d.data_entries[c][f]==nil + else + assert_not_nil d.data_entries[c] + assert_equal @@hamster_features.data_entries[c][f],d.data_entries[c][f] + end + end + end + end + + dataframe = @@rutil.dataset_to_dataframe(@@hamster_features,"NA",@@subjectid) + @@rutil.r.eval "#{dataframe} <- #{dataframe}[2:10,10:20]" + dataset_conv = @@rutil.dataframe_to_dataset(dataframe,@@subjectid) + dataset_conv_reloaded = OpenTox::Dataset.find(dataset_conv.uri,@@subjectid) + @@resources << dataset_conv.uri + [dataset_conv,dataset_conv_reloaded].each do |d| + assert_equal d.compounds.size,9 + d.compounds.size.times do |i| + assert_equal d.compounds[i],@@hamster_features.compounds[i+1] + end + assert_equal d.features.size,11 + d.compounds.each do |c| + d.features.keys.each do |f| + if @@hamster_features.data_entries[c]==nil || @@hamster_features.data_entries[c][f]==nil + assert d.data_entries[c]==nil || d.data_entries[c][f]==nil + else + assert_not_nil d.data_entries[c] + assert_equal @@hamster_features.data_entries[c][f],d.data_entries[c][f] + end + end + end + end + end + + def stratified_split + unless defined?@@strat + @@split_ratio = 0.05 + @@split_has_duplicates = false #hamster has no duplicates +# res = @@rutil.stratified_split(@@hamster,0,@@split_ratio,1) +# @@resources += [ res[0].uri, res[1].uri ] +# @@strat = { :data => @@hamster, :split1 => res[0], :split2 => res[1] } + data_combined = OpenTox::Dataset.merge(@@hamster,@@hamster_features,{},@@subjectid) + res = @@rutil.stratified_split(data_combined,0,@@split_ratio,@@subjectid,1) + @@resources += [ data_combined.uri, res[0].uri, res[1].uri ] + @@strat = {:data => data_combined, :split1 => res[0], :split2 => res[1] } + end + @@strat + end + + def test_stratified_split + puts "test_stratified_split" + split = stratified_split + size = split[:data].compounds.size + size1 = split[:split1].compounds.size + size2 = 
split[:split2].compounds.size + assert_equal size,(split[:split1].compounds+split[:split2].compounds).uniq.size + unless @@split_has_duplicates + assert_equal (@@split_ratio*size).round,size1, + "Dataset #{size} should be split into #{(@@split_ratio*size).round}/#{size-(@@split_ratio*size).round}"+ + " (exact: #{@@split_ratio*size}), instead: #{size1}/#{size2}" + end + split[:data].compounds.each do |c| + include1 = split[:split1].compounds.include?(c) + include2 = split[:split2].compounds.include?(c) + unless @@split_has_duplicates + assert(((include1 and !include2) or (!include1 and include2))) + else + assert((include1 or include2)) + end + end + end + + def test_feature_value_plot + puts "feature_value_plot" + split = stratified_split + data = split[:data] + dataset1 = data.split( data.compounds[0..4], data.features.keys, {}, @@subjectid) + dataset2 = data.split( data.compounds[5..-1], data.features.keys, {}, @@subjectid) + @@resources += [dataset1.uri, dataset2.uri] + files = [] + #plot + [true,false].each do |fast_embedding| + next if fast_embedding==false and !@@rutil.package_installed?("smacof") + random_file = "/tmp/feature_value_plot_random_fast#{fast_embedding}.svg" + stratified_file = "/tmp/feature_value_plot_stratified_fast#{fast_embedding}.svg" + pre_files [random_file, stratified_file] + @@rutil.feature_value_plot([random_file], dataset1.uri, dataset2.uri, + "first five", "rest", nil, fast_embedding, @@subjectid) + @@rutil.feature_value_plot([stratified_file], split[:split1].uri, split[:split2].uri, + "five percent stratified", "rest", nil, fast_embedding, @@subjectid) + files += [random_file, stratified_file] + end + #cleanup + post_files files + end + + # hack to have a global_setup and global_teardown + def teardown + if((@@expected_test_count-=1) == 0) + global_teardown + end + end + + def setup + unless defined?@@expected_test_count + @@expected_test_count = (self.class.instance_methods.reject{|method| method[0..3] != 'test'}).length + 
global_setup + end + end + +end diff --git a/test-util.rb b/test-util.rb new file mode 100644 index 0000000..2620656 --- /dev/null +++ b/test-util.rb @@ -0,0 +1,36 @@ +require 'rubygems' +require 'test/unit' + +module TestUtil + + def dataset_equal(d,d2) + assert d.compounds.sort==d2.compounds.sort, + d.compounds.sort.to_yaml+"\n!=\n"+d2.compounds.sort.to_yaml + assert d.features.keys.size==d2.features.keys.size, + d.features.keys.to_yaml+"\n!=\n"+d2.features.keys.to_yaml + assert d.features.keys.sort==d2.features.keys.sort, + d.features.keys.sort.to_yaml+"\n!=\n"+d2.features.keys.sort.to_yaml + d.compounds.each do |c| + d.features.keys.each do |f| + assert_array_about_equal((d.data_entries[c]||{})[f],(d2.data_entries[c]||{})[f]) # a compound may have no entries at all + end + end + end + + def assert_array_about_equal(a,a2) + if (a!=nil || a2!=nil) + raise "no arrays #{a.class} #{a2.class}" unless a.is_a?(Array) and a2.is_a?(Array) + assert a.size==a2.size + a = a.sort # sorted copies: comparing must not reorder the caller's data entries + a2 = a2.sort + a.size.times do |i| + if (a[i].is_a?(Float) and a2[i].is_a?(Float)) + assert((a[i]-a2[i]).abs<0.0000001,"#{a[i]}(#{a[i].class}) != #{a2[i]}(#{a2[i].class})") + else + assert a[i]==a2[i],"#{a[i]}(#{a[i].class}) != #{a2[i]}(#{a2[i].class})" + end + end + end + end + +end
\ No newline at end of file diff --git a/toxcreate.rb b/toxcreate.rb index 9b40e0e..14e61c4 100644 --- a/toxcreate.rb +++ b/toxcreate.rb @@ -4,12 +4,14 @@ require 'opentox-ruby' require 'test/unit' require 'akephalos' require 'capybara/dsl' +gem 'capybara-envjs' +require 'capybara/envjs' # gem install capybara-envjs +# requires firefox 3.6 for akephalos and selenium !!! Capybara.default_driver = :akephalos # use this without visual inspection #Capybara.default_driver = :selenium # use this for visual inspection Capybara.run_server = false Capybara.default_wait_time = 1000 -#Capybara.javascript_driver = :selenium - +Capybara.javascript_driver = :envjs class ToxCreateTest < Test::Unit::TestCase include Capybara @@ -18,37 +20,38 @@ class ToxCreateTest < Test::Unit::TestCase @user = "guest" @password = "guest" end - - def teardown - end - -=begin +=begin # works only with AA enabled def test_01_login visit File.join(CONFIG[:services]["opentox-toxcreate"], "login") - click_button "Login" - puts "Login without credentials" - assert page.has_content?("Please enter username and password.") + assert page.has_content?('User: guest') fill_in('Username', :with => @user) fill_in('Password', :with => @password + "nonsense") - click_button "Login" - puts "Login with wrong password" - assert page.has_content?('Login failed. Please try again.') - fill_in('Username', :with => "anonymous") - fill_in('Password', :with => "anonymous") - click_button "Login" - assert page.has_content?("Welcome anonymous!") + click_button('Login') + assert page.has_content? "Login failed. Please try again." + fill_in('Username', :with => @user) + fill_in('Password', :with => @password) + click_button('Login') + assert page.has_content? "Welcome #{@user}!" 
visit File.join(CONFIG[:services]["opentox-toxcreate"], "login") - click_button "Login as guest" - puts "Login as user guest" - assert page.has_content?("Welcome guest!") + assert page.has_content?('User: guest') + fill_in('Username', :with => @user) + fill_in('Password', :with => @password + "nonsense") + click_button('Login') + assert page.has_content? "Login failed. Please try again." + click_button('Login as guest') + assert page.has_content? "Welcome #{@user}!" end - +=end def test_02_toxcreate # works only with akephalos + # create a model and check status is complete Capybara.current_driver = :akephalos - #login(@browser, @user, @password) - visit CONFIG[:services]["opentox-toxcreate"] + visit File.join(CONFIG[:services]["opentox-toxcreate"], "create") assert page.has_content?('Upload training data') - attach_file('file', "./data/hamster_carcinogenicity.mini.csv") + attach_file('file', "./data/hamster_carcinogenicity.csv") + assert page.has_button?('endpoint_list_button') + click_on "Select endpoint" + choose('EcotoxicEffects') + choose('Acute_toxicity_to_fish_lethality') click_on "Create model" assert first("h2").has_content? "hamster_carcinogenicity" time = 0 @@ -57,119 +60,84 @@ class ToxCreateTest < Test::Unit::TestCase time +=5 end assert first(".model_status").has_content?("Completed") + sleep 5 end -=end + def test_03_predict + # predict with the model from test_02 Capybara.register_driver :akephalos do |app| Capybara::Driver::Akephalos.new(app, :validate_scripts => false) + visit File.join(CONFIG[:services]["opentox-toxcreate"], "predict") + fill_in 'identifier', :with => 'NNc1ccccc1' + find(:xpath, '//form/fieldset[2]/input[contains(@name, "select")]').click + click_button("Predict") + click_button("Details") + page.has_content? "false" + page.has_content? "0.294" + page.has_content? "0.875" + page.has_content? 
"next" end - session = Capybara::Session.new(:akephalos) - session.visit CONFIG[:services]["opentox-toxcreate"] - session.click_on "Predict" - session.fill_in "or enter a Smiles string", :with => "NNc1ccccc1" - session.check "hamster carcinogenicity" - session.click_button "Predict" - assert session.has_content?("Not enough similar compounds in training dataset") - session.click_on "Confidence" - assert session.has_content?("Indicates the applicability domain of a model") - session.click_button "Details" - - #assert page.has_content? "false" - #assert page.has_content? "0.294" - #assert page.has_content? "0.875" - end -=begin - def test_04_inspect_policies - Capybara.current_driver = :selenium - visit CONFIG[:services]["opentox-toxcreate"] - click_on "Inspect" - assert first('h2').has_content? 'hamster_carcinogenicity' - click_on "edit" - click_on "manage policy" - within(:xpath, '//form[contains(@id, "form_policy_group_member_")]') do - find(:xpath, './/input[5]').click - click_on "update" - end - sleep 5 - end - - def test_05_inspect_policies - Capybara.current_driver = :selenium - visit CONFIG[:services]["opentox-toxcreate"] - click_on "Inspect" - assert first('h2').has_content? 'hamster_carcinogenicity' - click_on "edit" - click_on "manage policy" - - within(:xpath, '//form[contains(@id, "form_policy_group_member_")]') do - find(:xpath, './/input[4]').click - click_on "update" - end - sleep 5 - end - - def test_06_inspect_policies - Capybara.current_driver = :selenium - visit CONFIG[:services]["opentox-toxcreate"] - click_on "Inspect" - assert first('h2').has_content? 'hamster_carcinogenicity' - click_on "edit" - click_on "manage policy" - within(:xpath, '//form[contains(@id, "form_development")]') do - find(:xpath, './/input[4]').click - click_on "add" - end - sleep 5 end - - def test_07_inspect_policies - Capybara.current_driver = :selenium - visit CONFIG[:services]["opentox-toxcreate"] - click_on "Inspect" - assert first('h2').has_content? 
'hamster_carcinogenicity' - click_on "edit" - click_on "manage policy" - within(:xpath, '//form[contains(@id, "form_policy_group_development_")]') do - find(:xpath, './/input[3]').click - click_on "update" - end + + def test_04_delete_model + # delete the model from test_02 + visit File.join(CONFIG[:services]["opentox-toxcreate"], "models") + assert (first(".model_status").has_content?("Completed") or first(".model_status").has_content?("Error")) + click_on "delete" + page.evaluate_script('window.confirm = function() { return true; }') sleep 5 - page.evaluate_script('window.confirm = function() { return true; }') - click_on "delete" end - =begin - def test_08_multi_cell_call + def test_09_multi_cell_call #login(@browser, @user, @password) - Capybara.current_driver = :akephalos - visit CONFIG[:services]["opentox-toxcreate"] + #Capybara.current_driver = :akephalos + visit File.join(CONFIG[:services]["opentox-toxcreate"], "create") assert page.has_content?('Upload training data') attach_file('file', "./data/multi_cell_call.csv") click_on "Create model" end - def test_09_kazius - Capybara.current_driver = :akephalos + def test_10_kazius + #Capybara.current_driver = :akephalos #login(@browser, @user, @password) - visit CONFIG[:services]["opentox-toxcreate"] + visit File.join(CONFIG[:services]["opentox-toxcreate"], "create") assert page.has_content?('Upload training data') attach_file('file', "./data/kazius.csv") # wait until validation is completed # check results (links, reports, results) puts @browser.url end - - def test_10_parallel_models - #login(@browser, @user, @password) - 10.times do - visit CONFIG[:services]["opentox-toxcreate"] + + def test_11_parallel_models + 5.times do + visit File.join(CONFIG[:services]["opentox-toxcreate"], "create") assert page.has_content?('Upload training data') - attach_file('file', "./data/multi_cell_call.csv") + attach_file('file', "./data/hamster_carcinogenicity.csv") click_on "Create model" end + while 
(first(".model_status").has_no_content?("Completed") and first(".model_status").has_no_content?("Error")) do + sleep 1 + end + assert first(".model_status").has_content?("Completed") + end + + def test_12_delete_parallel_models + 5.times do + visit File.join(CONFIG[:services]["opentox-toxcreate"], "models") + click_on "delete" + page.evaluate_script('window.confirm = function() { return true; }') + #sleep 5 + end + sleep 5 + visit File.join(CONFIG[:services]["opentox-toxcreate"], "models") + while page.has_no_content? "There are currently no models" do + sleep 1 + end + assert page.has_content? "There are currently no models" end +=begin # raises capybara errors, but gui works from browser - def test_11_toxcreate_sdf # works only with akephalos + def test_12_toxcreate_sdf # works only with akephalos Capybara.current_driver = :akephalos #login(@browser, @user, @password) visit CONFIG[:services]["opentox-toxcreate"] @@ -186,14 +154,4 @@ class ToxCreateTest < Test::Unit::TestCase end =end - -=begin -def login(browser, user, password) - browser.goto File.join(CONFIG[:services]["opentox-toxcreate"], "login") - browser.text_field(:id, "username").set(user) - browser.text_field(:id, "password").set(password) - browser.button(:value, "Login").click -end - -=end end diff --git a/validation.rb b/validation.rb index 06254c3..919a562 100644 --- a/validation.rb +++ b/validation.rb @@ -19,8 +19,7 @@ end class ValidationTest < Test::Unit::TestCase @@delete = true - #@@feature_types = ["bbrc", "last"] - @@feature_types = ["bbrc"] + @@hamster_cv_feature_types = ["bbrc", "last"] @@qmrf_test = true @@data = [] @@data << { :type => :crossvalidation, @@ -37,7 +36,8 @@ class ValidationTest < Test::Unit::TestCase :test_data => "http://apps.ideaconsult.net:8080/ambit2/dataset/435293?page=30&pagesize=10", :feat => "http://apps.ideaconsult.net:8080/ambit2/feature/533748", :info => "http://apps.ideaconsult.net:8080/ambit2/dataset/435293?page=0&pagesize=300" } - @@files = { 
File.new("data/hamster_carcinogenicity.csv") => :crossvalidation, + @@files = { + File.new("data/hamster_carcinogenicity.csv") => :crossvalidation, #File.new("data/hamster_carcinogenicity.mini.csv") => :crossvalidation, #File.new("data/EPAFHM.csv") => :crossvalidation, File.new("data/EPAFHM.mini.csv") => :crossvalidation, @@ -91,7 +91,7 @@ class ValidationTest < Test::Unit::TestCase def test_training_test_split - @@vs = [] + @@vs = [] unless defined?@@vs @@data.each do |data| if data[:type]==:split_validation puts "test_training_test_split "+data[:info].to_s @@ -112,7 +112,7 @@ class ValidationTest < Test::Unit::TestCase def t.waiting_for(task_uri); end v = OpenTox::Validation.create_training_test_split(p, @@subjectid, t) assert v.uri.uri? - if @@subjectid + if AA_SERVER assert_rest_call_error OpenTox::NotAuthorizedError do OpenTox::Validation.find(v.uri) end @@ -120,6 +120,7 @@ class ValidationTest < Test::Unit::TestCase v = OpenTox::Validation.find(v.uri, @@subjectid) assert_valid_date v assert v.uri.uri? + assert_prob_correct(v) model = v.metadata[OT.model] assert model.uri? v_list = OpenTox::Validation.list( {:model => model} ) @@ -133,7 +134,7 @@ class ValidationTest < Test::Unit::TestCase def test_training_test_validation - @@vs = [] + @@vs = [] unless defined?@@vs @@data.each do |data| if data[:type]==:training_test_validation puts "test_training_test_validation "+data[:info].to_s @@ -153,7 +154,7 @@ class ValidationTest < Test::Unit::TestCase def t.waiting_for(task_uri); end v = OpenTox::Validation.create_training_test_validation(p, @@subjectid, t) assert v.uri.uri? - if @@subjectid + if AA_SERVER assert_rest_call_error OpenTox::NotAuthorizedError do OpenTox::Validation.find(v.uri) end @@ -161,6 +162,7 @@ class ValidationTest < Test::Unit::TestCase v = OpenTox::Validation.find(v.uri, @@subjectid) assert_valid_date v assert v.uri.uri? + assert_prob_correct(v) model = v.metadata[OT.model] assert model.uri? 
v_list = OpenTox::Validation.list( {:model => model} ) @@ -180,16 +182,17 @@ class ValidationTest < Test::Unit::TestCase puts "test_validation_report" assert defined?v,"no validation defined" assert_kind_of OpenTox::Validation,v - if @@subjectid + if AA_SERVER assert_rest_call_error OpenTox::NotAuthorizedError do OpenTox::ValidationReport.create(v.uri) end end report = OpenTox::ValidationReport.find_for_validation(v.uri,@@subjectid) - assert report==nil,"report already exists for validation\nreport: "+(report ? report.uri.to_s : "")+"\nvalidation: "+v.uri.to_s - report = OpenTox::ValidationReport.create(v.uri,@@subjectid) + assert_nil report,"report already exists for validation\nreport: "+(report ? report.uri.to_s : "")+"\nvalidation: "+v.uri.to_s + params = {:min_confidence => 0.05} + report = OpenTox::ValidationReport.create(v.uri,params,@@subjectid) assert report.uri.uri? - if @@subjectid + if AA_SERVER assert_rest_call_error OpenTox::NotAuthorizedError do OpenTox::ValidationReport.find(report.uri) end @@ -225,7 +228,8 @@ class ValidationTest < Test::Unit::TestCase @@cv_identifiers = [] @@data.each do |data| if data[:type]==:crossvalidation - @@feature_types.each do |fminer| + @@hamster_cv_feature_types.each do |fminer| + next unless (fminer==@@hamster_cv_feature_types[0] or data[:info].to_s =~ /hamster_carcinogenicity.csv/) puts "test_crossvalidation "+data[:info].to_s+" "+fminer p = { :dataset_uri => data[:data], @@ -245,7 +249,7 @@ class ValidationTest < Test::Unit::TestCase def t.waiting_for(task_uri); end cv = OpenTox::Crossvalidation.create(p, @@subjectid, t) assert cv.uri.uri? - if @@subjectid + if AA_SERVER assert_rest_call_error OpenTox::NotAuthorizedError do OpenTox::Crossvalidation.find(cv.uri) end @@ -253,13 +257,14 @@ class ValidationTest < Test::Unit::TestCase cv = OpenTox::Crossvalidation.find(cv.uri, @@subjectid) assert_valid_date cv assert cv.uri.uri? 
- if @@subjectid + if AA_SERVER assert_rest_call_error OpenTox::NotAuthorizedError do cv.statistics(cv) end end stats_val = cv.statistics(@@subjectid) assert_kind_of OpenTox::Validation,stats_val + assert_prob_correct(stats_val) algorithm = cv.metadata[OT.algorithm] assert algorithm.uri? @@ -299,7 +304,7 @@ class ValidationTest < Test::Unit::TestCase #assert_rest_call_error OpenTox::NotFoundError do # OpenTox::CrossvalidationReport.find_for_crossvalidation(cv.uri) #end - if @@subjectid + if AA_SERVER assert_rest_call_error OpenTox::NotAuthorizedError do OpenTox::CrossvalidationReport.create(cv.uri) end @@ -307,7 +312,7 @@ class ValidationTest < Test::Unit::TestCase assert OpenTox::CrossvalidationReport.find_for_crossvalidation(cv.uri,@@subjectid)==nil report = OpenTox::CrossvalidationReport.create(cv.uri,@@subjectid) assert report.uri.uri? - if @@subjectid + if AA_SERVER assert_rest_call_error OpenTox::NotAuthorizedError do OpenTox::CrossvalidationReport.find(report.uri) end @@ -334,16 +339,18 @@ class ValidationTest < Test::Unit::TestCase assert_kind_of OpenTox::Crossvalidation,@@cvs[j] hash = { @@cv_identifiers[i] => [@@cvs[i].uri], @@cv_identifiers[j] => [@@cvs[j].uri] } - if @@subjectid + if AA_SERVER assert_rest_call_error OpenTox::NotAuthorizedError do - OpenTox::AlgorithmComparisonReport.create hash,@@subjectid + OpenTox::AlgorithmComparisonReport.create hash,{} end end assert OpenTox::AlgorithmComparisonReport.find_for_crossvalidation(@@cvs[i].uri,@@subjectid)==nil assert OpenTox::AlgorithmComparisonReport.find_for_crossvalidation(@@cvs[j].uri,@@subjectid)==nil - report = OpenTox::AlgorithmComparisonReport.create hash,@@subjectid + + params = {:ttest_significance => 0.95, :ttest_attributes => "real_runtime,percent_unpredicted", :max_num_predictions => 5} + report = OpenTox::AlgorithmComparisonReport.create hash,params,@@subjectid assert report.uri.uri? 
- if @@subjectid + if AA_SERVER assert_rest_call_error OpenTox::NotAuthorizedError do OpenTox::AlgorithmComparisonReport.find(report.uri) end @@ -412,14 +419,16 @@ class ValidationTest < Test::Unit::TestCase # checks RestCallError type def assert_rest_call_error( ex ) - if ex==OpenTox::NotAuthorizedError and @@subjectid==nil + if ex==OpenTox::NotAuthorizedError and AA_SERVER==nil puts "AA disabled: skipping test for not authorized" return end begin yield + assert false,"no rest-call error thrown" rescue OpenTox::RestCallError => e - raise "error Report of RestCallError is no errorReport: "+e.errorCause.class.to_s+":\n"+e.errorCause.to_yaml unless e.errorCause.is_a?(OpenTox::ErrorReport) + raise "error Report of RestCallError is no errorReport: "+ + e.errorCause.class.to_s+":\n"+e.errorCause.to_yaml unless e.errorCause.is_a?(OpenTox::ErrorReport) report = e.errorCause while report.errorCause report = report.errorCause @@ -441,6 +450,18 @@ class ValidationTest < Test::Unit::TestCase assert time>Time.new-(10*60),opentox_object.uri.to_s+" took longer than 10 minutes "+time.to_s =end end + + def assert_prob_correct( validation ) + class_stats = validation.metadata[OT.classificationStatistics] + if class_stats != nil + class_value_stats = class_stats[OT.classValueStatistics] + class_value_stats.each do |cs| + #puts cs[OT.positivePredictiveValue] + #puts validation.probabilities(0,cs[OT.classValue]).inspect + assert cs[OT.positivePredictiveValue]==validation.probabilities(0,cs[OT.classValue],@@subjectid)[:probs][cs[OT.classValue]] + end + end + end # hack to have a global_setup and global_teardown def teardown @@ -457,4 +478,3 @@ class ValidationTest < Test::Unit::TestCase end - |