summaryrefslogtreecommitdiff
path: root/5x_cv/lib/cv_am.rb
diff options
context:
space:
mode:
Diffstat (limited to '5x_cv/lib/cv_am.rb')
-rw-r--r--5x_cv/lib/cv_am.rb161
1 files changed, 161 insertions, 0 deletions
diff --git a/5x_cv/lib/cv_am.rb b/5x_cv/lib/cv_am.rb
new file mode 100644
index 0000000..965cd5b
--- /dev/null
+++ b/5x_cv/lib/cv_am.rb
@@ -0,0 +1,161 @@
+# Do a 10-fold crossvalidation with mutiple datasets
+# Author: Andreas Maunz, David Vorgrimmler
+# @params: CSV-File, Method (LAST, BBRC), Minimum Frequency
+
+def cv (args)
+
+ subjectid = nil#OpenTox::Authorization.authenticate(guest,guest)
+
+ if args.size != 11
+ puts
+ puts "Error! Arguments: file_or_dataset_uri feature_generation min_frequency min_chisq_significance backbone stratified random_seed prediction_algorithm local_svm_kernel nr_hits conf_stdev"
+ exit 1
+ end
+
+ reg=/^(http|https):\/\/[a-z0-9]+([\-\.]{1}[a-z0-9]+)*\.[a-z]{2,5}(:[0-9]{1,5})?(\/.*)?$/ix
+
+ file=args[0]
+
+
+ # dataset_is_uri=false
+ # if reg.match(file)? true : false
+ # #file.include? "http"
+ # puts "Uri is valid"
+ dataset_is_uri=true
+# files = [ file ]
+ # elsif ! File.exists? file
+ # puts "File #{file} missing"
+ # exit 1
+ # end
+
+# if args[1].to_s != "last" && args[1].to_s != "bbrc"
+ if !(args[1].to_s.include? "/algorithm/fminer/bbrc") && !(args[1].to_s.include? "/algorithm/fminer/last")
+ puts "feature_generation_uri must contain '/algorithm/fminer/last' or '/algorithm/fminer/bbrc'"
+# puts "feature_generation must be 'last' or 'bbrc'"
+ exit 1
+ end
+
+ if ! args[2] == ""
+ if args[2].to_i < 2
+ puts "min_frequency must be at least 2 or \"\""
+ exit 1
+ end
+ end
+
+ if ! args[3] == ""
+ if ! (args[3].to_f <= 1.0 && args[3].to_f >= 0.0)
+ puts "min_chisq_significance must be between 0 and 1 or \"\""
+ exit 1
+ end
+ end
+
+ if ! args[4] == ""
+ if args[4].to_s != "true" && args[4].to_s != "false"
+ puts "backbone must be 'true' or 'false'."
+ exit 1
+ end
+ end
+
+
+ if args[5].to_s != "true" && args[5].to_s != "false"
+ puts "stratified must be 'true' or 'false'"
+ exit 1
+ end
+
+ if ! args[6] == ""
+ if ! (args[6].to_i <= 1)
+ puts "random_seed must be a natural number or \"\""
+ exit 1
+ end
+ end
+
+ if ! args[7] == ""
+ if ! (args[7] == "local_svm_classification")
+ puts "lazar_prediction_method must be \"local_svm_classification\""
+ exit 1
+ end
+ end
+
+ if ! args[8] == ""
+ if ! (args[8] == "weighted_tanimoto" || args[8] == "propositionalized")
+ puts "local_svm_kernel must be \"weighted_tanimoto\" or \"propositionalized\""
+ exit 1
+ end
+ end
+
+ if ! args[9] == ""
+ if ! (args[9] == "true")
+ puts "nr_hits must be \"true\""
+ exit 1
+ end
+ end
+
+ if ! args[10] == ""
+ if ! (args[10] == "true")
+ puts "conf_stdev must be \"true\""
+ exit 1
+ end
+ end
+
+
+
+ #if !dataset_is_uri
+ # # Upload a dataset
+ # training_dataset = OpenTox::Dataset.create_from_csv_file(file, subjectid)
+ # prediction_feature = training_dataset.features.keys[0]
+ # training_dataset_uri=training_dataset.uri
+ # puts prediction_feature
+ #else
+ training_dataset_uri=file
+ puts training_dataset_uri
+ prediction_feature = OpenTox::Dataset.find(training_dataset_uri).features.keys.first
+ puts prediction_feature
+ # end
+ puts training_dataset_uri
+
+
+ # Crossvalidation
+ # @param [Hash] params (required:algorithm_uri,dataset_uri,prediction_feature, optional:algorithm_params,num_folds(10),random_seed(1),stratified(false))
+ alg_params = "feature_generation_uri=#{args[1]}";
+ alg_params = alg_params << ";min_frequency=#{args[2]}" unless args[2]==""
+ alg_params = alg_params << ";min_chisq_significance=#{args[3]}" unless args[3]==""
+ alg_params = alg_params << ";backbone=#{args[4]}" unless args[4]==""
+ alg_params = alg_params << ";prediction_algorithm=#{args[7]}" unless args[7]==""
+ alg_params = alg_params << ";local_svm_kernel=#{args[8]}" unless args[8]==""
+ alg_params = alg_params << ";nr_hits=#{args[9]}" unless args[9]==""
+ alg_params = alg_params << ";conf_stdev=#{args[10]}" unless args[10]==""
+
+ stratified_param = args[5]
+ random_seed_param = args[6]
+
+ cv_args = {:dataset_uri => training_dataset_uri, :prediction_feature => prediction_feature, :algorithm_uri => args[1].split('fminer')[0] + "lazar", :algorithm_params => alg_params, :stratified => stratified_param }
+ cv_args[:random_seed] = random_seed_param unless random_seed_param == ""
+ puts file
+ puts cv_args.to_yaml
+ puts
+ begin
+ lazar_single_args = {}
+ lazar_single_args[:feature_generation_uri] = "#{args[1]}";
+ lazar_single_args[:min_frequency] = args[2] unless args[2]==""
+ lazar_single_args[:min_chisq_significance] = args[3] unless args[3]==""
+ lazar_single_args[:backbone] = args[4] unless args[4]==""
+ lazar_single_args[:prediction_algorithm] = args[7] unless args[7]==""
+ lazar_single_args[:local_svm_kernel] = args[8] unless args[8]==""
+ lazar_single_args[:nr_hits] = args[9] unless args[9]==""
+ lazar_single_args[:conf_stdev] = args[10] unless args[10]==""
+ #m = OpenTox::Algorithm::Lazar.new.run({:dataset_uri => training_dataset_uri, :subjectid => subjectid}.merge lazar_single_args ).to_s
+ #puts m
+ cv = OpenTox::Crossvalidation.create(cv_args).uri
+ puts cv
+ cvr = OpenTox::CrossvalidationReport.create( cv , subjectid).uri
+ puts cvr
+ #qmrfr = OpenTox::QMRFReport.create(m).uri
+ #puts qmrfr
+ #cv_stat = OpenTox::Validation.from_cv_statistics( cv, subjectid )
+ #puts cv_stat.metadata.to_yaml
+ #[ cv_stat, training_dataset_uri ]
+ rescue Exception => e
+ puts "cv failed: #{e.message} #{e.backtrace}"
+ end
+
+end