diff options
author | gebele <gebele@in-silico.ch> | 2017-03-07 12:04:23 +0000 |
---|---|---|
committer | gebele <gebele@in-silico.ch> | 2017-03-07 12:04:23 +0000 |
commit | 6bd7492c2cc0e7887e99f00ad5f9a8e8fd094392 (patch) | |
tree | 154266ca1f5dff52c4512daafcec52fbfe1ce790 | |
parent | 61b98e011d4b32e88629d5a07d7b84a6abdded64 (diff) |
updated scripts and readme
-rw-r--r-- | .gitignore | 1 | ||||
-rw-r--r-- | README.md | 7 | ||||
-rw-r--r-- | compare_validation_reports.rb | 104 | ||||
-rw-r--r-- | create_prediction_models.rb | 19 | ||||
-rw-r--r-- | lazar_validation_reports.rb | 55 |
5 files changed, 178 insertions, 8 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..279b45b
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+dump/
@@ -2,5 +2,8 @@
 
 - Public lazar datasets
 - Scripts to generate prediction models and crossvalidations
-
-`mongorestore --host 127.0.0.1` imports the database dump
+- Scripts to generate and compare validation reports
+``` ruby
+ # comment or uncomment for your needs
+ ruby create_prediction_models.rb &
+```
diff --git a/compare_validation_reports.rb b/compare_validation_reports.rb
new file mode 100644
index 0000000..8852a34
--- /dev/null
+++ b/compare_validation_reports.rb
@@ -0,0 +1,104 @@
+#!/usr/bin/env ruby
+require 'optparse'
+require 'json'
+
+
+options = {}
+OptionParser.new do |opts|
+  opts.banner = "Usage: compare_validation_reports.rb [options]"
+
+  opts.on("-d d", "--dir=dir", "Path to the validation reports dir.") do |dir|
+    options[:dir] = (dir[-1,1] == "/" ? dir : dir + "/")
+  end
+
+  opts.on("-c", "--classification", "Select only classification reports from dir.") do |c|
+    if options[:regression]
+      puts "Don't use optional parameters -c and -r at the same time. Mixed by default."
+      exit
+    end
+    options[:classification] = c
+  end
+
+  opts.on("-r", "--regression", "Select only regression reports from dir.") do |r|
+    if options[:classification]
+      puts "Don't use optional parameters -c and -r at the same time. Mixed by default."
+      exit
+    end
+    options[:regression] = r
+  end
+
+  opts.on("-v", "--verbose", "Display verbose report. Standard for -d mode without -c or -r parameters.") do |v|
+    options[:verbose] = v
+  end
+
+  opts.on("-f f", "--file=files", "Select two or more comma seperated reports.") do |files|
+    list = files.split(",")
+    unless list.size == 2
+      puts "You have to pass at least two files as argument with full path."
+      exit
+    end
+    options[:files] = list
+  end
+
+  opts.on("-h", "--help", "Displays help") do
+    puts opts
+    exit
+  end
+
+end.parse!
+
+if options.empty? || (!options[:files] && !options[:dir])
+  puts "Usage: compare_validation_reports.rb -h"
+  exit
+end
+
+if options[:dir]
+  if options[:verbose]
+    if !options[:classification] && !options[:regression]
+      json = Dir[options[:dir]+'*.json'].map { |f| JSON.parse File.read(f) }.flatten
+      puts JSON.pretty_generate json
+    end
+    if options[:classification]
+      json = Dir[options[:dir]+'*_classification_*.json'].map { |f| JSON.parse File.read(f) }.flatten
+      puts JSON.pretty_generate json
+    end
+    if options[:regression]
+      json = Dir[options[:dir]+'*_regression_*.json'].map { |f| JSON.parse File.read(f) }.flatten
+      puts JSON.pretty_generate json
+    end
+  else
+
+    main = {}
+
+    if !options[:classification] && !options[:regression] && !options[:verbose]
+      json = Dir[options[:dir]+'*.json'].map { |f| JSON.parse File.read(f) }.flatten
+      puts JSON.pretty_generate json
+    end
+    if options[:classification]
+      json = Dir[options[:dir]+'*_classification_*.json'].map { |f| JSON.parse File.read(f) }.flatten
+      json.each do |report|
+        main[report["endpoint"]] ||= []
+        main[report["endpoint"]] << [report["species"], report["created_at"], report["crossvalidations"].map{|cv| {"accuracy": cv[1]["statistics"]["accuracy"], "weighted_accuracy": cv[1]["statistics"]["weighted_accuracy"], "true_rate": cv[1]["statistics"]["true_rate"], "predictivity": cv[1]["statistics"]["predictivity"]}}.flatten]
+      end
+      puts JSON.pretty_generate main
+    end
+    if options[:regression]
+      json = Dir[options[:dir]+'*_regression_*.json'].map { |f| JSON.parse File.read(f) }.flatten
+      json.each do |report|
+        main[report["endpoint"]] ||= []
+        main[report["endpoint"]] << [report["species"], report["created_at"], report["crossvalidations"].map{|cv| {"rmse": cv[1]["statistics"]["rmse"], "r_squared": cv[1]["statistics"]["r_squared"]}}.flatten]
+      end
+      puts JSON.pretty_generate main
+    end
+  end
+end
+
+if options[:files]
+  json = []
+  options[:files].each do |file|
+    json << JSON.parse(File.read(file))
+  end
+  puts JSON.pretty_generate json.flatten
+
+end
+
diff --git a/create_prediction_models.rb b/create_prediction_models.rb
index c653f01..9c3995f 100644
--- a/create_prediction_models.rb
+++ b/create_prediction_models.rb
@@ -2,22 +2,24 @@
 ENV["LAZAR_ENV"] = "production"
 require_relative '../lazar/lib/lazar'
 #require 'lazar'
 include OpenTox
-$mongo.database.drop
-$gridfs = $mongo.database.fs # recreate GridFS indexes
+#$mongo.database.drop
+#$gridfs = $mongo.database.fs # recreate GridFS indexes
 
-#=begin
+=begin
 # classification models
 Dir["classification/*csv"].each do |file|
   unless file.match(/hamster/)
     Model::Validation.from_csv_file file
   end
 end
-#=end
+=end
 
 #=begin
 # regression models
 Dir["regression/*log10.csv"].each do |file|
-  Model::Validation.from_csv_file file
+  unless file.match(/fathead/)#until dublicates not cleared
+    Model::Validation.from_csv_file file
+  end
 end
 #=end
@@ -62,6 +64,11 @@ feature_categories.each do |category|
 end
 =end
 
-# save
+# save local dump but git ignored
 `mongodump -h 127.0.0.1 -d production`
+
+# build reports and users dump
+eval File.read('./lazar_validation_reports.rb')
+
+# restore
 #`mongorestore --host 127.0.0.1`
diff --git a/lazar_validation_reports.rb b/lazar_validation_reports.rb
new file mode 100644
index 0000000..5621577
--- /dev/null
+++ b/lazar_validation_reports.rb
@@ -0,0 +1,55 @@
+ENV["LAZAR_ENV"] = "production"
+require_relative '../lazar/lib/lazar'
+#require 'lazar'
+require 'json'
+include OpenTox
+
+models = Model::Validation.all
+size = models.size
+puts "#{size} reports to store."
+
+# create dir if not exists
+path = "#{ENV['HOME']}/lazar-validation-reports"
+dir = FileUtils.mkdir_p path
+
+models.each_with_index do |model, idx|
+
+  @json = {}
+
+  # define file name
+  type = model.regression? ? "regression" : "classification"
+  #name = model.model.name.gsub!(/[^0-9A-Za-z.\-]/, '_')
+  date = model.created_at.to_s.split.first
+  name = (model.endpoint + "_" + model.species).gsub!(/[^0-9A-Za-z.\-]/, '_')
+  branch = model.model.version["branch"]
+  commit = model.model.version["commit"]
+  filename = [date,type,branch,commit,name].join("_")
+
+  # collect object data
+  @json["endpoint"] = model.endpoint
+  @json["species"] = model.species
+  @json["source"] = model.source
+  @json["training_dataset"] = model.training_dataset.source
+  @json["training_compounds"] = model.training_dataset.data_entries.size
+  @json["algorithms"] = model.algorithms
+  @json["name"] = model.model.name
+  @json["created_at"] = model.created_at
+  @json["unit"] = model.unit
+  @json["version"] = model.model.version
+  @json["crossvalidations"] = {}
+  model.crossvalidations.each_with_index do |cv,idx|
+    @json["crossvalidations"][idx.to_s] = {"folds": cv.folds, "instances": cv.nr_instances, "unpredicted": cv.nr_unpredicted, "statistics": cv.statistics}
+  end
+
+  # write report to file
+  File.open("#{path}/#{filename}.json", "w") do |f|
+    f.write(JSON.pretty_generate(JSON.parse(@json.to_json)))
+  end
+
+  puts "#{size - (idx+1)} left to store."
+
+end
+
+# store database dump
+puts "Storing database dump."
+`mongodump -h 127.0.0.1 -o #{path}/#{Time.now.to_s.split.first}-dump-#{ENV["LAZAR_ENV"]} -d #{ENV["LAZAR_ENV"]}`