summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lib/crossvalidation.rb36
-rw-r--r--lib/dataset.rb31
-rw-r--r--test/dataset.rb1
-rw-r--r--test/setup.rb4
4 files changed, 40 insertions, 32 deletions
diff --git a/lib/crossvalidation.rb b/lib/crossvalidation.rb
index cbffb7c..2e6dabb 100644
--- a/lib/crossvalidation.rb
+++ b/lib/crossvalidation.rb
@@ -128,26 +128,28 @@ module OpenTox
end
def confidence_plot
- tmpfile = "/tmp/#{id.to_s}_confidence.svg"
- accuracies = []
- confidences = []
- correct_predictions = 0
- incorrect_predictions = 0
- predictions.each do |p|
- if p[1] and p[2]
- p[1] == p [2] ? correct_predictions += 1 : incorrect_predictions += 1
- accuracies << correct_predictions/(correct_predictions+incorrect_predictions).to_f
- confidences << p[3]
+ unless confidence_plot_id
+ tmpfile = "/tmp/#{id.to_s}_confidence.svg"
+ accuracies = []
+ confidences = []
+ correct_predictions = 0
+ incorrect_predictions = 0
+ predictions.each do |p|
+ if p[1] and p[2]
+ p[1] == p [2] ? correct_predictions += 1 : incorrect_predictions += 1
+ accuracies << correct_predictions/(correct_predictions+incorrect_predictions).to_f
+ confidences << p[3]
+ end
end
+ R.assign "accuracy", accuracies
+ R.assign "confidence", confidences
+ R.eval "image = qplot(confidence,accuracy)+ylab('accumulated accuracy')+scale_x_reverse()"
+ R.eval "ggsave(file='#{tmpfile}', plot=image)"
+ file = Mongo::Grid::File.new(File.read(tmpfile), :filename => "#{self.id.to_s}_confidence_plot.svg")
+ plot_id = $gridfs.insert_one(file)
+ update(:confidence_plot_id => plot_id)
end
- R.assign "accuracy", accuracies
- R.assign "confidence", confidences
- R.eval "image = qplot(confidence,accuracy)+ylab('accumulated accuracy')+scale_x_reverse()"
- R.eval "ggsave(file='#{tmpfile}', plot=image)"
- file = Mongo::Grid::File.new(File.read(tmpfile), :filename => "#{self.id.to_s}_confidence_plot.svg")
- plot_id = $gridfs.insert_one(file)
- update(:confidence_plot_id => plot_id)
$gridfs.find_one(_id: confidence_plot_id).data
end
diff --git a/lib/dataset.rb b/lib/dataset.rb
index 60f3bb5..d989bdf 100644
--- a/lib/dataset.rb
+++ b/lib/dataset.rb
@@ -5,21 +5,23 @@ module OpenTox
class Dataset
- attr_writer :data_entries
+ #attr_writer :data_entries
# associations like has_many, belongs_to deteriorate performance
field :feature_ids, type: Array, default: []
field :compound_ids, type: Array, default: []
- field :data_entries_id, type: BSON::ObjectId#, default: []
+ #field :data_entries_id, type: BSON::ObjectId
+ field :data_entries, type: Array, default: []
field :source, type: String
# Save all data including data_entries
# Should be used instead of save
def save_all
- dump = Marshal.dump(@data_entries)
- file = Mongo::Grid::File.new(dump, :filename => "#{self.id.to_s}.data_entries")
- entries_id = $gridfs.insert_one(file)
- update(:data_entries_id => entries_id)
+ save
+ #dump = Marshal.dump(@data_entries)
+ #file = Mongo::Grid::File.new(dump, :filename => "#{self.id.to_s}.data_entries")
+ #entries_id = $gridfs.insert_one(file)
+ #update(:data_entries_id => entries_id)
end
# Readers
@@ -36,6 +38,7 @@ module OpenTox
@features
end
+=begin
# Get all data_entries
def data_entries
unless @data_entries
@@ -60,6 +63,7 @@ module OpenTox
end
@data_entries
end
+=end
# Find data entry values for a given compound and feature
# @param compound [OpenTox::Compound] OpenTox Compound object
@@ -220,7 +224,8 @@ module OpenTox
value_time = 0
# compounds and values
- @data_entries = [] #Array.new(table.size){Array.new(table.first.size-1)}
+ #@data_entries = [] #Array.new(table.size){Array.new(table.first.size-1)}
+ self.data_entries = []
table.each_with_index do |vals,i|
ct = Time.now
@@ -251,16 +256,16 @@ module OpenTox
end
compound_ids << compound.id
- table.first.size == 0 ? @data_entries << Array.new(0) : @data_entries << Array.new(table.first.size-1)
+ table.first.size == 0 ? self.data_entries << Array.new(0) : self.data_entries << Array.new(table.first.size-1)
vals.each_with_index do |v,j|
if v.blank?
warnings << "Empty value for compound '#{identifier}' (row #{r+2}) and feature '#{feature_names[j]}' (column #{j+2})."
next
elsif numeric[j]
- @data_entries.last[j] = v.to_f
+ self.data_entries.last[j] = v.to_f
else
- @data_entries.last[j] = v.strip
+ self.data_entries.last[j] = v.strip
end
end
end
@@ -272,7 +277,7 @@ module OpenTox
$logger.debug "Value parsing: #{Time.now-time} (Compound creation: #{compound_time})"
time = Time.now
- save_all
+ save
$logger.debug "Saving: #{Time.now-time}"
end
@@ -281,9 +286,9 @@ module OpenTox
# @param any value
def fill_nil_with n
(0 .. compound_ids.size-1).each do |i|
- @data_entries[i] ||= []
+ data_entries[i] ||= []
(0 .. feature_ids.size-1).each do |j|
- @data_entries[i][j] ||= n
+ data_entries[i][j] ||= n
end
end
end
diff --git a/test/dataset.rb b/test/dataset.rb
index 60f917c..47a6c25 100644
--- a/test/dataset.rb
+++ b/test/dataset.rb
@@ -141,6 +141,7 @@ class DatasetTest < MiniTest::Test
def test_from_csv
d = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv"
+ p d
assert_equal Dataset, d.class
assert_equal 1, d.features.size
assert_equal 85, d.compounds.size
diff --git a/test/setup.rb b/test/setup.rb
index 3825282..dc577b3 100644
--- a/test/setup.rb
+++ b/test/setup.rb
@@ -4,5 +4,5 @@ require_relative '../lib/lazar.rb'
include OpenTox
TEST_DIR ||= File.expand_path(File.dirname(__FILE__))
DATA_DIR ||= File.join(TEST_DIR,"data")
-#$mongo.database.drop
-#$gridfs = $mongo.database.fs
+$mongo.database.drop
+$gridfs = $mongo.database.fs