summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authormguetlein <martin.guetlein@gmail.com>2012-06-09 10:32:54 +0200
committermguetlein <martin.guetlein@gmail.com>2012-06-09 10:32:54 +0200
commit048a5ed44719e8de1057f249809df0ba890f53d0 (patch)
treee19c326dd42705eac5ba0db28efbff9dc52c56b1
parent782fbe9454be9593263d27f6ce069d2ad0638894 (diff)
new branch val_exp
-rw-r--r--.gitignore3
-rw-r--r--application.rb103
-rw-r--r--dataset_test.rb74
3 files changed, 180 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
index d21a58b..908c6bf 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,6 @@
+.buildpath
+.loadpath
+.project
api_key.rb
*.sqlite3
tmp/*
diff --git a/application.rb b/application.rb
index 4df696c..bbef234 100644
--- a/application.rb
+++ b/application.rb
@@ -113,6 +113,88 @@ helpers do
dataset.features.keys.each { |f| dataset.features[f][OT.hasSource] = dataset.metadata[OT.hasSource] unless dataset.features[f][OT.hasSource]}
File.open("#{@@datadir}/#{@id}.json","w+"){|f| f.puts dataset.to_json}
end
+
+ def to_arff(dataset, subjectid=nil, features=nil )
+
+ LOGGER.debug "convert dataset to arff #{dataset.uri}"
+
+ # count duplicates
+ num_compounds = {}
+ dataset.features.keys.each do |f|
+ dataset.compounds.each do |c|
+ if dataset.data_entries[c]
+ val = dataset.data_entries[c][f]
+ size = val==nil ? 1 : val.size
+ num_compounds[c] = num_compounds[c]==nil ? size : [num_compounds[c],size].max
+ else
+ num_compounds[c] = 1
+ end
+ end
+ end
+
+ puts "found duplicates"
+
+ # use either all features or the provided ones; sorting matters because the column index is derived from feature order
+ if features
+ features.sort!
+ else
+ features = dataset.features.keys.sort
+ end
+ compounds = []
+ compound_names = []
+ dataset.compounds.each do |c|
+ count = 0
+ num_compounds[c].times do |i|
+ compounds << c
+ compound_names << "#{c}$#{count}"
+ count+=1
+ end
+ end
+
+ missing = {}
+
+ arff = "@RELATION #{dataset.uri}\n\n"
+ features.each do |f|
+ numeric = dataset.features[f][RDF.type].to_a.flatten.include?(OT.NumericFeature)
+ #feat = OpenTox::Feature.find(f,subjectid)
+ #numeric = feat.metadata[RDF.type].to_a.flatten.include?(OT.NumericFeature)
+ if numeric
+ arff << "@ATTRIBUTE '#{f}' NUMERIC\n"
+ else
+ # HACK for binary 0,1 features without accept values (as returned by fminer):
+ # set missing value to 0 instead ?
+ # set accept value to [0,1]
+ accept_values = dataset.accept_values(f)
+ missing[f] = "0" if accept_values==nil
+ arff << "@ATTRIBUTE '#{f}' {#{(accept_values==nil ? [0,1] : accept_values).join(",")}}\n"
+ end
+ end
+
+ puts "found feature types"
+
+ arff << "\n@DATA\n"
+
+ dataset.compounds.each do |c|
+ num_compounds[c].times do |i|
+ c_values = []
+ features.each do |f|
+ accept_values =
+ if dataset.data_entries[c]
+ val = dataset.data_entries[c][f]
+ v = val==nil ? "" : val[i].to_s
+ else
+ raise "wtf" if i>0
+ v = ""
+ end
+ v = (missing[f]==nil ? "?" : missing[f]) if v.size()==0
+ c_values << v
+ end
+ arff << "#{c_values.join(",")}\n"
+ end
+ end
+ arff
+ end
+
end
before do
@@ -170,6 +252,19 @@ get '/?' do
end
end
+post '/:id/rdf' do
+ response['Content-Type'] = 'text/uri-list'
+ task = OpenTox::Task.create("Converting dataset to rdf ", @uri) do
+ file = "#{@@datadir}/#{params[:id]}.rdfxml"
+ unless File.exists? file # lazy rdfxml generation
+ dataset = OpenTox::Dataset.from_json File.read(@json_file)
+ File.open(file,"w+") { |f| f.puts dataset.to_rdfxml }
+ end
+ @uri
+ end
+ return_task task
+end
+
# Get a dataset representation
# @param [Header] Accept one of `application/rdf+xml, application/x-yaml, text/csv, application/ms-excel` (default application/rdf+xml)
# @return [application/rdf+xml, application/x-yaml, text/csv, application/ms-excel] Dataset representation
@@ -187,6 +282,14 @@ get '/:id' do
when /json/
send_file @json_file, :type => 'application/x-yaml'
+ when /arff/
+ file = "#{@@datadir}/#{params[:id]}.arff"
+ unless File.exists? file # lazy arff generation
+ dataset = OpenTox::Dataset.from_json File.read(@json_file)
+ File.open(file,"w+") { |f| f.puts to_arff(dataset) }
+ end
+ send_file file, :type => 'application/x-yaml'
+
when /yaml/
file = "#{@@datadir}/#{params[:id]}.yaml"
unless File.exists? file # lazy yaml generation
diff --git a/dataset_test.rb b/dataset_test.rb
new file mode 100644
index 0000000..5f1db3f
--- /dev/null
+++ b/dataset_test.rb
@@ -0,0 +1,74 @@
+require "rubygems"
+require "sinatra"
+before {
+ request.env['HTTP_HOST']="local-ot/dataset"
+ request.env["REQUEST_URI"]=request.env["PATH_INFO"]
+}
+
+require "opentox-ruby"
+ENV['RACK_ENV'] = 'test'
+require 'application.rb'
+require 'test/unit'
+require 'rack/test'
+LOGGER = Logger.new(STDOUT)
+LOGGER.datetime_format = "%Y-%m-%d %H:%M:%S "
+
+module Sinatra
+
+ set :raise_errors, false
+ set :show_exceptions, false
+
+ module UrlForHelper
+ BASE = "http://local-ot/dataset"
+ def url_for url_fragment, mode=:path_only
+ case mode
+ when :path_only
+ raise "not impl"
+ when :full
+ end
+ "#{BASE}#{url_fragment}"
+ end
+ end
+end
+
+class DatasetTest < Test::Unit::TestCase
+ include Rack::Test::Methods
+
+ def app
+ Sinatra::Application
+ end
+
+ def test_sth
+
+ begin
+
+ #http://local-ot/dataset/452
+ #http://local-ot/dataset/453
+
+ get '/504',nil,'HTTP_ACCEPT' => "text/arff"
+ puts last_response.body
+
+ #delete '/cleanup'
+ #puts last_response.body
+
+ rescue => ex
+ rep = OpenTox::ErrorReport.create(ex, "")
+ puts rep.to_yaml
+ end
+
+ end
+
+ # see test_util.rb
+ def wait_for_task(uri)
+ if uri.task_uri?
+ task = OpenTox::Task.find(uri)
+ task.wait_for_completion
+ raise "task failed: "+uri.to_s if task.error?
+ uri = task.result_uri
+ end
+ return uri
+ end
+
+
+
+end \ No newline at end of file