diff options
author | mguetlein <martin.guetlein@gmail.com> | 2012-06-09 10:32:54 +0200 |
---|---|---|
committer | mguetlein <martin.guetlein@gmail.com> | 2012-06-09 10:32:54 +0200 |
commit | 048a5ed44719e8de1057f249809df0ba890f53d0 (patch) | |
tree | e19c326dd42705eac5ba0db28efbff9dc52c56b1 | |
parent | 782fbe9454be9593263d27f6ce069d2ad0638894 (diff) |
new branch val_exp
-rw-r--r-- | .gitignore | 3 | ||||
-rw-r--r-- | application.rb | 103 | ||||
-rw-r--r-- | dataset_test.rb | 74 |
3 files changed, 180 insertions, 0 deletions
@@ -1,3 +1,6 @@ +.buildpath +.loadpath +.project api_key.rb *.sqlite3 tmp/* diff --git a/application.rb b/application.rb index 4df696c..bbef234 100644 --- a/application.rb +++ b/application.rb @@ -113,6 +113,88 @@ helpers do dataset.features.keys.each { |f| dataset.features[f][OT.hasSource] = dataset.metadata[OT.hasSource] unless dataset.features[f][OT.hasSource]} File.open("#{@@datadir}/#{@id}.json","w+"){|f| f.puts dataset.to_json} end + + def to_arff(dataset, subjectid=nil, features=nil ) + + LOGGER.debug "convert dataset to arff #{dataset.uri}" + + # count duplicates + num_compounds = {} + dataset.features.keys.each do |f| + dataset.compounds.each do |c| + if dataset.data_entries[c] + val = dataset.data_entries[c][f] + size = val==nil ? 1 : val.size + num_compounds[c] = num_compounds[c]==nil ? size : [num_compounds[c],size].max + else + num_compounds[c] = 1 + end + end + end + + puts "found duplicates" + + # use either all, or the provided features, sorting is important as col-index := features + if features + features.sort! + else + features = dataset.features.keys.sort + end + compounds = [] + compound_names = [] + dataset.compounds.each do |c| + count = 0 + num_compounds[c].times do |i| + compounds << c + compound_names << "#{c}$#{count}" + count+=1 + end + end + + missing = {} + + arff = "@RELATION #{dataset.uri}\n\n" + features.each do |f| + numeric = dataset.features[f][RDF.type].to_a.flatten.include?(OT.NumericFeature) + #feat = OpenTox::Feature.find(f,subjectid) + #numeric = feat.metadata[RDF.type].to_a.flatten.include?(OT.NumericFeature) + if numeric + arff << "@ATTRIBUTE '#{f}' NUMERIC\n" + else + # HACK for binary 0,1 features without accept values (as returned by fminer): + # set missing value to 0 instead ? + # set accept value to [0,1] + accept_values = dataset.accept_values(f) + missing[f] = "0" if accept_values==nil + arff << "@ATTRIBUTE '#{f}' {#{(accept_values==nil ? [0,1] : accept_values).join(",")}}\n" + end + end + + puts "found feature types" + + arff << "\n@DATA\n" + + dataset.compounds.each do |c| + num_compounds[c].times do |i| + c_values = [] + features.each do |f| + accept_values = + if dataset.data_entries[c] + val = dataset.data_entries[c][f] + v = val==nil ? "" : val[i].to_s + else + raise "wtf" if i>0 + v = "" + end + v = (missing[f]==nil ? "?" : missing[f]) if v.size()==0 + c_values << v + end + arff << "#{c_values.join(",")}\n" + end + end + arff + end + end before do @@ -170,6 +252,19 @@ get '/?' do end end +post '/:id/rdf' do + response['Content-Type'] = 'text/uri-list' + task = OpenTox::Task.create("Converting dataset to rdf ", @uri) do + file = "#{@@datadir}/#{params[:id]}.rdfxml" + unless File.exists? file # lazy rdfxml generation + dataset = OpenTox::Dataset.from_json File.read(@json_file) + File.open(file,"w+") { |f| f.puts dataset.to_rdfxml } + end + @uri + end + return_task task +end + # Get a dataset representation # @param [Header] Accept one of `application/rdf+xml, application-x-yaml, text/csv, application/ms-excel` (default application/rdf+xml) # @return [application/rdf+xml, application-x-yaml, text/csv, application/ms-excel] Dataset representation @@ -187,6 +282,14 @@ get '/:id' do when /json/ send_file @json_file, :type => 'application/x-yaml' + when /arff/ + file = "#{@@datadir}/#{params[:id]}.arff" + unless File.exists? file # lazy yaml generation + dataset = OpenTox::Dataset.from_json File.read(@json_file) + File.open(file,"w+") { |f| f.puts to_arff(dataset) } + end + send_file file, :type => 'application/x-yaml' + when /yaml/ file = "#{@@datadir}/#{params[:id]}.yaml" unless File.exists? file # lazy yaml generation diff --git a/dataset_test.rb b/dataset_test.rb new file mode 100644 index 0000000..5f1db3f --- /dev/null +++ b/dataset_test.rb @@ -0,0 +1,74 @@ +require "rubygems" +require "sinatra" +before { + request.env['HTTP_HOST']="local-ot/dataset" + request.env["REQUEST_URI"]=request.env["PATH_INFO"] +} + +require "opentox-ruby" +ENV['RACK_ENV'] = 'test' +require 'application.rb' +require 'test/unit' +require 'rack/test' +LOGGER = Logger.new(STDOUT) +LOGGER.datetime_format = "%Y-%m-%d %H:%M:%S " + +module Sinatra + + set :raise_errors, false + set :show_exceptions, false + + module UrlForHelper + BASE = "http://local-ot/dataset" + def url_for url_fragment, mode=:path_only + case mode + when :path_only + raise "not impl" + when :full + end + "#{BASE}#{url_fragment}" + end + end +end + +class DatasetTest < Test::Unit::TestCase + include Rack::Test::Methods + + def app + Sinatra::Application + end + + def test_sth + + begin + + #http://local-ot/dataset/452 + #http://local-ot/dataset/453 + + get '/504',nil,'HTTP_ACCEPT' => "text/arff" + puts last_response.body + + #delete '/cleanup' + #puts last_response.body + + rescue => ex + rep = OpenTox::ErrorReport.create(ex, "") + puts rep.to_yaml + end + + end + + # see test_util.rb + def wait_for_task(uri) + if uri.task_uri? + task = OpenTox::Task.find(uri) + task.wait_for_completion + raise "task failed: "+uri.to_s if task.error? + uri = task.result_uri + end + return uri + end + + + +end
\ No newline at end of file |