From 89bc8a67e63cf8e752f3b89aa52f7db87ce7ea7f Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Sun, 20 Dec 2009 19:16:25 +0100 Subject: model adapted to OWL-DL, dataset predictions added --- lib/algorithm.rb | 22 ++++++ lib/dataset.rb | 133 +++++++++++------------------------ lib/model.rb | 147 ++++++++++++++++++++++++++------------- opentox-ruby-api-wrapper.gemspec | 9 +-- 4 files changed, 166 insertions(+), 145 deletions(-) diff --git a/lib/algorithm.rb b/lib/algorithm.rb index a32a462..7fe08fa 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -14,6 +14,10 @@ module OpenTox "Feature URI for dependent variable" => { :scope => "mandatory", :value => "feature_uri" } } end + + def self.create_feature_dataset(params) + RestClient.post params[:feature_generation_uri], :dataset_uri => params[:dataset_uri], :feature_uri => params[:feature_uri] + end end class Lazar #< OpenTox @@ -35,5 +39,23 @@ module OpenTox end end + class Similarity + + def self.weighted_tanimoto(fp_a,fp_b,p) + common_features = fp_a & fp_b + all_features = fp_a + fp_b + common_p_sum = 0.0 + if common_features.size > 0 + common_features.each{|f| common_p_sum += p[f]} + all_p_sum = 0.0 + all_features.each{|f| all_p_sum += p[f]} + common_p_sum/all_p_sum + else + 0.0 + end + end + + end + end end diff --git a/lib/dataset.rb b/lib/dataset.rb index 225b900..30f9644 100644 --- a/lib/dataset.rb +++ b/lib/dataset.rb @@ -8,7 +8,6 @@ module OpenTox end # create/add to entry from uris or Redland::Resources - # TODO add tuple def add(compound,feature,value) compound = self.find_or_create_compound compound unless compound.class == Redland::Resource feature = self.find_or_create_feature feature unless feature.class == Redland::Resource @@ -57,7 +56,6 @@ module OpenTox def find_or_create_compound(uri) compound = @model.subject(DC["identifier"], uri) if compound.nil? - #puts uri compound = @model.create_resource @model.add compound, RDF['type'], OT["Compound"] @model.add compound, DC["identifier"], uri @@ -78,29 +76,6 @@ module OpenTox feature end - # find or create a new value and return the resource -=begin - def find_or_create_value(v) - value = @model.subject OT["value"], v.to_s - if value.nil? - value = @model.create_resource - @model.add value, RDF['type'], OT["FeatureValue"] - @model.add value, OT["value"], v.to_s - end - value - end -=end - -=begin - def add_data_entry(compound,feature,value) - data_entry = @model.create_resource - @model.add data_entry, RDF['type'], OT["DataEntry"] - @model.add data_entry, OT['compound'], compound - @model.add data_entry, OT['feature'], feature - @model.add data_entry, OT['values'], value - end -=end - def self.create(data, content_type = 'application/rdf+xml') uri = RestClient.post @@config[:services]["opentox-dataset"], data, :content_type => content_type dataset = Dataset.new @@ -127,30 +102,35 @@ module OpenTox features end - def data_entries - data_entries = {} - self.compounds.each do |compound| - compound_node = @model.subject(DC["identifier"],compound) - compound = compound.to_s.sub(/^\[(.*)\]$/,'\1') - data_entries[compound] = {} unless data_entries[compound] - @model.subjects(OT['compound'], compound_node).each do |data_entry| - feature_node = @model.object(data_entry, OT['feature']) - feature = @model.object(feature_node,DC['identifier']).to_s - values_node = @model.object(data_entry, OT['values']) - data_entries[compound][feature] = [] unless data_entries[compound][feature] - @model.find(values_node, OT['value'], nil) do |s,p,value| - case value.to_s - when "true" - data_entries[compound][feature] << true - when "false" - data_entries[compound][feature] << false - else - data_entries[compound][feature] << value.to_s + def data + data = {} + @model.subjects(RDF['type'], OT['DataEntry']).each do |data_entry| + compound_node = @model.object(data_entry, OT['compound']) + @model.find(compound_node, OT['identifier'],nil) {|s,p,o| puts o.to_s} + compound_uri = @model.object(compound_node, DC['identifier']).to_s + data[compound_uri] = [] unless data[compound_uri] + @model.find(data_entry, OT['values'], nil) do |s,p,values| + entry = {} + feature_node = @model.object values, OT['feature'] + feature_uri = @model.object(feature_node, DC['identifier']).to_s + # TODO simple features + type = @model.object(values, RDF['type']) + if type == OT['FeatureValue'] + #entry[feature_uri] = [] unless entry[feature_uri] + entry[feature_uri] = @model.object(values, OT['value']).to_s + elsif type == OT['Tuple'] + entry[feature_uri] = {} unless entry[feature_uri] + @model.find(values, OT['complexValue'],nil) do |s,p,complex_value| + name_node = @model.object complex_value, OT['feature'] + name = @model.object(name_node, DC['title']).to_s + value = @model.object(complex_value, OT['value']).to_s + entry[feature_uri][name] = value end end + data[compound_uri] << entry end end - data_entries + data end def feature_values(feature_uri) @@ -159,18 +139,17 @@ module OpenTox @model.subjects(RDF['type'], OT["Compound"]).each do |compound_node| compound = @model.object(compound_node, DC["identifier"]).to_s.sub(/^\[(.*)\]$/,'\1') features[compound] = [] unless features[compound] - @model.subjects(OT['compound'], compound_node).each do |data_entry| - if feature == @model.object(data_entry, OT['feature']) - values_node = @model.object(data_entry, OT['values']) - @model.find(values_node, OT['value'], nil) do |s,p,value| - case value.to_s - when "true" - features[compound] << true - when "false" - features[compound] << false - else - features[compound] << value.to_s - end + data_entry = @model.subject(OT['compound'], compound_node) + @model.find( data_entry, OT['values'], nil ) do |s,p,values| + if feature == @model.object(values, OT['feature']) + value = @model.object(values, OT['value']) + case value.to_s + when "true" + features[compound] << true + when "false" + features[compound] << false + else + features[compound] << value.to_s end end end @@ -178,6 +157,7 @@ module OpenTox features end +=begin def tuples tuples = [] @model.subjects(RDF['type'], OT["Tuple"]).each do |t| @@ -215,11 +195,12 @@ module OpenTox #puts values_node end end +=end def compounds compounds = [] @model.subjects(RDF['type'], OT["Compound"]).each do |compound_node| - compounds << @model.object(compound_node, DC["identifier"])# + compounds << @model.object(compound_node, DC["identifier"]).to_s end compounds end @@ -234,8 +215,6 @@ module OpenTox end def to_yaml - #compounds.each do |c| - #end { :uri => self.uri, :opentox_class => self.owl_class, @@ -244,42 +223,10 @@ module OpenTox :identifier => self.identifier, :compounds => self.compounds.collect{|c| c.to_s.to_s.sub(/^\[(.*)\]$/,'\1')}, :features => self.features.collect{|f| f.to_s }, - #:data_entries => self.data_entries, -=begin - :tuples => self.compounds.collect{|c| - compound_uri = c.to_s.to_s.sub(/^\[(.*)\]$/,'\1') - {compound_uri => self.tuple(compound_uri)} - }, -=end - #:feature_values => self.features.collect{|f| { f.to_s => self.feature_values(f.to_s)} } + :data_entries => self.data_entries, }.to_yaml end end end -=begin - def tuple?(t) - statements = [] - has_tuple = true - t.each do |name,v| - feature = self.find_or_create_feature(:name => name) - value = self.find_or_create_value(v) - tuple = @model.subject(feature,value) - has_tuple = false if tuple.nil? - statements << [tuple,feature,value] - end - tuples_found = statements.collect{|s| s[0]}.uniq - has_tuple = false unless tuples_found.size == 1 - has_tuple - end - - def find_or_create_tuple(t) - if self.tuple?(t) - t - else - self.create_tuple(t) - end - end -=end - diff --git a/lib/model.rb b/lib/model.rb index 50d6bea..2f4525a 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -1,16 +1,18 @@ module OpenTox module Model + class Lazar include Owl + + attr_accessor :dataset, :predictions # Create a new prediction model from a dataset - def initialize - super - end - - def read_yaml(id,yaml) - @lazar = YAML.load yaml - self.identifier = File.join(@@config[:services]["opentox-model"],'lazar',id) + def initialize(yaml) + super() + id = File.basename(yaml,'.yaml') + # TODO Untyped Individual: http://localhost:4003/lazar/{id} ???? + @lazar = YAML.load_file yaml + self.uri = File.join(@@config[:services]["opentox-model"],'lazar',id) self.title = "lazar model for #{@lazar[:endpoint]}" self.source = "http://github.com/helma/opentox-model" self.parameters = { @@ -18,19 +20,24 @@ module OpenTox "Feature URI for dependent variable" => { :scope => "mandatory", :value => "feature_uri=#{@lazar[:endpoint]}" }, "Feature generation URI" => { :scope => "mandatory", :value => "feature_generation_uri=" } #TODO write to yaml } - self.algorithm = File.join(@@config[:services]["opentox-model"],"lazar") + self.algorithm = File.join(@@config[:services]["opentox-algorithm"],"lazar") self.trainingDataset = @lazar[:activity_dataset] self.dependentVariables = @lazar[:endpoint] - self.predictedVariables = @lazar[:endpoint] + " lazar prediction" + self.independentVariables = "http://localhost:4002/fminer#BBRC_representative" # TODO read this from dataset + self.predictedVariables = @lazar[:endpoint] #+ " lazar prediction" + @dataset = OpenTox::Dataset.new + @predictions = {} end def self.find(uri) +=begin begin YAML.load(RestClient.get uri) Lazar.new uri rescue halt 404, "Model #{uri} not found." end +=end end def self.find_all @@ -42,6 +49,65 @@ module OpenTox RestClient.post(@uri, :compound_uri => compound.uri) end + def database_activity?(compound_uri) + # find database activities + db_activities = @lazar[:activities][compound_uri] + if db_activities + c = @dataset.find_or_create_compound(compound_uri) + f = @dataset.find_or_create_feature(@lazar[:endpoint]) + v = db_activities.join(',') + @dataset.add c,f,v + @predictions[compound_uri] = { @lazar[:endpoint] => {:measured_activities => db_activities}} + true + else + false + end + end + + def classify(compound_uri) + + compound = OpenTox::Compound.new(:uri => compound_uri) + compound_matches = compound.match @lazar[:features] + + conf = 0.0 + neighbors = [] + classification = nil + + @lazar[:fingerprints].each do |uri,matches| + + sim = OpenTox::Algorithm::Similarity.weighted_tanimoto(compound_matches,matches,@lazar[:p_values]) + if sim > 0.3 + neighbors << uri + @lazar[:activities][uri].each do |act| + case act.to_s + when 'true' + conf += OpenTox::Utils.gauss(sim) + when 'false' + conf -= OpenTox::Utils.gauss(sim) + end + end + end + end + + conf = conf/neighbors.size + if conf > 0.0 + classification = true + elsif conf < 0.0 + classification = false + end + + compound = @dataset.find_or_create_compound(compound_uri) + feature = @dataset.find_or_create_feature(@lazar[:endpoint]) + tuple = @dataset.create_tuple(feature,{ 'lazar#classification' => classification, 'lazar#confidence' => conf}) + @dataset.add_tuple compound,tuple + @predictions[compound_uri] = { @lazar[:endpoint] => { :lazar_prediction => { + :classification => classification, + :confidence => conf, + :neighbors => neighbors, + :features => compound_matches + } } } + end + def self.base_uri @@config[:services]["opentox-model"] end @@ -54,50 +120,35 @@ module OpenTox YAML.load(RestClient.get uri)[:endpoint] end - end - end - - -=begin - module Model - - class LazarClassification < OpenTox - - - end - - end - - module Prediction - - module Classification - - class Lazar < OpenTox - - def initialize(params) - super(params[:uri]) - end - - def classification - YAML.load(RestClient.get(@uri))[:classification] - end - - def confidence - YAML.load(RestClient.get(@uri))[:confidence] - end + def algorithm=(algorithm) + me = @model.subject(RDF['type'],OT[self.owl_class]) + @model.add me, OT['algorithm'], Redland::Uri.new(algorithm) # untyped individual comes from this line, why?? + @model.add Redland::Uri.new(algorithm), RDF['type'], OT['Algorithm'] + end - def neighbors - RestClient.get @uri + '/neighbors' - end + def trainingDataset=(trainingDataset) + me = @model.subject(RDF['type'],OT[self.owl_class]) + @model.add me, OT['trainingDataset'], Redland::Uri.new(trainingDataset) # untyped individual comes from this line, why?? + @model.add Redland::Uri.new(trainingDataset), RDF['type'], OT['Dataset'] + end - def features - RestClient.get @uri + '/features' - end + def dependentVariables=(dependentVariables) + me = @model.subject(RDF['type'],OT[self.owl_class]) + @model.add me, OT['dependentVariables'], Redland::Uri.new(dependentVariables) # untyped individual comes from this line, why?? + @model.add Redland::Uri.new(dependentVariables), RDF['type'], OT['Feature'] + end + def independentVariables=(independentVariables) + me = @model.subject(RDF['type'],OT[self.owl_class]) + @model.add me, OT['independentVariables'], Redland::Uri.new(independentVariables) # untyped individual comes from this line, why?? + @model.add Redland::Uri.new(independentVariables), RDF['type'], OT['Feature'] end + def predictedVariables=(predictedVariables) + me = @model.subject(RDF['type'],OT[self.owl_class]) + @model.add me, OT['predictedVariables'], Redland::Uri.new(predictedVariables) # untyped individual comes from this line, why?? + @model.add Redland::Uri.new(predictedVariables), RDF['type'], OT['Feature'] + end end - end -=end end diff --git a/opentox-ruby-api-wrapper.gemspec b/opentox-ruby-api-wrapper.gemspec index 827296f..41335e9 100644 --- a/opentox-ruby-api-wrapper.gemspec +++ b/opentox-ruby-api-wrapper.gemspec @@ -5,15 +5,14 @@ Gem::Specification.new do |s| s.name = %q{opentox-ruby-api-wrapper} - s.version = "1.2.1" + s.version = "1.2.2" s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version= s.authors = ["Christoph Helma"] - s.date = %q{2009-12-16} - s.default_executable = %q{opentox-install-debian.sh} + s.date = %q{2009-12-20} s.description = %q{Ruby wrapper for the OpenTox REST API (http://www.opentox.org)} s.email = %q{helma@in-silico.ch} - s.executables = ["opentox-install-debian.sh"] + s.executables = ["opentox-install-debian.sh", "yaml2owl.rb"] s.extra_rdoc_files = [ "LICENSE", "README.rdoc" @@ -24,6 +23,7 @@ Gem::Specification.new do |s| "Rakefile", "VERSION", "bin/opentox-install-debian.sh", + "bin/yaml2owl.rb", "lib/algorithm.rb", "lib/compound.rb", "lib/dataset.rb", @@ -32,6 +32,7 @@ Gem::Specification.new do |s| "lib/helper.rb", "lib/model.rb", "lib/opentox-ruby-api-wrapper.rb", + "lib/opentox.owl", "lib/owl.rb", "lib/spork.rb", "lib/task.rb", -- cgit v1.2.3