summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Christoph Helma <helma@in-silico.de>, 2009-12-20 19:16:25 +0100
committer: Christoph Helma <helma@in-silico.de>, 2009-12-20 19:16:25 +0100
commit: 89bc8a67e63cf8e752f3b89aa52f7db87ce7ea7f (patch)
tree: 7bcb8dbabc70d5d026c7811fae700a803653cdc3
parent: fbee9fbf8ce286a1264ef4ce8f3dfb77d048d067 (diff)
model adapted to OWL-DL, dataset predictions added
-rw-r--r-- lib/algorithm.rb | 22
-rw-r--r-- lib/dataset.rb | 133
-rw-r--r-- lib/model.rb | 147
-rw-r--r-- opentox-ruby-api-wrapper.gemspec | 9
4 files changed, 166 insertions, 145 deletions
diff --git a/lib/algorithm.rb b/lib/algorithm.rb
index a32a462..7fe08fa 100644
--- a/lib/algorithm.rb
+++ b/lib/algorithm.rb
@@ -14,6 +14,10 @@ module OpenTox
"Feature URI for dependent variable" => { :scope => "mandatory", :value => "feature_uri" }
}
end
+
+ # Ask a feature generation service to build a feature dataset.
+ # Sends a POST to params[:feature_generation_uri] carrying params[:dataset_uri]
+ # and params[:feature_uri], and returns the result of RestClient.post.
+ def self.create_feature_dataset(params)
+   payload = { :dataset_uri => params[:dataset_uri], :feature_uri => params[:feature_uri] }
+   RestClient.post(params[:feature_generation_uri], payload)
+ end
end
class Lazar #< OpenTox
@@ -35,5 +39,23 @@ module OpenTox
end
end
+ # Similarity measures for comparing feature fingerprints.
+ class Similarity
+
+   # Weighted Tanimoto index of two fingerprints.
+   #
+   # fp_a, fp_b:: Arrays of features
+   # p::          object indexed by feature (p[f]); must yield a number for every
+   #              feature that occurs in either fingerprint
+   #
+   # Returns the sum of p over the features common to both fingerprints divided by
+   # the sum of p over the features present in either of them, or 0.0 when the
+   # fingerprints share no feature.
+   def self.weighted_tanimoto(fp_a,fp_b,p)
+     common_features = fp_a & fp_b
+     return 0.0 if common_features.empty?
+
+     # Union, not concatenation: with fp_a + fp_b every shared feature was counted
+     # twice in the denominator, so identical fingerprints scored 0.5 instead of 1.0.
+     all_features = fp_a | fp_b
+     common_p_sum = common_features.inject(0.0) { |sum, f| sum + p[f] }
+     all_p_sum = all_features.inject(0.0) { |sum, f| sum + p[f] }
+     common_p_sum/all_p_sum
+   end
+
+ end
+
end
end
diff --git a/lib/dataset.rb b/lib/dataset.rb
index 225b900..30f9644 100644
--- a/lib/dataset.rb
+++ b/lib/dataset.rb
@@ -8,7 +8,6 @@ module OpenTox
end
# create/add to entry from uris or Redland::Resources
- # TODO add tuple
def add(compound,feature,value)
compound = self.find_or_create_compound compound unless compound.class == Redland::Resource
feature = self.find_or_create_feature feature unless feature.class == Redland::Resource
@@ -57,7 +56,6 @@ module OpenTox
def find_or_create_compound(uri)
compound = @model.subject(DC["identifier"], uri)
if compound.nil?
- #puts uri
compound = @model.create_resource
@model.add compound, RDF['type'], OT["Compound"]
@model.add compound, DC["identifier"], uri
@@ -78,29 +76,6 @@ module OpenTox
feature
end
- # find or create a new value and return the resource
-=begin
- def find_or_create_value(v)
- value = @model.subject OT["value"], v.to_s
- if value.nil?
- value = @model.create_resource
- @model.add value, RDF['type'], OT["FeatureValue"]
- @model.add value, OT["value"], v.to_s
- end
- value
- end
-=end
-
-=begin
- def add_data_entry(compound,feature,value)
- data_entry = @model.create_resource
- @model.add data_entry, RDF['type'], OT["DataEntry"]
- @model.add data_entry, OT['compound'], compound
- @model.add data_entry, OT['feature'], feature
- @model.add data_entry, OT['values'], value
- end
-=end
-
def self.create(data, content_type = 'application/rdf+xml')
uri = RestClient.post @@config[:services]["opentox-dataset"], data, :content_type => content_type
dataset = Dataset.new
@@ -127,30 +102,35 @@ module OpenTox
features
end
- def data_entries
- data_entries = {}
- self.compounds.each do |compound|
- compound_node = @model.subject(DC["identifier"],compound)
- compound = compound.to_s.sub(/^\[(.*)\]$/,'\1')
- data_entries[compound] = {} unless data_entries[compound]
- @model.subjects(OT['compound'], compound_node).each do |data_entry|
- feature_node = @model.object(data_entry, OT['feature'])
- feature = @model.object(feature_node,DC['identifier']).to_s
- values_node = @model.object(data_entry, OT['values'])
- data_entries[compound][feature] = [] unless data_entries[compound][feature]
- @model.find(values_node, OT['value'], nil) do |s,p,value|
- case value.to_s
- when "true"
- data_entries[compound][feature] << true
- when "false"
- data_entries[compound][feature] << false
- else
- data_entries[compound][feature] << value.to_s
+ # Collect all data entries of the dataset, grouped by compound.
+ #
+ # Returns a Hash keyed by compound URI (the compound's DC identifier as a String).
+ # Each value is an Array with one Hash per OT['values'] node of the compound's
+ # data entries. That Hash maps the feature URI to
+ # * the OT['value'] as a String, when the node is typed OT['FeatureValue'], or
+ # * a Hash of { component title => value String }, when it is typed OT['Tuple'].
+ # Nodes of any other type contribute an empty Hash.
+ #
+ # The method only reads the RDF model; it no longer prints debug output.
+ def data
+   data = {}
+   @model.subjects(RDF['type'], OT['DataEntry']).each do |data_entry|
+     compound_node = @model.object(data_entry, OT['compound'])
+     compound_uri = @model.object(compound_node, DC['identifier']).to_s
+     data[compound_uri] = [] unless data[compound_uri]
+     @model.find(data_entry, OT['values'], nil) do |s,p,values|
+       entry = {}
+       feature_node = @model.object values, OT['feature']
+       feature_uri = @model.object(feature_node, DC['identifier']).to_s
+       # TODO simple features
+       type = @model.object(values, RDF['type'])
+       if type == OT['FeatureValue']
+         entry[feature_uri] = @model.object(values, OT['value']).to_s
+       elsif type == OT['Tuple']
+         entry[feature_uri] = {} unless entry[feature_uri]
+         # every complexValue contributes one named component of the tuple
+         @model.find(values, OT['complexValue'],nil) do |s,p,complex_value|
+           name_node = @model.object complex_value, OT['feature']
+           name = @model.object(name_node, DC['title']).to_s
+           value = @model.object(complex_value, OT['value']).to_s
+           entry[feature_uri][name] = value
          end
        end
+       data[compound_uri] << entry
      end
    end
-   data_entries
+   data
  end
def feature_values(feature_uri)
@@ -159,18 +139,17 @@ module OpenTox
@model.subjects(RDF['type'], OT["Compound"]).each do |compound_node|
compound = @model.object(compound_node, DC["identifier"]).to_s.sub(/^\[(.*)\]$/,'\1')
features[compound] = [] unless features[compound]
- @model.subjects(OT['compound'], compound_node).each do |data_entry|
- if feature == @model.object(data_entry, OT['feature'])
- values_node = @model.object(data_entry, OT['values'])
- @model.find(values_node, OT['value'], nil) do |s,p,value|
- case value.to_s
- when "true"
- features[compound] << true
- when "false"
- features[compound] << false
- else
- features[compound] << value.to_s
- end
+ data_entry = @model.subject(OT['compound'], compound_node)
+ @model.find( data_entry, OT['values'], nil ) do |s,p,values|
+ if feature == @model.object(values, OT['feature'])
+ value = @model.object(values, OT['value'])
+ case value.to_s
+ when "true"
+ features[compound] << true
+ when "false"
+ features[compound] << false
+ else
+ features[compound] << value.to_s
end
end
end
@@ -178,6 +157,7 @@ module OpenTox
features
end
+=begin
def tuples
tuples = []
@model.subjects(RDF['type'], OT["Tuple"]).each do |t|
@@ -215,11 +195,12 @@ module OpenTox
#puts values_node
end
end
+=end
def compounds
compounds = []
@model.subjects(RDF['type'], OT["Compound"]).each do |compound_node|
- compounds << @model.object(compound_node, DC["identifier"])#
+ compounds << @model.object(compound_node, DC["identifier"]).to_s
end
compounds
end
@@ -234,8 +215,6 @@ module OpenTox
end
def to_yaml
- #compounds.each do |c|
- #end
{
:uri => self.uri,
:opentox_class => self.owl_class,
@@ -244,42 +223,10 @@ module OpenTox
:identifier => self.identifier,
:compounds => self.compounds.collect{|c| c.to_s.to_s.sub(/^\[(.*)\]$/,'\1')},
:features => self.features.collect{|f| f.to_s },
- #:data_entries => self.data_entries,
-=begin
- :tuples => self.compounds.collect{|c|
- compound_uri = c.to_s.to_s.sub(/^\[(.*)\]$/,'\1')
- {compound_uri => self.tuple(compound_uri)}
- },
-=end
- #:feature_values => self.features.collect{|f| { f.to_s => self.feature_values(f.to_s)} }
+ :data_entries => self.data_entries,
}.to_yaml
end
end
end
-=begin
- def tuple?(t)
- statements = []
- has_tuple = true
- t.each do |name,v|
- feature = self.find_or_create_feature(:name => name)
- value = self.find_or_create_value(v)
- tuple = @model.subject(feature,value)
- has_tuple = false if tuple.nil?
- statements << [tuple,feature,value]
- end
- tuples_found = statements.collect{|s| s[0]}.uniq
- has_tuple = false unless tuples_found.size == 1
- has_tuple
- end
-
- def find_or_create_tuple(t)
- if self.tuple?(t)
- t
- else
- self.create_tuple(t)
- end
- end
-=end
-
diff --git a/lib/model.rb b/lib/model.rb
index 50d6bea..2f4525a 100644
--- a/lib/model.rb
+++ b/lib/model.rb
@@ -1,16 +1,18 @@
module OpenTox
module Model
+
class Lazar
include Owl
+
+ attr_accessor :dataset, :predictions
# Create a new prediction model from a dataset
- def initialize
- super
- end
-
- def read_yaml(id,yaml)
- @lazar = YAML.load yaml
- self.identifier = File.join(@@config[:services]["opentox-model"],'lazar',id)
+ def initialize(yaml)
+ super()
+ id = File.basename(yaml,'.yaml')
+ # TODO Untyped Individual: http://localhost:4003/lazar/{id} ????
+ @lazar = YAML.load_file yaml
+ self.uri = File.join(@@config[:services]["opentox-model"],'lazar',id)
self.title = "lazar model for #{@lazar[:endpoint]}"
self.source = "http://github.com/helma/opentox-model"
self.parameters = {
@@ -18,19 +20,24 @@ module OpenTox
"Feature URI for dependent variable" => { :scope => "mandatory", :value => "feature_uri=#{@lazar[:endpoint]}" },
"Feature generation URI" => { :scope => "mandatory", :value => "feature_generation_uri=" } #TODO write to yaml
}
- self.algorithm = File.join(@@config[:services]["opentox-model"],"lazar")
+ self.algorithm = File.join(@@config[:services]["opentox-algorithm"],"lazar")
self.trainingDataset = @lazar[:activity_dataset]
self.dependentVariables = @lazar[:endpoint]
- self.predictedVariables = @lazar[:endpoint] + " lazar prediction"
+ self.independentVariables = "http://localhost:4002/fminer#BBRC_representative" # TODO read this from dataset
+ self.predictedVariables = @lazar[:endpoint] #+ " lazar prediction"
+ @dataset = OpenTox::Dataset.new
+ @predictions = {}
end
def self.find(uri)
+=begin
begin
YAML.load(RestClient.get uri)
Lazar.new uri
rescue
halt 404, "Model #{uri} not found."
end
+=end
end
def self.find_all
@@ -42,6 +49,65 @@ module OpenTox
RestClient.post(@uri, :compound_uri => compound.uri)
end
+ # Look up measured (database) activities for a compound.
+ #
+ # When @lazar[:activities] has an entry for compound_uri, the activities are added to
+ # @dataset as one comma-separated value of the endpoint feature, recorded in
+ # @predictions[compound_uri] under :measured_activities, and true is returned.
+ # Otherwise nothing is modified and false is returned.
+ def database_activity?(compound_uri)
+   measured = @lazar[:activities][compound_uri]
+   return false unless measured
+
+   endpoint = @lazar[:endpoint]
+   compound_node = @dataset.find_or_create_compound(compound_uri)
+   feature_node = @dataset.find_or_create_feature(endpoint)
+   @dataset.add(compound_node, feature_node, measured.join(','))
+   @predictions[compound_uri] = { endpoint => { :measured_activities => measured } }
+   true
+ end
+
+ # Predict the activity of a compound from similar training compounds.
+ #
+ # Every fingerprint in @lazar[:fingerprints] is compared with the features matched by
+ # the compound, using the weighted Tanimoto similarity. Fingerprints scoring above 0.3
+ # become neighbors. Each 'true' activity of a neighbor adds gauss(sim) to the
+ # confidence, each 'false' activity subtracts it, and the sum is averaged over the
+ # number of neighbors. A positive confidence classifies as true, a negative one as
+ # false; a confidence of exactly 0.0 (including "no neighbors") leaves it nil.
+ #
+ # The prediction is added to @dataset as a tuple of the endpoint feature and stored in
+ # @predictions[compound_uri]; that stored Hash is also the return value.
+ def classify(compound_uri)
+
+   compound = OpenTox::Compound.new(:uri => compound_uri)
+   compound_matches = compound.match @lazar[:features]
+
+   conf = 0.0
+   neighbors = []
+   classification = nil
+
+   @lazar[:fingerprints].each do |uri,matches|
+
+     sim = OpenTox::Algorithm::Similarity.weighted_tanimoto(compound_matches,matches,@lazar[:p_values])
+     if sim > 0.3
+       neighbors << uri
+       @lazar[:activities][uri].each do |act|
+         case act.to_s
+         when 'true'
+           conf += OpenTox::Utils.gauss(sim)
+         when 'false'
+           conf -= OpenTox::Utils.gauss(sim)
+         end
+       end
+     end
+   end
+
+   # Average over the neighbors. Skipped when there are none: 0.0/0 evaluates to NaN,
+   # which would otherwise be written to the dataset and the predictions.
+   conf = conf/neighbors.size unless neighbors.empty?
+   if conf > 0.0
+     classification = true
+   elsif conf < 0.0
+     classification = false
+   end
+
+   compound_node = @dataset.find_or_create_compound(compound_uri)
+   feature = @dataset.find_or_create_feature(@lazar[:endpoint])
+   tuple = @dataset.create_tuple(feature,{ 'lazar#classification' => classification, 'lazar#confidence' => conf})
+   @dataset.add_tuple compound_node,tuple
+   @predictions[compound_uri] = { @lazar[:endpoint] => { :lazar_prediction => {
+     :classification => classification,
+     :confidence => conf,
+     :neighbors => neighbors,
+     :features => compound_matches
+   } } }
+ end
+
+
def self.base_uri
@@config[:services]["opentox-model"]
end
@@ -54,50 +120,35 @@ module OpenTox
YAML.load(RestClient.get uri)[:endpoint]
end
- end
- end
-
-
-=begin
- module Model
-
- class LazarClassification < OpenTox
-
-
- end
-
- end
-
- module Prediction
-
- module Classification
-
- class Lazar < OpenTox
-
- def initialize(params)
- super(params[:uri])
- end
-
- def classification
- YAML.load(RestClient.get(@uri))[:classification]
- end
-
- def confidence
- YAML.load(RestClient.get(@uri))[:confidence]
- end
+ # Record the algorithm of this model in the RDF graph.
+ # Links the node typed OT[owl_class] to +algorithm+ (a URI string) via OT['algorithm']
+ # and types that URI as OT['Algorithm'].
+ def algorithm=(algorithm)
+   model_node = @model.subject(RDF['type'], OT[self.owl_class])
+   # the statement below is where the "untyped individual" comes from (reason still unknown)
+   @model.add(model_node, OT['algorithm'], Redland::Uri.new(algorithm))
+   @model.add(Redland::Uri.new(algorithm), RDF['type'], OT['Algorithm'])
+ end
- def neighbors
- RestClient.get @uri + '/neighbors'
- end
+ # Record the training dataset of this model in the RDF graph.
+ # Links the node typed OT[owl_class] to +trainingDataset+ (a URI string) via
+ # OT['trainingDataset'] and types that URI as OT['Dataset'].
+ def trainingDataset=(trainingDataset)
+   model_node = @model.subject(RDF['type'], OT[self.owl_class])
+   # the statement below is where the "untyped individual" comes from (reason still unknown)
+   @model.add(model_node, OT['trainingDataset'], Redland::Uri.new(trainingDataset))
+   @model.add(Redland::Uri.new(trainingDataset), RDF['type'], OT['Dataset'])
+ end
- def features
- RestClient.get @uri + '/features'
- end
+ # Record the dependent variable of this model in the RDF graph.
+ # Links the node typed OT[owl_class] to +dependentVariables+ (a URI string) via
+ # OT['dependentVariables'] and types that URI as OT['Feature'].
+ def dependentVariables=(dependentVariables)
+   model_node = @model.subject(RDF['type'], OT[self.owl_class])
+   # the statement below is where the "untyped individual" comes from (reason still unknown)
+   @model.add(model_node, OT['dependentVariables'], Redland::Uri.new(dependentVariables))
+   @model.add(Redland::Uri.new(dependentVariables), RDF['type'], OT['Feature'])
+ end
+ # Record the independent variable of this model in the RDF graph.
+ # Links the node typed OT[owl_class] to +independentVariables+ (a URI string) via
+ # OT['independentVariables'] and types that URI as OT['Feature'].
+ def independentVariables=(independentVariables)
+   model_node = @model.subject(RDF['type'], OT[self.owl_class])
+   # the statement below is where the "untyped individual" comes from (reason still unknown)
+   @model.add(model_node, OT['independentVariables'], Redland::Uri.new(independentVariables))
+   @model.add(Redland::Uri.new(independentVariables), RDF['type'], OT['Feature'])
  end
+ # Record the predicted variable of this model in the RDF graph.
+ # Links the node typed OT[owl_class] to +predictedVariables+ (a URI string) via
+ # OT['predictedVariables'] and types that URI as OT['Feature'].
+ def predictedVariables=(predictedVariables)
+   model_node = @model.subject(RDF['type'], OT[self.owl_class])
+   # the statement below is where the "untyped individual" comes from (reason still unknown)
+   @model.add(model_node, OT['predictedVariables'], Redland::Uri.new(predictedVariables))
+   @model.add(Redland::Uri.new(predictedVariables), RDF['type'], OT['Feature'])
+ end
end
-
end
-=end
end
diff --git a/opentox-ruby-api-wrapper.gemspec b/opentox-ruby-api-wrapper.gemspec
index 827296f..41335e9 100644
--- a/opentox-ruby-api-wrapper.gemspec
+++ b/opentox-ruby-api-wrapper.gemspec
@@ -5,15 +5,14 @@
Gem::Specification.new do |s|
s.name = %q{opentox-ruby-api-wrapper}
- s.version = "1.2.1"
+ s.version = "1.2.2"
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
s.authors = ["Christoph Helma"]
- s.date = %q{2009-12-16}
- s.default_executable = %q{opentox-install-debian.sh}
+ s.date = %q{2009-12-20}
s.description = %q{Ruby wrapper for the OpenTox REST API (http://www.opentox.org)}
s.email = %q{helma@in-silico.ch}
- s.executables = ["opentox-install-debian.sh"]
+ s.executables = ["opentox-install-debian.sh", "yaml2owl.rb"]
s.extra_rdoc_files = [
"LICENSE",
"README.rdoc"
@@ -24,6 +23,7 @@ Gem::Specification.new do |s|
"Rakefile",
"VERSION",
"bin/opentox-install-debian.sh",
+ "bin/yaml2owl.rb",
"lib/algorithm.rb",
"lib/compound.rb",
"lib/dataset.rb",
@@ -32,6 +32,7 @@ Gem::Specification.new do |s|
"lib/helper.rb",
"lib/model.rb",
"lib/opentox-ruby-api-wrapper.rb",
+ "lib/opentox.owl",
"lib/owl.rb",
"lib/spork.rb",
"lib/task.rb",