summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Andreas Maunz <andreas@maunz.de> 2011-07-29 11:47:17 +0200
committer Andreas Maunz <andreas@maunz.de> 2011-07-29 11:47:17 +0200
commit fa37ab0876faaaa2acf37b147924f025a0d8cd9a (patch)
tree bc2bf960e0abc918dc380998a129e3e79dc79d96
parent a3f519bd945bcb2fce5bf587966ff746a03f0db9 (diff)
Added TUM clustering
-rw-r--r-- lib/algorithm.rb 108
-rw-r--r-- lib/parser.rb 15
2 files changed, 112 insertions, 11 deletions
diff --git a/lib/algorithm.rb b/lib/algorithm.rb
index a0ad9a5..3cf4ecf 100644
--- a/lib/algorithm.rb
+++ b/lib/algorithm.rb
@@ -4,6 +4,7 @@
R = nil
require "rinruby"
require "statsample"
+require 'uri'
module OpenTox
@@ -210,6 +211,106 @@ module OpenTox
end
end
# Structural Graph Clustering by TU Munich.
#
# Trains a cluster model on a training dataset by posting it to a remote
# clustering service, and may afterwards be queried for the cluster
# membership of a single (unknown) compound.
class StructuralClustering
  attr_accessor :training_dataset_uri, :training_threshold, :query_dataset_uri, :query_threshold, :target_clusters_array

  # Label prefix of a cluster dataset's OT.hasSource metadata; the cluster id follows it.
  # (The id has to be parsed out of this string — not elegant.)
  CLUSTER_SOURCE_PREFIX = "Structural Clustering cluster ".freeze

  # Label prefix of a query feature's DC.title metadata; the cluster id follows it.
  # (The id has to be parsed out of this string — not elegant.)
  QUERY_FEATURE_PREFIX = "Prediction feature for cluster assignment ".freeze

  # Validates the arguments, trains a cluster model and records which dataset
  # URI belongs to which cluster id.
  #
  # @param training_dataset_uri [String] training dataset URI
  # @param training_threshold [Numeric, String] similarity threshold for training, within 0..1 (optional)
  # @param cluster_service_uri [String] cluster service URI (no AA)
  # @raise [RuntimeError] "Invalid URI." if either URI does not match URI.regexp
  # @raise [RuntimeError] "Training threshold out of bounds." if the threshold
  #   is not numeric or lies outside 0..1
  def initialize(training_dataset_uri, training_threshold = 0.8, cluster_service_uri = "http://opentox-dev.informatik.tu-muenchen.de:8080/OpenTox/algorithm/StructuralClustering")
    if (training_dataset_uri =~ URI.regexp).nil? || (cluster_service_uri =~ URI.regexp).nil?
      raise "Invalid URI."
    end
    @training_dataset_uri = training_dataset_uri
    @training_threshold = checked_threshold(training_threshold, "Training threshold out of bounds.")

    # Train a cluster model
    params = { :dataset_uri => @training_dataset_uri, :threshold => @training_threshold }
    @cluster_model_uri = OpenTox::RestClientWrapper.post(cluster_service_uri, params)
    cluster_model_rdf = OpenTox::RestClientWrapper.get(@cluster_model_uri)
    # must extract OT.Datasets from model
    @datasets = OpenTox::Parser::Owl.from_rdf(cluster_model_rdf, OT.Dataset, true)

    # Process parsed OWL objects: map cluster id => dataset URI
    @clusterid_dataset_map = {}
    @datasets.each do |d|
      begin
        @clusterid_dataset_map[cluster_id(d.metadata[OT.hasSource], CLUSTER_SOURCE_PREFIX)] = d.uri
      rescue StandardError
        # Deliberately best-effort: entries without a parsable cluster label
        # are ignored. Only StandardError is swallowed so that interrupts and
        # exit requests still propagate.
      end
    end
  end

  # Whether a model has been trained
  #
  # @return [Boolean] true once a cluster model URI is known
  def trained?
    !@cluster_model_uri.nil?
  end

  # Instance query: clusters for a compound.
  #
  # Stores the query compound in a fresh dataset, posts that dataset to the
  # trained cluster model and collects, in #target_clusters_array, the
  # training cluster dataset URI of every cluster the compound is reported
  # present in.
  #
  # @param query_compound_uri [String] query compound
  # @param query_threshold [Numeric, String] similarity threshold for query to clusters, within 0..1 (optional)
  # @raise [RuntimeError] "Query threshold out of bounds." if the threshold is
  #   not numeric or lies outside 0..1
  # @raise [RuntimeError] "Number of predicted compounds is != 1."
  # @raise [IndexError] if a feature title lacks the expected prefix
  # @return [Array] the feature list that was iterated (kept for backward
  #   compatibility); read the clusters from #target_clusters_array
  def get_clusters(query_compound_uri, query_threshold = 0.5)
    @query_threshold = checked_threshold(query_threshold, "Query threshold out of bounds.")

    # Preparing a query dataset
    query_dataset = OpenTox::Dataset.new
    @query_dataset_uri = query_dataset.save
    query_dataset = OpenTox::Dataset.find(@query_dataset_uri)
    query_dataset.add_compound(query_compound_uri)
    @query_dataset_uri = query_dataset.save

    # Obtaining a clustering for query compound
    params = { :dataset_uri => @query_dataset_uri, :threshold => @query_threshold }
    cluster_query_dataset_uri = OpenTox::RestClientWrapper.post(@cluster_model_uri, params)
    cluster_query_dataset = OpenTox::Dataset.new(cluster_query_dataset_uri)
    cluster_query_dataset.load_all

    # Reading cluster ids for features from metadata
    feature_clusterid_map = {}
    cluster_query_dataset.features.each do |feature_uri, metadata|
      feature_clusterid_map[feature_uri] = cluster_id(metadata[DC.title], QUERY_FEATURE_PREFIX)
    end

    # Integrity check
    unless cluster_query_dataset.compounds.size == 1
      raise "Number of predicted compounds is != 1."
    end

    # Process data entry (the same entry is used for every feature, so look it up once)
    predicted_compound_uri = cluster_query_dataset.compounds[0]
    data_entry = cluster_query_dataset.data_entries[predicted_compound_uri]
    @target_clusters_array = []
    cluster_query_dataset.features.keys.each do |cluster_membership_feature|
      # Getting dataset URI for cluster
      target_cluster = feature_clusterid_map[cluster_membership_feature]
      dataset = @clusterid_dataset_map[target_cluster]

      # Finally look up presence
      present = data_entry[cluster_membership_feature][0]

      # Store result
      @target_clusters_array << dataset if present > 0.5 # 0.0 for absence, 1.0 for presence
    end
  end

  private

  # Returns +value+ as a Float if it is numeric and within 0..1.
  # The earlier inline check `!numeric? v || v <0 || v >1` was parsed as
  # `!numeric?(v || v < 0 || v > 1)` and therefore never tested the range.
  #
  # @param value [Object] candidate threshold
  # @param message [String] error message used when validation fails
  # @raise [RuntimeError] with +message+ if +value+ is not numeric or out of range
  # @return [Float]
  def checked_threshold(value, message)
    raise message unless OpenTox::Algorithm.numeric?(value)
    threshold = Float(value)
    raise message unless threshold >= 0.0 && threshold <= 1.0
    threshold
  end

  # Parses the integer cluster id out of a label of the form "<prefix><id>"
  # without modifying the label.
  #
  # @param label [String] metadata string
  # @param prefix [String] text preceding the id
  # @raise [IndexError] if +label+ does not contain +prefix+
  # @return [Integer]
  def cluster_id(label, prefix)
    raise IndexError, "string not matched" unless label.include?(prefix)
    label.sub(prefix, "").to_i
  end
end
+
module Neighbors
# Local multi-linear regression (MLR) prediction from neighbors.
@@ -811,6 +912,13 @@ module OpenTox
(nr_zeroes == 0) # also remove feature present everywhere
end
# Numeric value test.
# A value counts as numeric when Kernel#Float can convert it; any
# StandardError raised by the conversion means "not numeric".
# @param value [Object] value to test
# @return [Boolean] Whether value is a number
def self.numeric?(value)
  begin
    Float(value)
    true
  rescue StandardError
    # Same error class the former inline `rescue` modifier caught, now explicit.
    false
  end
end
+
# For symbolic features
# @param [Array] Array to test, must indicate non-occurrence with 0.
# @return [Boolean] Whether the feature has variance zero.
diff --git a/lib/parser.rb b/lib/parser.rb
index 4ee4a22..d0975af 100644
--- a/lib/parser.rb
+++ b/lib/parser.rb
@@ -447,12 +447,8 @@ module OpenTox
end
end
- def numeric?(value)
- true if Float(value) rescue false
- end
-
def feature_type(value)
- if numeric? value
+ if OpenTox::Algorithm::numeric? value
return OT.NumericFeature
else
return OT.NominalFeature
@@ -493,7 +489,7 @@ module OpenTox
if feature_types(feature).size == 1 and feature_types(feature).first == OT.NumericFeature
# REGRESSION
elsif feature_types(feature).include? OT.NumericFeature
- @data.each{|c,row| row[feature] = nil unless numeric?(row[feature]) } # delete nominal features
+ @data.each{|c,row| row[feature] = nil unless OpenTox::Algorithm::numeric?(row[feature]) } # delete nominal features
@activity_errors << "Nominal feature values of #{feature} ignored (using numeric features for regression models)."
else
@activity_errors << "Feature #{feature} ignored (more than 5 nominal feature values and no numeric values)."
@@ -522,7 +518,7 @@ module OpenTox
@data.each do |compound,row|
unless row.empty?
row.each do |feature,value|
- if numeric?(value)
+ if OpenTox::Algorithm::numeric?(value)
value = value.to_f
elsif value.nil? or value.empty?
value = nil
@@ -545,12 +541,9 @@ module OpenTox
end
private
- def numeric?(value)
- true if Float(value) rescue false
- end
def feature_type(value)
- if numeric? value
+ if OpenTox::Algorithm::numeric? value
return OT.NumericFeature
else
return OT.NominalFeature