diff options
author | Andreas Maunz <andreas@maunz.de> | 2011-07-29 11:47:17 +0200 |
---|---|---|
committer | Andreas Maunz <andreas@maunz.de> | 2011-07-29 11:47:17 +0200 |
commit | fa37ab0876faaaa2acf37b147924f025a0d8cd9a (patch) | |
tree | bc2bf960e0abc918dc380998a129e3e79dc79d96 | |
parent | a3f519bd945bcb2fce5bf587966ff746a03f0db9 (diff) |
Added TUM clustering
-rw-r--r-- | lib/algorithm.rb | 108 | ||||
-rw-r--r-- | lib/parser.rb | 15 |
2 files changed, 112 insertions, 11 deletions
diff --git a/lib/algorithm.rb b/lib/algorithm.rb index a0ad9a5..3cf4ecf 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -4,6 +4,7 @@ R = nil require "rinruby" require "statsample" +require 'uri' module OpenTox @@ -210,6 +211,106 @@ module OpenTox end end + # Structural Graph Clustering by TU Munich + # Finds clusters similar to a query structure in a given training dataset + # May be queried for cluster membership of an unknown compound + class StructuralClustering + attr_accessor :training_dataset_uri, :training_threshold, :query_dataset_uri, :query_threshold, :target_clusters_array + + # @params[String] Training dataset_uri + # @params[Float] Similarity threshold for training (optional) + # @params[String] Cluster service uri (no AA) + def initialize training_dataset_uri, training_threshold=0.8, cluster_service_uri = "http://opentox-dev.informatik.tu-muenchen.de:8080/OpenTox/algorithm/StructuralClustering" + + if (training_dataset_uri =~ URI::regexp).nil? || (cluster_service_uri =~ URI::regexp).nil? + raise "Invalid URI." + end + @training_dataset_uri = training_dataset_uri + if !OpenTox::Algorithm.numeric? training_threshold || training_threshold <0 || training_threshold >1 + raise "Training threshold out of bounds." + end + @training_threshold = training_threshold.to_f + + # Train a cluster model + params = {:dataset_uri => @training_dataset_uri, :threshold => @training_threshold } + @cluster_model_uri = OpenTox::RestClientWrapper.post cluster_service_uri, params + cluster_model_rdf = OpenTox::RestClientWrapper.get @cluster_model_uri + @datasets = OpenTox::Parser::Owl.from_rdf cluster_model_rdf, OT.Dataset, true # must extract OT.Datasets from model + + # Process parsed OWL objects + @clusterid_dataset_map = Hash.new + @datasets.each { |d| + begin + d.metadata[OT.hasSource]["Structural Clustering cluster "] = "" # must parse in metadata for string (not elegant) + @clusterid_dataset_map[d.metadata[OT.hasSource].to_i] = d.uri + rescue Exception => e + # ignore other entries! + end + } + end + + # Whether a model has been trained + def trained? + !@cluster_model_uri.nil? + end + + # Instance query: clusters for a compound + # @params[String] Query compound + # @params[Float] Similarity threshold for query to clusters (optional) + def get_clusters query_compound_uri, query_threshold = 0.5 + + if !OpenTox::Algorithm.numeric? query_threshold || query_threshold <0 || query_threshold >1 + raise "Query threshold out of bounds." + end + @query_threshold = query_threshold.to_f + + + # Preparing a query dataset + query_dataset = OpenTox::Dataset.new + @query_dataset_uri = query_dataset.save + query_dataset = OpenTox::Dataset.find @query_dataset_uri + query_dataset.add_compound query_compound_uri + @query_dataset_uri = query_dataset.save + + # Obtaining a clustering for query compound + params = { :dataset_uri => @query_dataset_uri, :threshold => @query_threshold } + cluster_query_dataset_uri = OpenTox::RestClientWrapper.post @cluster_model_uri, params + cluster_query_dataset = OpenTox::Dataset.new cluster_query_dataset_uri + cluster_query_dataset.load_all + + # Reading cluster ids for features from metadata + feature_clusterid_map = Hash.new + pattern="Prediction feature for cluster assignment " # must parse for string in metadata (not elegant) + cluster_query_dataset.features.each { |feature_uri,metadata| + metadata[DC.title][pattern]="" + feature_clusterid_map[feature_uri] = metadata[DC.title].to_i + } + + # Integrity check + unless cluster_query_dataset.compounds.size == 1 + raise "Number of predicted compounds is != 1." + end + + # Process data entry + query_compound_uri = cluster_query_dataset.compounds[0] + @target_clusters_array = Array.new + cluster_query_dataset.features.keys.each { |cluster_membership_feature| + + # Getting dataset URI for cluster + target_cluster = feature_clusterid_map[cluster_membership_feature] + dataset = @clusterid_dataset_map[target_cluster] + + # Finally look up presence + data_entry = cluster_query_dataset.data_entries[query_compound_uri] + present = data_entry[cluster_membership_feature][0] + + # Store result + @target_clusters_array << dataset if present > 0.5 # 0.0 for absence, 1.0 for presence + } + end + + end + module Neighbors # Local multi-linear regression (MLR) prediction from neighbors. @@ -811,6 +912,13 @@ module OpenTox (nr_zeroes == 0) # also remove feature present everywhere end + # Numeric value test + # @param[Object] value + # @return [Boolean] Whether value is a number + def self.numeric?(value) + true if Float(value) rescue false + end + # For symbolic features # @param [Array] Array to test, must indicate non-occurrence with 0. # @return [Boolean] Whether the feature has variance zero. diff --git a/lib/parser.rb b/lib/parser.rb index 4ee4a22..d0975af 100644 --- a/lib/parser.rb +++ b/lib/parser.rb @@ -447,12 +447,8 @@ module OpenTox end end - def numeric?(value) - true if Float(value) rescue false - end - def feature_type(value) - if numeric? value + if OpenTox::Algorithm::numeric? value return OT.NumericFeature else return OT.NominalFeature @@ -493,7 +489,7 @@ module OpenTox if feature_types(feature).size == 1 and feature_types(feature).first == OT.NumericFeature # REGRESSION elsif feature_types(feature).include? OT.NumericFeature - @data.each{|c,row| row[feature] = nil unless numeric?(row[feature]) } # delete nominal features + @data.each{|c,row| row[feature] = nil unless OpenTox::Algorithm::numeric?(row[feature]) } # delete nominal features @activity_errors << "Nominal feature values of #{feature} ignored (using numeric features for regression models)." else @activity_errors << "Feature #{feature} ignored (more than 5 nominal feature values and no numeric values)." @@ -522,7 +518,7 @@ module OpenTox @data.each do |compound,row| unless row.empty? row.each do |feature,value| - if numeric?(value) + if OpenTox::Algorithm::numeric?(value) value = value.to_f elsif value.nil? or value.empty? value = nil @@ -545,12 +541,9 @@ module OpenTox end private - def numeric?(value) - true if Float(value) rescue false - end def feature_type(value) - if numeric? value + if OpenTox::Algorithm::numeric? value return OT.NumericFeature else return OT.NominalFeature |