summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author davor <vorgrimmlerdavid@gmx.de> 2011-12-29 16:08:46 +0100
committer davor <vorgrimmlerdavid@gmx.de> 2011-12-29 16:08:46 +0100
commit c2cd607e265022661c176e9ec3cc103e0a6fc4cd (patch)
tree 23c8d3439d74b3561fae62828db0b9eded548c9d
parent 0553eddba202ae481a1cdc3b7cc59002c4777ad4 (diff)
parent 2e7ff3936adfea4ad4bc456a13b2c2fed0ad581b (diff)
Merge branch 'new_pc_dv' into pc_new_1
-rw-r--r-- lazar.rb 144
1 files changed, 62 insertions, 82 deletions
diff --git a/lazar.rb b/lazar.rb
index 9aac0d8..f69495c 100644
--- a/lazar.rb
+++ b/lazar.rb
@@ -45,45 +45,76 @@ post '/lazar/?' do
task = OpenTox::Task.create("Create lazar model",url_for('/lazar',:full)) do |task|
+
+ # # # Dataset present, prediction feature present?
raise OpenTox::NotFoundError.new "Dataset #{dataset_uri} not found." unless training_activities = OpenTox::Dataset.new(dataset_uri)
training_activities.load_all(@subjectid)
+ # Prediction Feature
prediction_feature = OpenTox::Feature.find(params[:prediction_feature],@subjectid)
unless params[:prediction_feature] # try to read prediction_feature from dataset
raise OpenTox::NotFoundError.new "#{training_activities.features.size} features in dataset #{dataset_uri}. Please provide a prediction_feature parameter." unless training_activities.features.size == 1
prediction_feature = OpenTox::Feature.find(training_activities.features.keys.first,@subjectid)
params[:prediction_feature] = prediction_feature.uri # pass to feature mining service
end
+ raise OpenTox::NotFoundError.new "No feature #{prediction_feature.uri} in dataset #{params[:dataset_uri]}. (features: "+ training_activities.features.inspect+")" unless training_activities.features and training_activities.features.include?(prediction_feature.uri)
- feature_generation_uri = @@feature_generation_default unless feature_generation_uri = params[:feature_generation_uri]
-
- raise OpenTox::NotFoundError.new "No feature #{prediction_feature.uri} in dataset #{params[:dataset_uri]}. (features: "+
- training_activities.features.inspect+")" unless training_activities.features and training_activities.features.include?(prediction_feature.uri)
+ # Feature Generation URI
+ feature_generation_uri = @@feature_generation_default unless ( (feature_generation_uri = params[:feature_generation_uri]) || (params[:feature_dataset_uri]) )
+ # Create instance
lazar = OpenTox::Model::Lazar.new
- lazar.min_sim = params[:min_sim].to_f if params[:min_sim]
- # AM: Manage endpoint related variables.
+ # # # ENDPOINT RELATED
+
+ # Default Values
+ # Classification: Weighted Majority, Substructure.match
if prediction_feature.feature_type == "classification"
@training_classes = training_activities.accept_values(prediction_feature.uri).sort
@training_classes.each_with_index { |c,i|
lazar.value_map[i+1] = c # don't use '0': we must take the weighted mean later.
params[:value_map] = lazar.value_map
}
+ # Regression: SVM, Substructure.match_hits
elsif prediction_feature.feature_type == "regression"
- lazar.nr_hits = true
+ #lazar.nr_hits = true # AM: Brauchen wir die Variable noch? Kann man an feature_calculation_algorithm auch ablesen (nächste Zeile)
+ lazar.feature_calculation_algorithm = "Substructure.match_hits"
lazar.prediction_algorithm = "Neighbors.local_svm_regression"
end
+
+
+
+ # # # USER VALUES
+
+ # Min Sim
+ lazar.min_sim = params[:min_sim].to_f if params[:min_sim]
+
+ # Nr Hits
if params[:nr_hits] == "false" # if nr_hits is set optional to true/false it will return as String (but should be True/FalseClass)
- lazar.nr_hits = false
+ #lazar.nr_hits = false
+ lazar.feature_calculation_algorithm = "Substructure.match"
elsif params[:nr_hits] == "true"
- lazar.nr_hits = true
+ #lazar.nr_hits = true
+ lazar.feature_calculation_algorithm = "Substructure.match_hits"
end
- params[:nr_hits] = "true" if lazar.nr_hits
+ params[:nr_hits] = "true" if lazar.feature_calculation_algorithm == "Substructure.match_hits" #not sure if this line in needed
+
+ # Algorithm
+ lazar.prediction_algorithm = "Neighbors.#{params[:prediction_algorithm]}" unless params[:prediction_algorithm].nil?
+
+ # Propositionalization
+ lazar.prop_kernel = true if (params[:local_svm_kernel] == "propositionalized" || params[:prediction_algorithm] == "local_mlr_prop")
+
+ # PC type
+ lazar.pc_type = params[:pc_type] unless params[:pc_type].nil?
+
+ # Conf_stdev
+ lazar.conf_stdev = ( (params[:conf_stdev] == "true") ? true : false )
+
@@ -96,29 +127,20 @@ post '/lazar/?' do
- #
- # AM: features
- #
- #
- #
+ # # # Features
- # READ OR CREATE
+ # Read Features
if params[:feature_dataset_uri]
+ lazar.feature_calculation_algorithm = "Substructure.lookup"
feature_dataset_uri = params[:feature_dataset_uri]
training_features = OpenTox::Dataset.new(feature_dataset_uri)
- case training_features.feature_type(@subjectid)
- when "classification"
- lazar.similarity_algorithm = "Similarity.tanimoto"
- when "regression"
- lazar.similarity_algorithm = "Similarity.euclid"
+ if training_features.feature_type(@subjectid) == "regression"
+ lazar.similarity_algorithm = "Similarity.cosine"
end
- else # create features
+
+ # Create Features
+ else
params[:feature_generation_uri] = feature_generation_uri
- if feature_generation_uri.match(/fminer/)
- lazar.feature_calculation_algorithm = "Substructure.match"
- else
- raise OpenTox::NotFoundError.new "External feature generation services not yet supported"
- end
params[:subjectid] = @subjectid
prediction_feature = OpenTox::Feature.find params[:prediction_feature], @subjectid
if prediction_feature.feature_type == "regression" && feature_generation_uri.match(/fminer/)
@@ -130,27 +152,23 @@ post '/lazar/?' do
- # WRITE IN MODEL
+ # # # Write fingerprints
training_features.load_all(@subjectid)
raise OpenTox::NotFoundError.new "Dataset #{feature_dataset_uri} not found." if training_features.nil?
- # sorted features for index lookups
-
- lazar.features = training_features.features.sort if prediction_feature.feature_type == "regression" and lazar.feature_calculation_algorithm != "Substructure.match"
-
training_features.data_entries.each do |compound,entry|
lazar.fingerprints[compound] = {} unless lazar.fingerprints[compound]
entry.keys.each do |feature|
# CASE 1: Substructure
- if lazar.feature_calculation_algorithm == "Substructure.match"
+ if lazar.feature_calculation_algorithm == "Substructure.match" || lazar.feature_calculation_algorithm == "Substructure.match_hits"
if training_features.features[feature]
smarts = training_features.features[feature][OT.smarts]
#lazar.fingerprints[compound] << smarts
- if params[:nr_hits]
- lazar.fingerprints[compound][smarts] = entry[feature].flatten.first
+ if lazar.feature_calculation_algorithm == "Substructure.match_hits"
+ lazar.fingerprints[compound][smarts] = entry[feature].flatten.first * training_features.features[feature][OT.pValue]
else
- lazar.fingerprints[compound][smarts] = 1
+ lazar.fingerprints[compound][smarts] = 1 * training_features.features[feature][OT.pValue]
end
unless lazar.features.include? smarts
lazar.features << smarts
@@ -160,26 +178,11 @@ post '/lazar/?' do
end
# CASE 2: Others
+ elsif entry[feature].flatten.size == 1
+ lazar.fingerprints[compound][feature] = entry[feature].flatten.first
+ lazar.features << feature unless lazar.features.include? feature
else
- case training_features.feature_type(@subjectid)
- when "classification"
- # fingerprints are sets
- if entry[feature].flatten.size == 1
- #lazar.fingerprints[compound] << feature if entry[feature].flatten.first.to_s.match(TRUE_REGEXP)
- lazar.fingerprints[compound][feature] = entry[feature].flatten.first if entry[feature].flatten.first.to_s.match(TRUE_REGEXP)
- lazar.features << feature unless lazar.features.include? feature
- else
- LOGGER.warn "More than one entry (#{entry[feature].inspect}) for compound #{compound}, feature #{feature}"
- end
- when "regression"
- # fingerprints are arrays
- if entry[feature].flatten.size == 1
- lazar.fingerprints[compound][lazar.features.index(feature)] = entry[feature].flatten.first
- #lazar.fingerprints[compound][feature] = entry[feature].flatten.first
- else
- LOGGER.warn "More than one entry (#{entry[feature].inspect}) for compound #{compound}, feature #{feature}"
- end
- end
+ LOGGER.warn "More than one entry (#{entry[feature].inspect}) for compound #{compound}, feature #{feature}"
end
end
end
@@ -188,28 +191,8 @@ post '/lazar/?' do
-
- #
- # AM: SETTINGS
- #
- #
- #
-
- # AM: allow settings override by user
- lazar.prediction_algorithm = "Neighbors.#{params[:prediction_algorithm]}" unless params[:prediction_algorithm].nil?
- lazar.prop_kernel = true if (params[:local_svm_kernel] == "propositionalized" || params[:prediction_algorithm] == "local_mlr_prop")
- lazar.conf_stdev = false
- lazar.conf_stdev = true if params[:conf_stdev] == "true"
-
-
-
-
-
- #
- # AM: Feed data
- #
- #
- #
+
+ # # # Activities
if prediction_feature.feature_type == "regression"
training_activities.data_entries.each do |compound,entry|
@@ -235,11 +218,7 @@ post '/lazar/?' do
- #
- # AM: Metadata
- #
- #
- #
+ # Metadata
lazar.metadata[DC.title] = "lazar model for #{URI.decode(File.basename(prediction_feature.uri))}"
lazar.metadata[OT.dependentVariables] = prediction_feature.uri
@@ -261,6 +240,7 @@ post '/lazar/?' do
model_uri = lazar.save(@subjectid)
LOGGER.info model_uri + " created #{Time.now}"
model_uri
+
end
response['Content-Type'] = 'text/uri-list'
raise OpenTox::ServiceUnavailableError.newtask.uri+"\n" if task.status == "Cancelled"