summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authormguetlein <martin.guetlein@gmail.com>2013-02-04 11:20:46 +0100
committermguetlein <martin.guetlein@gmail.com>2013-02-04 11:20:46 +0100
commit7a75d09664033fccaabe8e58d2e17af2abd595f3 (patch)
tree5f0a8ab76070beaf47c7b2fcbc3f25519dc0f0ff
parent5da6678857f617bdf6f6d7772e6c8788ff561221 (diff)
fix model prediction -> reintroduce predictedVariable, add some debug output
-rw-r--r--lib/algorithm/fminer.rb1
-rw-r--r--lib/model.rb3
-rw-r--r--webapp/fminer.rb2
-rw-r--r--webapp/lazar.rb61
4 files changed, 44 insertions, 23 deletions
diff --git a/lib/algorithm/fminer.rb b/lib/algorithm/fminer.rb
index 3a69858..dd341db 100644
--- a/lib/algorithm/fminer.rb
+++ b/lib/algorithm/fminer.rb
@@ -117,6 +117,7 @@ module OpenTox
else
if @prediction_feature.feature_type == "classification"
activity= value_map.invert[compound_activities].to_i # activities are mapped to 1..n
+ raise "activity could not be mapped, is #{compound_activities} (#{compound_activities.class}), available: #{value_map.values} (#{value_map.values.collect{|k| k.class}})" if activity<1
@db_class_sizes[activity-1].nil? ? @db_class_sizes[activity-1]=1 : @db_class_sizes[activity-1]+=1 # AM effect
elsif @prediction_feature.feature_type == "regression"
activity= compound_activities.to_f
diff --git a/lib/model.rb b/lib/model.rb
index 76c5963..1c10b35 100644
--- a/lib/model.rb
+++ b/lib/model.rb
@@ -43,7 +43,8 @@ module OpenTox
prediction_feature_pos = training_dataset.feature_positions[prediction_feature.uri]
training_dataset.compounds.each_with_index { |cmpd, idx|
act = training_dataset.data_entries[idx][prediction_feature_pos]
- @acts << training_dataset.value_map(prediction_feature).invert[act]
+ @acts << (prediction_feature.feature_type=="classification" ?
+ training_dataset.value_map(prediction_feature).invert[act] : nil)
@n_prop << feature_dataset.data_entries[idx].collect.to_a
@cmpds << cmpd.uri
}
diff --git a/webapp/fminer.rb b/webapp/fminer.rb
index e928476..081a408 100644
--- a/webapp/fminer.rb
+++ b/webapp/fminer.rb
@@ -271,6 +271,8 @@ module OpenTox
row.collect! { |v| v ? v : 0 } unless fminer_noact_compounds.include? c
feature_dataset << row
}
+
+ $logger.debug "fminer found #{feature_dataset.features.size} features for #{feature_dataset.compounds.size} compounds"
feature_dataset.put @subjectid
$logger.debug feature_dataset.uri
feature_dataset.uri
diff --git a/webapp/lazar.rb b/webapp/lazar.rb
index 7fb7ea3..f761dce 100644
--- a/webapp/lazar.rb
+++ b/webapp/lazar.rb
@@ -45,6 +45,14 @@ module OpenTox
format_output(algorithm)
end
+ def predicted_variable(prediction_feature)
+ OpenTox::Feature.find_by_title("predicted_variable", {RDF.type => prediction_feature[RDF.type]})
+ end
+
+ def predicted_confidence
+ OpenTox::Feature.find_by_title("predicted_confidence", {RDF.type => [RDF::OT.NumericFeature]})
+ end
+
# Create a lazar prediction model
# @param [String] dataset_uri Training dataset URI
@@ -65,12 +73,14 @@ module OpenTox
begin
lazar = OpenTox::Model.new(nil, @subjectid)
lazar.parameters = lazar.check_params($lazar_params, params)
+ prediction_feature = OpenTox::Feature.find(lazar.find_parameter_value("prediction_feature_uri"))
lazar.metadata = {
DC.title => "lazar model",
OT.dependentVariables => lazar.find_parameter_value("prediction_feature_uri"),
+ OT.predictedVariables => [ predicted_variable(prediction_feature).uri, predicted_confidence.uri ],
OT.trainingDataset => lazar.find_parameter_value("training_dataset_uri"),
OT.featureDataset => lazar.find_parameter_value("feature_dataset_uri"),
- RDF.type => ( OpenTox::Feature.find(lazar.find_parameter_value("prediction_feature_uri")).feature_type == "classification" ?
+ RDF.type => ( prediction_feature.feature_type == "classification" ?
[OT.Model, OTA.ClassificationLazySingleTarget] :
[OT.Model, OTA.RegressionLazySingleTarget]
)
@@ -116,7 +126,7 @@ module OpenTox
$task[:uri],
@subjectid,
{
- RDF::DC.description => "Create lazar model",
+ RDF::DC.description => "Apply lazar model",
RDF::DC.creator => url_for('/lazar/predict',:full)
}
) do |task|
@@ -140,26 +150,28 @@ module OpenTox
}
@model = OpenTox::Model.new(@model_params_hash)
+ $logger.debug "Loading t dataset"
+ @training_dataset = OpenTox::Dataset.find(params[:training_dataset_uri], @subjectid)
+ @prediction_feature = OpenTox::Feature.find(params[:prediction_feature_uri],@subjectid)
+ @predicted_variable = predicted_variable(@prediction_feature)
+ @predicted_confidence = predicted_confidence
+ @similarity_feature = OpenTox::Feature.find_by_title("similarity", {RDF.type => [RDF::OT.NumericFeature]})
+ @prediction_dataset.features = [ @prediction_feature, @predicted_variable, @predicted_confidence, @similarity_feature ]
+
@prediction_dataset.metadata = {
DC.title => "Lazar prediction",
DC.creator => @uri.to_s,
OT.hasSource => @uri.to_s,
OT.dependentVariables => @model_params_hash["prediction_feature_uri"],
- OT.predictedVariables => @model_params_hash["prediction_feature_uri"]
+ OT.predictedVariables => [@predicted_variable.uri,@predicted_confidence.uri]
}
-
- $logger.debug "Loading t dataset"
- @training_dataset = OpenTox::Dataset.find(params[:training_dataset_uri], @subjectid)
- @prediction_feature = OpenTox::Feature.find(params[:prediction_feature_uri],@subjectid)
- @confidence_feature = OpenTox::Feature.find_by_title("confidence", {RDF.type => [RDF::OT.NumericFeature]})
- @similarity_feature = OpenTox::Feature.find_by_title("similarity", {RDF.type => [RDF::OT.NumericFeature]})
- @prediction_dataset.features = [ @prediction_feature, @confidence_feature, @similarity_feature ]
end
database_activity = @training_dataset.database_activity(params)
if database_activity
- prediction_value = database_activity.to_f
+ orig_value = database_activity.to_f
+ predicted_value = orig_value
confidence_value = 1.0
else
@@ -200,33 +212,38 @@ module OpenTox
mtf = OpenTox::Algorithm::Transform::ModelTransformer.new(@model)
mtf.transform
$logger.debug "Predicting q"
- prediction = OpenTox::Algorithm::Neighbors.send(@model.prediction_algorithm, { :props => mtf.props,
- :acts => mtf.acts,
- :sims => mtf.sims,
- :value_map => @training_dataset.value_map(@prediction_feature),
- :min_train_performance => @model.min_train_performance
- } )
- prediction_value = prediction[:prediction].to_f
+ prediction = OpenTox::Algorithm::Neighbors.send(@model.prediction_algorithm,
+ { :props => mtf.props,
+ :acts => mtf.acts,
+ :sims => mtf.sims,
+ :value_map => @prediction_feature.feature_type=="classification" ?
+ @training_dataset.value_map(@prediction_feature) : nil,
+ :min_train_performance => @model.min_train_performance
+ } )
+ orig_value = nil
+ predicted_value = prediction[:prediction].to_f
confidence_value = prediction[:confidence].to_f
# AM: transform to original space
confidence_value = ((confidence_value+1.0)/2.0).abs if @model.similarity_algorithm =~ /cosine/
- prediction_value = @training_dataset.value_map(@prediction_feature)[prediction[:prediction].to_i] if @prediction_feature.feature_type == "classification"
+ predicted_value = @training_dataset.value_map(@prediction_feature)[prediction[:prediction].to_i] if @prediction_feature.feature_type == "classification"
- $logger.debug "Prediction: '#{prediction_value}'"
+ $logger.debug "Prediction: '#{predicted_value}'"
$logger.debug "Confidence: '#{confidence_value}'"
end
@prediction_dataset << [
query_compound,
- prediction_value,
+ orig_value,
+ predicted_value,
confidence_value,
nil
]
@model.neighbors.each { |neighbor|
@prediction_dataset << [
OpenTox::Compound.new(neighbor[:compound]),
- @training_dataset.value_map(@prediction_feature)[neighbor[:activity]],
+ @training_dataset.value_map(@prediction_feature)[neighbor[:activity]],
+ nil,
nil,
neighbor[:similarity]
]