summary refs log tree commit diff
path: root/lazar.rb
diff options
context:
space:
mode:
author Christoph Helma <helma@in-silico.ch> 2011-05-23 14:33:31 +0000
committer Christoph Helma <helma@in-silico.ch> 2011-05-23 14:33:31 +0000
commit 6cb25029a9e6f8dc397f642892a020ab4de4c0f4 (patch)
tree d341ac0a742efcd96d78b56d75c24b3cff7d1477 /lazar.rb
parent f48e41548ebb693c1abba8ad1b2a671fae0c118a (diff)
parent 5b9783ef4eee9a15e801c5781848d5bc9e488110 (diff)
fminer.rb
Diffstat (limited to 'lazar.rb')
-rw-r--r-- lazar.rb 70
1 files changed, 41 insertions, 29 deletions
diff --git a/lazar.rb b/lazar.rb
index a4e006b..6bedf95 100644
--- a/lazar.rb
+++ b/lazar.rb
@@ -35,17 +35,17 @@ post '/lazar/?' do
halt 404, "Dataset #{dataset_uri} not found." unless training_activities = OpenTox::Dataset.new(dataset_uri)
training_activities.load_all(@subjectid)
- prediction_feature = params[:prediction_feature]
- unless prediction_feature # try to read prediction_feature from dataset
+ prediction_feature = OpenTox::Feature.find(params[:prediction_feature],@subjectid)
+ unless params[:prediction_feature] # try to read prediction_feature from dataset
halt 404, "#{training_activities.features.size} features in dataset #{dataset_uri}. Please provide a prediction_feature parameter." unless training_activities.features.size == 1
- prediction_feature = training_activities.features.keys.first
- params[:prediction_feature] = prediction_feature
+ prediction_feature = OpenTox::Feature.find(training_activities.features.keys.first,@subjectid)
+ params[:prediction_feature] = prediction_feature.uri # pass to feature mining service
end
feature_generation_uri = @@feature_generation_default unless feature_generation_uri = params[:feature_generation_uri]
- halt 404, "No feature #{prediction_feature} in dataset #{params[:dataset_uri]}. (features: "+
- training_activities.features.inspect+")" unless training_activities.features and training_activities.features.include?(prediction_feature)
+ halt 404, "No feature #{prediction_feature.uri} in dataset #{params[:dataset_uri]}. (features: "+
+ training_activities.features.inspect+")" unless training_activities.features and training_activities.features.include?(prediction_feature.uri)
task = OpenTox::Task.create("Create lazar model",url_for('/lazar',:full)) do |task|
@@ -55,7 +55,7 @@ post '/lazar/?' do
if params[:feature_dataset_uri]
feature_dataset_uri = params[:feature_dataset_uri]
training_features = OpenTox::Dataset.new(feature_dataset_uri)
- case training_features.feature_type
+ case training_features.feature_type(@subjectid)
when "classification"
lazar.similarity_algorithm = "Similarity.tanimoto"
when "regression"
@@ -73,7 +73,6 @@ post '/lazar/?' do
if prediction_feature.feature_type == "regression" && feature_generation_uri.match(/fminer/)
params[:feature_type] = "paths"
end
- prediction_feature = prediction_feature.uri #hotfix this will change in future version see development branch 2011/04/06 mr
feature_dataset_uri = OpenTox::Algorithm::Generic.new(feature_generation_uri).run(params).to_s
training_features = OpenTox::Dataset.new(feature_dataset_uri)
end
@@ -82,21 +81,24 @@ post '/lazar/?' do
halt 404, "Dataset #{feature_dataset_uri} not found." if training_features.nil?
# sorted features for index lookups
- lazar.features = training_features.features.sort if training_features.feature_type == "regression"
+
+ lazar.features = training_features.features.sort if prediction_feature.feature_type == "regression" and lazar.feature_calculation_algorithm != "Substructure.match"
training_features.data_entries.each do |compound,entry|
lazar.fingerprints[compound] = [] unless lazar.fingerprints[compound]
entry.keys.each do |feature|
- if feature_generation_uri.match(/fminer/)
- smarts = training_features.features[feature][OT.smarts]
- lazar.fingerprints[compound] << smarts
- unless lazar.features.include? smarts
- lazar.features << smarts
- lazar.p_values[smarts] = training_features.features[feature][OT.pValue]
- lazar.effects[smarts] = training_features.features[feature][OT.effect]
+ if lazar.feature_calculation_algorithm == "Substructure.match"
+ if training_features.features[feature]
+ smarts = training_features.features[feature][OT.smarts]
+ lazar.fingerprints[compound] << smarts
+ unless lazar.features.include? smarts
+ lazar.features << smarts
+ lazar.p_values[smarts] = training_features.features[feature][OT.pValue]
+ lazar.effects[smarts] = training_features.features[feature][OT.effect]
+ end
end
else
- case training_features.feature_type
+ case training_features.feature_type(@subjectid)
when "classification"
# fingerprints are sets
if entry[feature].flatten.size == 1
@@ -117,27 +119,37 @@ post '/lazar/?' do
end
end
+ @training_classes = training_activities.feature_classes(prediction_feature.uri, @subjectid) if prediction_feature.feature_type == "classification"
+ lazar.prediction_algorithm = "Neighbors.local_svm_regression" if prediction_feature.feature_type == "regression"
+
training_activities.data_entries.each do |compound,entry|
lazar.activities[compound] = [] unless lazar.activities[compound]
- unless entry[params[:prediction_feature]].empty?
- entry[params[:prediction_feature]].each do |value|
- case value.to_s
- when "true"
- lazar.activities[compound] << true
- when "false"
- lazar.activities[compound] << false
- else
+ unless entry[prediction_feature.uri].empty?
+ entry[prediction_feature.uri].each do |value|
+ if prediction_feature.feature_type == "classification"
+ case value.to_s
+ when "true"
+ lazar.activities[compound] << true
+ when "false"
+ lazar.activities[compound] << false
+ when /#{@training_classes.last}/
+ lazar.activities[compound] << true
+ when /#{@training_classes.first}/
+ lazar.activities[compound] << false
+ else
+ LOGGER.warn "Unknown class \"#{value.to_s}\"."
+ end
+ elsif prediction_feature.feature_type == "regression"
halt 404, "0 values not allowed in training dataset. log10 is calculated internally." if value.to_f == 0
lazar.activities[compound] << value.to_f
- lazar.prediction_algorithm = "Neighbors.local_svm_regression"
end
end
end
end
- lazar.metadata[DC.title] = "lazar model for #{URI.decode(File.basename(prediction_feature))}"
+ lazar.metadata[DC.title] = "lazar model for #{URI.decode(File.basename(prediction_feature.uri))}"
# TODO: fix dependentVariable
- lazar.metadata[OT.dependentVariables] = params[:prediction_feature]
+ lazar.metadata[OT.dependentVariables] = prediction_feature.uri
lazar.metadata[OT.trainingDataset] = dataset_uri
lazar.metadata[OT.featureDataset] = feature_dataset_uri
if training_activities.feature_type.to_s == "classification"
@@ -148,7 +160,7 @@ post '/lazar/?' do
lazar.metadata[OT.parameters] = [
{DC.title => "dataset_uri", OT.paramValue => dataset_uri},
- {DC.title => "prediction_feature", OT.paramValue => prediction_feature},
+ {DC.title => "prediction_feature", OT.paramValue => prediction_feature.uri},
{DC.title => "feature_generation_uri", OT.paramValue => feature_generation_uri}
]