summary | refs | log | tree | commit | diff
path: root/lazar.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lazar.rb')
-rw-r--r-- lazar.rb 259
1 files changed, 0 insertions, 259 deletions
diff --git a/lazar.rb b/lazar.rb
deleted file mode 100644
index 1fe28f8..0000000
--- a/lazar.rb
+++ /dev/null
@@ -1,259 +0,0 @@
-@@feature_generation_default = File.join(CONFIG[:services]["opentox-algorithm"],"fminer","bbrc")
-
-# Get RDF/XML representation of the lazar algorithm
-# @return [application/rdf+xml] OWL-DL representation of the lazar algorithm
-get '/lazar/?' do
- algorithm = OpenTox::Algorithm::Generic.new(url_for('/lazar',:full))
- algorithm.metadata = {
- DC.title => 'lazar',
- DC.creator => "helma@in-silico.ch, andreas@maunz.de",
- DC.contributor => "vorgrimmlerdavid@gmx.de",
-# BO.instanceOf => "http://opentox.org/ontology/ist-algorithms.owl#lazar",
- OT.parameters => [
- { DC.description => "Dataset URI with the dependent variable", OT.paramScope => "mandatory", DC.title => "dataset_uri" },
- { DC.description => "Feature URI for dependent variable. Optional for datasets with only a single feature.", OT.paramScope => "optional", DC.title => "prediction_feature" },
- { DC.description => "URI of feature generation service. Default: #{@@feature_generation_default}", OT.paramScope => "optional", DC.title => "feature_generation_uri" },
- { DC.description => "URI of feature dataset. If this parameter is set no feature generation algorithm will be called", OT.paramScope => "optional", DC.title => "feature_dataset_uri" },
- { DC.description => "Further parameters for the feature generation service", OT.paramScope => "optional" }
- ]
- }
- case request.env['HTTP_ACCEPT']
- when /text\/html/
- content_type "text/html"
- OpenTox.text_to_html algorithm.to_yaml
- when /application\/x-yaml/
- content_type "application/x-yaml"
- algorithm.to_yaml
- else
- response['Content-Type'] = 'application/rdf+xml'
- algorithm.to_rdfxml
- end
-end
-
-# Create a lazar prediction model
-# @param [String] dataset_uri Training dataset URI
-# @param [optional,String] prediction_feature URI of the feature to be predicted
-# @param [optional,String] feature_generation_uri URI of the feature generation algorithm
-# @param [optional,String] - further parameters for the feature generation service
-# @return [text/uri-list] Task URI
-post '/lazar/?' do
-
- LOGGER.debug "building lazar model with params: "+params.inspect
- params[:subjectid] = @subjectid
- raise OpenTox::NotFoundError.new "No dataset_uri parameter." unless params[:dataset_uri]
- dataset_uri = params[:dataset_uri]
-
- task = OpenTox::Task.create("Create lazar model",url_for('/lazar',:full)) do |task|
-
-
- # # # Dataset present, prediction feature present?
- raise OpenTox::NotFoundError.new "Dataset #{dataset_uri} not found." unless training_activities = OpenTox::Dataset.new(dataset_uri)
- training_activities.load_all(@subjectid)
-
- # Prediction Feature
- prediction_feature = OpenTox::Feature.find(params[:prediction_feature],@subjectid)
- unless params[:prediction_feature] # try to read prediction_feature from dataset
- raise OpenTox::NotFoundError.new "#{training_activities.features.size} features in dataset #{dataset_uri}. Please provide a prediction_feature parameter." unless training_activities.features.size == 1
- prediction_feature = OpenTox::Feature.find(training_activities.features.keys.first,@subjectid)
- params[:prediction_feature] = prediction_feature.uri # pass to feature mining service
- end
- raise OpenTox::NotFoundError.new "No feature #{prediction_feature.uri} in dataset #{params[:dataset_uri]}. (features: "+ training_activities.features.inspect+")" unless training_activities.features and training_activities.features.include?(prediction_feature.uri)
-
- # Feature Generation URI
- feature_generation_uri = @@feature_generation_default unless ( (feature_generation_uri = params[:feature_generation_uri]) || (params[:feature_dataset_uri]) )
-
- # Create instance
- lazar = OpenTox::Model::Lazar.new
-
-
-
-
- # # # ENDPOINT RELATED
-
- # Default Values
- # Classification: Weighted Majority, Substructure.match
- if prediction_feature.feature_type == "classification"
- @training_classes = training_activities.accept_values(prediction_feature.uri).sort
- @training_classes.each_with_index { |c,i|
- lazar.value_map[i+1] = c # don't use '0': we must take the weighted mean later.
- params[:value_map] = lazar.value_map
- }
- # Regression: SVM, Substructure.match_hits
- elsif prediction_feature.feature_type == "regression"
- lazar.feature_calculation_algorithm = "Substructure.match_hits"
- lazar.prediction_algorithm = "Neighbors.local_svm_regression"
- end
-
-
-
-
- # # # USER VALUES
-
- # Min Sim
- min_sim = params[:min_sim].to_f if params[:min_sim]
- min_sim = 0.3 unless params[:min_sim]
-
- # Algorithm
- lazar.prediction_algorithm = "Neighbors.#{params[:prediction_algorithm]}" if params[:prediction_algorithm]
-
- # Nr Hits
- nr_hits = false
- if params[:nr_hits] == "true" || lazar.prediction_algorithm.include?("local_svm")
- lazar.feature_calculation_algorithm = "Substructure.match_hits"
- nr_hits = true
- end
- params[:nr_hits] = "true" if lazar.feature_calculation_algorithm == "Substructure.match_hits" #not sure if this line in needed
-
- # Propositionalization
- propositionalized = (lazar.prediction_algorithm=="Neighbors.weighted_majority_vote" ? false : true)
-
- # PC type
- pc_type = params[:pc_type] unless params[:pc_type].nil?
-
- # Min train performance
- min_train_performance = params[:min_train_performance].to_f if params[:min_train_performance]
- min_train_performance = 0.1 unless params[:min_train_performance]
-
-
-
-
-
-
- task.progress 10
-
-
-
-
-
- # # # Features
-
- # Read Features
- if params[:feature_dataset_uri]
- lazar.feature_calculation_algorithm = "Substructure.lookup"
- feature_dataset_uri = params[:feature_dataset_uri]
- training_features = OpenTox::Dataset.new(feature_dataset_uri)
- if training_features.feature_type(@subjectid) == "regression"
- lazar.similarity_algorithm = "Similarity.cosine"
- min_sim = 0.4 unless params[:min_sim]
- raise OpenTox::NotFoundError.new "No pc_type parameter." unless params[:pc_type]
- end
-
- # Create Features
- else
- params[:feature_generation_uri] = feature_generation_uri
- params[:subjectid] = @subjectid
- prediction_feature = OpenTox::Feature.find params[:prediction_feature], @subjectid
- if prediction_feature.feature_type == "regression" && feature_generation_uri.match(/fminer/)
- params[:feature_type] = "paths" unless params[:feature_type]
- end
- feature_dataset_uri = OpenTox::Algorithm::Generic.new(feature_generation_uri).run(params, OpenTox::SubTask.new(task,10,70)).to_s
- training_features = OpenTox::Dataset.new(feature_dataset_uri)
- end
-
-
-
- # # # Write fingerprints
- training_features.load_all(@subjectid)
- raise OpenTox::NotFoundError.new "Dataset #{feature_dataset_uri} not found." if training_features.nil?
-
- training_features.data_entries.each do |compound,entry|
- if training_activities.data_entries.has_key? compound
-
- lazar.fingerprints[compound] = {} unless lazar.fingerprints[compound]
- entry.keys.each do |feature|
-
- # CASE 1: Substructure
- if (lazar.feature_calculation_algorithm == "Substructure.match") || (lazar.feature_calculation_algorithm == "Substructure.match_hits")
- if training_features.features[feature]
- smarts = training_features.features[feature][OT.smarts]
- #lazar.fingerprints[compound] << smarts
- if lazar.feature_calculation_algorithm == "Substructure.match_hits"
- lazar.fingerprints[compound][smarts] = entry[feature].flatten.first * training_features.features[feature][OT.pValue]
- else
- lazar.fingerprints[compound][smarts] = 1 * training_features.features[feature][OT.pValue]
- end
- unless lazar.features.include? smarts
- lazar.features << smarts
- lazar.p_values[smarts] = training_features.features[feature][OT.pValue]
- lazar.effects[smarts] = training_features.features[feature][OT.effect]
- end
- end
-
- # CASE 2: Others
- elsif entry[feature].flatten.size == 1
- lazar.fingerprints[compound][feature] = entry[feature].flatten.first
- lazar.features << feature unless lazar.features.include? feature
- else
- LOGGER.warn "More than one entry (#{entry[feature].inspect}) for compound #{compound}, feature #{feature}"
- end
- end
-
- end
- end
- task.progress 80
-
-
-
-
-
- # # # Activities
-
- if prediction_feature.feature_type == "regression"
- training_activities.data_entries.each do |compound,entry|
- lazar.activities[compound] = [] unless lazar.activities[compound]
- unless entry[prediction_feature.uri].empty?
- entry[prediction_feature.uri].each do |value|
- lazar.activities[compound] << value
- end
- end
- end
- elsif prediction_feature.feature_type == "classification"
- training_activities.data_entries.each do |compound,entry|
- lazar.activities[compound] = [] unless lazar.activities[compound]
- unless entry[prediction_feature.uri].empty?
- entry[prediction_feature.uri].each do |value|
- lazar.activities[compound] << lazar.value_map.invert[value] # insert mapped values, not originals
- end
- end
- end
- end
- task.progress 90
-
-
-
-
- # Metadata
-
- lazar.metadata[DC.title] = "lazar model for #{URI.decode(File.basename(prediction_feature.uri))}"
- lazar.metadata[OT.dependentVariables] = prediction_feature.uri
- lazar.metadata[OT.trainingDataset] = dataset_uri
- lazar.metadata[OT.featureDataset] = feature_dataset_uri
- case training_activities.feature_type(@subjectid)
- when "classification"
- lazar.metadata[RDF.type] = [OT.Model, OTA.ClassificationLazySingleTarget]
- when "regression"
- lazar.metadata[RDF.type] = [OT.Model, OTA.RegressionLazySingleTarget]
- end
-
- lazar.metadata[OT.parameters] = [
- {DC.title => "dataset_uri", OT.paramValue => dataset_uri},
- {DC.title => "prediction_feature", OT.paramValue => prediction_feature.uri},
- {DC.title => "feature_generation_uri", OT.paramValue => feature_generation_uri},
- {DC.title => "propositionalized", OT.paramValue => propositionalized},
- {DC.title => "pc_type", OT.paramValue => pc_type},
- {DC.title => "nr_hits", OT.paramValue => nr_hits},
- {DC.title => "min_sim", OT.paramValue => min_sim},
- {DC.title => "min_train_performance", OT.paramValue => min_train_performance},
-
- ]
-
- model_uri = lazar.save(@subjectid)
- LOGGER.info model_uri + " created #{Time.now}"
- model_uri
-
- end
- response['Content-Type'] = 'text/uri-list'
- raise OpenTox::ServiceUnavailableError.newtask.uri+"\n" if task.status == "Cancelled"
- halt 202,task.uri
-end
-