1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
|
# Default feature generation service: the "fminer/bbrc" path below the
# configured opentox-algorithm service. Used by the lazar routes when a
# request does not supply its own feature_generation_uri.
@@feature_generation_default = File.join(
  CONFIG[:services]["opentox-algorithm"],
  "fminer",
  "bbrc"
)
# Get RDF/XML representation of the lazar algorithm
#
# Builds an OpenTox::Algorithm::Generic for this service's /lazar URI,
# attaches title, creator/contributor and the list of accepted parameters
# as metadata, and serialises it with to_rdfxml.
# @return [application/rdf+xml] OWL-DL representation of the lazar algorithm
get '/lazar/?' do
  response['Content-Type'] = 'application/rdf+xml'
  algorithm = OpenTox::Algorithm::Generic.new(url_for('/lazar', :full))
  algorithm.metadata = {
    DC.title => 'lazar',
    DC.creator => "helma@in-silico.ch, andreas@maunz.de",
    DC.contributor => "vorgrimmlerdavid@gmx.de",
    OT.parameters => [
      { DC.description => "Dataset URI with the dependent variable", OT.paramScope => "mandatory", DC.title => "dataset_uri" },
      { DC.description => "Feature URI for dependent variable. Optional for datasets with only a single feature.", OT.paramScope => "optional", DC.title => "prediction_feature" },
      { DC.description => "URI of feature generation service. Default: #{@@feature_generation_default}", OT.paramScope => "optional", DC.title => "feature_generation_uri" },
      { DC.description => "URI of feature dataset. If this parameter is set no feature generation algorithm will be called", OT.paramScope => "optional", DC.title => "feature_dataset_uri" },
      # Catch-all entry: it carries no DC.title of its own.
      { DC.description => "Further parameters for the feature generation service", OT.paramScope => "optional" }
    ]
  }
  algorithm.to_rdfxml
end
# Create a lazar prediction model
#
# Validates the request parameters, then creates an OpenTox::Task whose block
# builds an OpenTox::Model::Lazar from the training dataset and a feature
# dataset (either supplied or generated), saves it and yields the model URI.
#
# @param [String] dataset_uri Training dataset URI
# @param [optional,String] prediction_feature URI of the feature to be predicted
#   (read from the dataset when it contains exactly one feature)
# @param [optional,String] feature_generation_uri URI of the feature generation algorithm
#   (defaults to @@feature_generation_default)
# @param [optional,String] feature_dataset_uri URI of an existing feature dataset;
#   when present no feature generation service is called
# @param [optional,String] min_sim copied to the model's min_sim when present
# @param [optional,String] - further parameters for the feature generation service
# @return [text/uri-list] Task URI (status 202; 503 if the task status is "Cancelled")
#
# Halts with 404 when dataset_uri is missing, when no prediction feature can be
# determined, or when the prediction feature is not part of the dataset.
post '/lazar/?' do
  params[:subjectid] = @subjectid
  halt 404, "No dataset_uri parameter." unless params[:dataset_uri]
  dataset_uri = params[:dataset_uri]

  halt 404, "Dataset #{dataset_uri} not found." unless training_activities = OpenTox::Dataset.new(dataset_uri)
  training_activities.load_all(@subjectid)

  prediction_feature = params[:prediction_feature]
  unless prediction_feature # try to read prediction_feature from dataset
    halt 404, "#{training_activities.features.size} features in dataset #{dataset_uri}. Please provide a prediction_feature parameter." unless training_activities.features.size == 1
    prediction_feature = training_activities.features.keys.first
    params[:prediction_feature] = prediction_feature
  end

  # Fall back to the default feature generation service when none is requested.
  feature_generation_uri = params[:feature_generation_uri] || @@feature_generation_default

  halt 404, "No feature #{prediction_feature} in dataset #{params[:dataset_uri]}. (features: "+
    training_activities.features.inspect+")" unless training_activities.features and training_activities.features.include?(prediction_feature)

  # NOTE(review): several `halt` calls below are issued from inside the task
  # block; whether they reach the HTTP client depends on how
  # OpenTox::Task.create runs the block -- confirm.
  task = OpenTox::Task.create("Create lazar model",url_for('/lazar',:full)) do |task|

    lazar = OpenTox::Model::Lazar.new
    lazar.min_sim = params[:min_sim] if params[:min_sim]

    if params[:feature_dataset_uri]
      # Use the supplied feature dataset; pick the similarity measure by its type.
      feature_dataset_uri = params[:feature_dataset_uri]
      training_features = OpenTox::Dataset.new(feature_dataset_uri)
      case training_features.feature_type
      when "classification"
        lazar.similarity_algorithm = "Similarity.tanimoto"
      when "regression"
        lazar.similarity_algorithm = "Similarity.euclid"
      end
    else # create features
      params[:feature_generation_uri] = feature_generation_uri
      if feature_generation_uri.match(/fminer/)
        lazar.feature_calculation_algorithm = "Substructure.match"
      else
        halt 404, "External feature generation services not yet supported"
      end
      params[:subjectid] = @subjectid
      feature_dataset_uri = OpenTox::Algorithm::Generic.new(feature_generation_uri).run(params).to_s
      training_features = OpenTox::Dataset.new(feature_dataset_uri)
    end

    # Guard before the first method call on training_features.
    halt 404, "Dataset #{feature_dataset_uri} not found." if training_features.nil?
    training_features.load_all(@subjectid)

    # sorted features for index lookups
    # NOTE(review): if `features` is a Hash, `sort` yields [key, value] pairs
    # and `lazar.features.index(feature)` in the regression branch below would
    # not find a bare feature key -- confirm the intended element type.
    lazar.features = training_features.features.sort if training_features.feature_type == "regression"

    training_features.data_entries.each do |compound,entry|
      lazar.fingerprints[compound] = [] unless lazar.fingerprints[compound]
      entry.keys.each do |feature|
        if feature_generation_uri.match(/fminer/)
          # fminer features: the fingerprint holds SMARTS strings; p-value and
          # effect are recorded once per distinct SMARTS.
          smarts = training_features.features[feature][OT.smarts]
          lazar.fingerprints[compound] << smarts
          unless lazar.features.include? smarts
            lazar.features << smarts
            lazar.p_values[smarts] = training_features.features[feature][OT.pValue]
            lazar.effects[smarts] = training_features.features[feature][OT.effect]
          end
        else
          case training_features.feature_type
          when "classification"
            # fingerprints are sets
            if entry[feature].flatten.size == 1
              lazar.fingerprints[compound] << feature if entry[feature].flatten.first.to_s.match(TRUE_REGEXP)
              lazar.features << feature unless lazar.features.include? feature
            else
              LOGGER.warn "More than one entry (#{entry[feature].inspect}) for compound #{compound}, feature #{feature}"
            end
          when "regression"
            # fingerprints are arrays
            if entry[feature].flatten.size == 1
              lazar.fingerprints[compound][lazar.features.index(feature)] = entry[feature].flatten.first
            else
              LOGGER.warn "More than one entry (#{entry[feature].inspect}) for compound #{compound}, feature #{feature}"
            end
          end
        end
      end
    end

    training_activities.data_entries.each do |compound,entry|
      lazar.activities[compound] = [] unless lazar.activities[compound]
      # A compound may carry no entry at all for the prediction feature;
      # treat a missing entry like an empty one instead of calling nil.empty?.
      values = entry[params[:prediction_feature]]
      next if values.nil? or values.empty?
      values.each do |value|
        case value.to_s
        when "true"
          lazar.activities[compound] << true
        when "false"
          lazar.activities[compound] << false
        else
          # Any other value is treated as numeric and switches the model to regression.
          halt 404, "0 values not allowed in training dataset. log10 is calculated internally." if value.to_f == 0
          lazar.activities[compound] << value.to_f
          lazar.prediction_algorithm = "Neighbors.local_svm_regression"
        end
      end
    end

    lazar.metadata[DC.title] = "lazar model for #{URI.decode(File.basename(prediction_feature))}"
    # TODO: fix dependentVariable
    lazar.metadata[OT.dependentVariables] = params[:prediction_feature]
    lazar.metadata[OT.trainingDataset] = dataset_uri
    lazar.metadata[OT.featureDataset] = feature_dataset_uri

    if training_activities.feature_type.to_s == "classification"
      lazar.metadata[OT.isA] = OTA.ClassificationLazySingleTarget
    elsif training_activities.feature_type.to_s == "regression"
      lazar.metadata[OT.isA] = OTA.RegressionLazySingleTarget
    end

    lazar.metadata[OT.parameters] = [
      {DC.title => "dataset_uri", OT.paramValue => dataset_uri},
      {DC.title => "prediction_feature", OT.paramValue => prediction_feature},
      {DC.title => "feature_generation_uri", OT.paramValue => feature_generation_uri}
    ]

    model_uri = lazar.save(@subjectid)
    LOGGER.info model_uri + " created #{Time.now}"
    # The block's value (the model URI) is handed back to Task.create.
    model_uri
  end

  response['Content-Type'] = 'text/uri-list'
  halt 503,task.uri+"\n" if task.status == "Cancelled"
  halt 202,task.uri
end
|