diff options
author | mguetlein <martin.guetlein@gmail.com> | 2012-07-04 15:16:58 +0200 |
---|---|---|
committer | mguetlein <martin.guetlein@gmail.com> | 2012-07-04 15:16:58 +0200 |
commit | c3865b1887979933494778703e469c549335a6c3 (patch) | |
tree | 8caeca53533a17505165c8c801beaa6a95027645 | |
parent | 411f35653986227ddc314526043854f3c36e014e (diff) |
dirty speedup hack for parse csv
-rw-r--r-- | lib/parser.rb | 15 | ||||
-rw-r--r-- | lib/r-util.rb | 22 |
2 files changed, 22 insertions, 15 deletions
diff --git a/lib/parser.rb b/lib/parser.rb index 2193cf4..7bf46cf 100644 --- a/lib/parser.rb +++ b/lib/parser.rb @@ -373,7 +373,8 @@ module OpenTox } } - input.each_with_index { |row, i| + first_data_row = true + input.each_with_index { |row, i| drop=false row = split_row(row) raise "Entry has size #{row.size}, different from headers (#{headers.size})" if row.size != headers.size @@ -382,7 +383,10 @@ module OpenTox drop=true drop_missing=true if (row.count("") == row.size-1) end - add_values(row, regression_features) unless (drop_missing && drop) + unless (drop_missing && drop) + add_values(row, regression_features, first_data_row) + first_data_row = false + end if (drop_missing && drop) @format_errors << "Row #{i} not added" end @@ -458,7 +462,7 @@ module OpenTox # @param Array A row split up as an array # @param Array Indicator for regression for each field # @param Array Indices for duplicate features - def add_values(row, regression_features) + def add_values(row, regression_features, add_features=true) id = row.shift case id @@ -475,6 +479,8 @@ module OpenTox @duplicates[compound.inchi] = [] unless @duplicates[compound.inchi] @duplicates[compound.inchi] << id+", "+row.join(", ") + @dataset.add_compound(compound.uri) + feature_idx = 0 row.each_index do |i| @@ -502,7 +508,8 @@ module OpenTox feature_idx += 1 if val != nil - @dataset.add(compound.uri, feature, val) + @dataset.add_feature(feature) if add_features + @dataset.add(compound.uri, feature, val, true) if @feature_types[feature].include? OT.NominalFeature @dataset.features[feature][OT.acceptValue] = [] unless @dataset.features[feature][OT.acceptValue] @dataset.features[feature][OT.acceptValue] << val unless @dataset.features[feature][OT.acceptValue].include?(val) diff --git a/lib/r-util.rb b/lib/r-util.rb index 8357d0f..bbf98f3 100644 --- a/lib/r-util.rb +++ b/lib/r-util.rb @@ -61,11 +61,11 @@ module OpenTox # <0 -> array1 << array2 # 0 -> no significant difference # >0 -> array2 >> array1 - def paired_ttest(array1, array2, significance_level=0.95) + def ttest(array1, array2, paired, significance_level=0.95) @r.assign "v1",array1 @r.assign "v2",array2 - paired = array1.size==array2.size ? "T" : "F" - @r.eval "ttest = t.test(as.numeric(v1),as.numeric(v2),paired=#{paired})" + raise if paired && array1.size!=array2.size + @r.eval "ttest = t.test(as.numeric(v1),as.numeric(v2),paired=#{paired ? "T" : "F"})" t = @r.pull "ttest$statistic" p = @r.pull "ttest$p.value" if (1-significance_level > p) @@ -83,7 +83,7 @@ module OpenTox end - def ttest(array1, value2, significance_level=0.95) + def ttest_single_value(array1, value2, significance_level=0.95) @r.assign "v1",array1 @r.eval "ttest = t.test(as.numeric(v1),conf.level=#{significance_level})" min = @r.pull "ttest$conf.int[1]" @@ -151,7 +151,7 @@ module OpenTox hlines << [max_median,2+max_median_idx] hlines << [min_median,2+min_median_idx] plot_to_files(files, hlines) do |file| - @r.eval "boxplot(boxdata,main='#{title}',col=rep(2:#{data.size+1})#{param_str})" + @r.eval "superboxplot(boxdata,main='#{title}',col=rep(2:#{data.size+1})#{param_str})" end end @@ -522,20 +522,20 @@ module OpenTox begin File.delete(tmp); rescue; end end - @svg_plot_width = 14 - @svg_plot_height = 10 - + @@svg_plot_width = 12 + @@svg_plot_height = 8 + public def set_svg_plot_size(width,height) - @svg_plot_width = width - @svg_plot_height = height + @@svg_plot_width = width + @@svg_plot_height = height end private def plot_to_files(files,hlines=nil) files.each do |file| if file=~/(?i)\.svg/ - @r.eval("svg('#{file}',#{@svg_plot_width},#{@svg_plot_height})") + @r.eval("svg('#{file}',#{@@svg_plot_width},#{@@svg_plot_height})") elsif file=~/(?i)\.png/ @r.eval("png('#{file}')") else |