Visualizations
Many Stats Can Be Plotted via Plot Recipes
s = fit!(Series(Hist(25), Hist(-5:5)), randn(10^6))
plot(s)
Naive Bayes Classifier
The `NBClassifier` type stores conditional histograms of the predictor variables, allowing you to plot approximate "group by" distributions:
# make data
x = randn(10^5, 5)
y = x * [1,3,5,7,9] .> 0
o = NBClassifier(5, Bool) # 5 predictors with Boolean categories
fit!(o, (x, y))
plot(o)
Mosaic Plots
The `Mosaic` type allows you to plot the relationship between two categorical variables. It is typically more useful than a bar plot, as class probabilities are given by the horizontal widths.
x = rand([true, true, false], 10^5)
y = map(xi -> xi ? rand(1:3) : rand(1:4), x)
o = fit!(Mosaic(Bool, Int), [x y])
plot(o)
Partitions
The `Partition` type summarizes sections of a data stream using any `OnlineStat`, and is therefore extremely useful in visualizing huge datasets, as summaries are plotted rather than every single observation.
Continuous Data
y = cumsum(randn(10^6)) + 100randn(10^6)
o = Partition(Hist(10))
fit!(o, y)
plot(o, xlab = "Nobs")
o = Partition(Mean())
o2 = Partition(Extrema())
s = Series(o, o2)
fit!(s, y)
plot(s, layout = 1, xlab = "Nobs")
Plot a custom function of the `OnlineStat`s (default is `value`)
Plot of mean +/- standard deviation:
o = Partition(Variance())
fit!(o, y)
plot(o, x -> [mean(x) - std(x), mean(x), mean(x) + std(x)], xlab = "Nobs")
savefig("partition_ci.png"); nothing # hide
Categorical Data
y = rand(["a", "a", "b", "c"], 10^6)
o = Partition(CountMap(String), 75)
fit!(o, y)
plot(o, xlab = "Nobs")
Indexed Partitions
The `Partition` type can only track the number of observations in the x-axis. If you wish to plot one variable against another, you can use an `IndexedPartition`.
x = randn(10^5)
y = x + randn(10^5)
o = fit!(IndexedPartition(Float64, Hist(10)), [x y])
plot(o, ylab = "Y", xlab = "X")
x = rand('a':'z', 10^5)
y = Float64.(x) + randn(10^5)
o = fit!(IndexedPartition(Char, Extrema()), [x y])
plot(o, xlab = "Category")
x = rand(10^5)
y = rand(1:5, 10^5)
o = fit!(IndexedPartition(Float64, CountMap(Int)), zip(x,y))
plot(o, xlab = "X", ylab = "Y")
x = rand(Date(2000):Date(2020), 10^5)
y = Dates.year.(x) + randn(10^5)
o = fit!(IndexedPartition(Date, Hist(20)), [x y])
plot(o, xlab = "Date")
WARNING: convert(::Type{R}, x::Dates.Date) where R <: Real is deprecated, use R(Dates.value(x)) instead.
Stacktrace:
[1] depwarn(::String, ::Symbol) at ./deprecated.jl:70
[2] convert(::Type{Float64}, ::Date) at ./deprecated.jl:57
[3] push!(::Array{Float64,1}, ::Date) at ./array.jl:646
[4] macro expansion at /home/travis/.julia/v0.6/OnlineStats/src/viz/recipes.jl:223 [inlined]
[5] apply_recipe(::Dict{Symbol,Any}, ::Array{OnlineStats.Part{Date,OnlineStats.Hist{Number,OnlineStats.AdaptiveBins}},1}) at /home/travis/.julia/v0.6/RecipesBase/src/RecipesBase.jl:291
[6] _process_userrecipes(::Plots.Plot{Plots.GRBackend}, ::Dict{Symbol,Any}, ::Tuple{OnlineStats.IndexedPartition{Number,OnlineStats.Hist{Number,OnlineStats.AdaptiveBins},Date}}) at /home/travis/.julia/v0.6/Plots/src/pipeline.jl:81
[7] _plot!(::Plots.Plot{Plots.GRBackend}, ::Dict{Symbol,Any}, ::Tuple{OnlineStats.IndexedPartition{Number,OnlineStats.Hist{Number,OnlineStats.AdaptiveBins},Date}}) at /home/travis/.julia/v0.6/Plots/src/plot.jl:179
[8] (::RecipesBase.#kw##plot)(::Array{Any,1}, ::RecipesBase.#plot, ::OnlineStats.IndexedPartition{Number,OnlineStats.Hist{Number,OnlineStats.AdaptiveBins},Date}) at ./<missing>:0
[9] cd(::Documenter.Expanders.##8#10{Module}, ::String) at ./file.jl:70
[10] (::Documenter.Utilities.##19#20{Documenter.Expanders.##7#9{Documenter.Documents.Page,Module},Base.PipeEndpoint,Base.PipeEndpoint,Pipe,Array{UInt8,1}})() at /home/travis/.julia/v0.6/Documenter/src/Utilities/Utilities.jl:593
[11] withoutput(::Documenter.Expanders.##7#9{Documenter.Documents.Page,Module}) at /home/travis/.julia/v0.6/Documenter/src/Utilities/Utilities.jl:591
[12] runner(::Type{Documenter.Expanders.ExampleBlocks}, ::Base.Markdown.Code, ::Documenter.Documents.Page, ::Documenter.Documents.Document) at /home/travis/.julia/v0.6/Documenter/src/Expanders.jl:478
[13] dispatch(::Type{Documenter.Expanders.ExpanderPipeline}, ::Base.Markdown.Code, ::Vararg{Any,N} where N) at /home/travis/.julia/v0.6/Documenter/src/Selectors.jl:168
[14] expand(::Documenter.Documents.Document) at /home/travis/.julia/v0.6/Documenter/src/Expanders.jl:31
[15] runner(::Type{Documenter.Builder.ExpandTemplates}, ::Documenter.Documents.Document) at /home/travis/.julia/v0.6/Documenter/src/Builder.jl:178
[16] dispatch(::Type{Documenter.Builder.DocumentPipeline}, ::Documenter.Documents.Document, ::Vararg{Documenter.Documents.Document,N} where N) at /home/travis/.julia/v0.6/Documenter/src/Selectors.jl:168
[17] cd(::Documenter.##2#3{Documenter.Documents.Document}, ::String) at ./file.jl:70
[18] #makedocs#1(::Bool, ::Array{Any,1}, ::Function) at /home/travis/.julia/v0.6/Documenter/src/Documenter.jl:203
[19] (::Documenter.#kw##makedocs)(::Array{Any,1}, ::Documenter.#makedocs) at ./<missing>:0
[20] include_from_node1(::String) at ./loading.jl:576
[21] include(::String) at ./sysimg.jl:14
[22] eval(::Module, ::Any) at ./boot.jl:235
[23] process_options(::Base.JLOptions) at ./client.jl:286
[24] _start() at ./client.jl:371
while loading /home/travis/.julia/v0.6/OnlineStats/docs/make.jl, in expression starting on line 3