# opam package metadata for linwrap (file format: opam 2.0).
opam-version: "2.0"
authors: "Francois Berenger"
maintainer: "unixjunkie@sdf.org"
homepage: "https://github.com/UnixJunkie/linwrap"
bug-reports: "https://github.com/UnixJunkie/linwrap/issues"
dev-repo: "git+https://github.com/UnixJunkie/linwrap.git"
license: "BSD-3-Clause"
# Build this package with dune; `name` and `jobs` are opam variables
# (the package name and the number of parallel build jobs).
build: ["dune" "build" "-p" name "-j" jobs]
# Additionally copy the bin/ecfp6.py script into opam's bin directory,
# renamed to linwrap_ecfp6.py.
install: ["cp" "bin/ecfp6.py" "%{bin}%/linwrap_ecfp6.py"]
# Mandatory dependencies; all version constraints are lower bounds.
depends: [
  "base-unix"
  "batteries" {>= "3.3.0"}
  "bst"
  "conf-liblinear-tools"
  "cpm" {>= "11.0.0"}
  "dokeysto" # possible perf. regression: dokeysto_camltc -> dokeysto
  "ocaml" {>= "5.0.0"} # because camltc is not yet ready for ocaml >= 5.0.0
  "dolog" {>= "6.0.0"}
  "dune" {>= "1.10"}
  "minicli" {>= "5.0.0"}
  "molenc"
  "parany" {>= "11.0.0"}
]
# The software can compile and install without the depopts;
# however, some tools and options will not work anymore at run-time.
depopts: [
  "conf-gnuplot"
  "conf-python-3"
  "conf-rdkit"
]
# One-line summary followed by the long description (includes the CLI usage text).
synopsis: "Wrapper on top of liblinear-tools"
description: """
Linwrap can be used to train a L2-regularized logistic regression classifier
or a linear Support Vector Regressor.
You can optimize C (the L2 regularization parameter), w (the class weight)
or k (the number of bags, i.e. use bagging).
You can also find the optimal classification threshold using MCC maximization,
use k-folds cross validation, parallelization, etc.
In the regression case, you can only optimize C and epsilon.

When using bagging, each model is trained on balanced bootstraps
from the training set (one bootstrap for the positive class,
one for the negative class).
The size of the bootstrap is the size of the smallest (under-represented)
class.

usage: linwrap
  -i <filename>: training set or DB to screen
  [-o <filename>]: predictions output file
  [-np <int>]: ncores
  [-c <float>]: fix C
  [-e <float>]: fix epsilon (for SVR);
                (0 <= epsilon <= max_i(|y_i|))
  [-w <float>]: fix w1
  [--no-plot]: no gnuplot
  [-k <int>]: number of bags for bagging (default=off)
  [{-n|--NxCV} <int>]: folds of cross validation
  [--mcc-scan]: MCC scan for a trained model (requires n>1)
                also requires (c, w, k) to be known
  [--seed <int>]: fix random seed
  [-p <float>]: training set portion (in [0.0:1.0])
  [--pairs]: read from .AP files (atom pairs; will offset feat. indexes by 1)
  [--train <train.liblin>]: training set (overrides -p)
  [--valid <valid.liblin>]: validation set (overrides -p)
  [--test <test.liblin>]: test set (overrides -p)
  [{-l|--load} <filename>]: prod. mode; use trained models
  [{-s|--save} <filename>]: train. mode; save trained models
  [-f]: force overwriting existing model file
  [--scan-c]: scan for best C
  [--scan-e <int>]: epsilon scan #steps for SVR
  [--regr]: regression (SVR); also, implied by -e and --scan-e
  [--scan-w]: scan weight to counter class imbalance
  [--w-range <float>:<int>:<float>]: specific range for w
                                     (semantic=start:nsteps:stop)
  [--e-range <float>:<int>:<float>]: specific range for e
                                     (semantic=start:nsteps:stop)
  [--c-range <float,float,...>] explicit scan range for C
                                (example='0.01,0.02,0.03')
  [--k-range <int,int,...>] explicit scan range for k
                            (example='1,2,3,5,10')
  [--scan-k]: scan number of bags (advice: optim. k rather than w)
"""
# Source archive for release v9.1.5.
# NOTE(review): only an md5 checksum is given; consider adding a sha256 or
# sha512 entry computed from the tarball.
url {
  src: "https://github.com/UnixJunkie/linwrap/archive/v9.1.5.tar.gz"
  checksum: "md5=f59e8b0452a5bb33f0fe239e524b5b40"
}