1
|
|
# -*- coding: utf-8 -*-
|
2
|
1
|
"""
|
3
|
|
@file
|
4
|
|
@brief Main functions to convert a machine learned *scikit-learn* model into a *grammar* model.
|
5
|
|
"""
|
6
|
1
|
from .g_sklearn_identify import identify_interpreter
|
7
|
|
|
8
|
|
|
9
|
1
|
def sklearn2graph(model, output_names=None, **kwargs):
    """
    Converts a *scikit-learn* model into a *grammar* model.

    The converter is selected by calling ``identify_interpreter(model)``
    and is then called with the model, *output_names* and every
    additional keyword argument.

    @param      model           scikit-learn model
    @param      output_names    names of the outputs
    @param      kwargs          additional parameters, sent to the converter
    @return                     what the selected converter returns
                                for *model* (the grammar model)

    Short list of additional parameters:

    - *with_loop*: the pseudo code includes loops,
      this option is not available everywhere.

    If *output_names* is None, default values
    will be given to the inputs and outputs.
    The following example shows how to use this function.
    A *scikit-learn* model is trained and converted
    into a graph which implements the prediction
    function with the *grammar* language.

    .. runpython::
        :showcode:

        from sklearn.linear_model import LogisticRegression
        from sklearn.datasets import load_iris
        iris = load_iris()
        X = iris.data[:, :2]
        y = iris.target
        y[y == 2] = 1
        lr = LogisticRegression()
        lr.fit(X, y)

        # The grammar model is the expected scoring model.
        from mlprodict.grammar_sklearn import sklearn2graph
        gr = sklearn2graph(lr, output_names=['Prediction', 'Score'])

        # We can even check what the function should produce as a score.
        # Types are strict.
        import numpy
        X = numpy.array([[numpy.float32(1), numpy.float32(2)]])
        e2 = gr.execute(Features=X[0, :])
        print(e2)

        # We display the result in JSON.
        ser = gr.export(lang='json', hook={'array': lambda v: v.tolist(),
                                           'float32': lambda v: float(v)})
        import json
        print(json.dumps(ser, sort_keys=True, indent=2))

    For this particular example, the function is calling
    :func:`sklearn_logistic_regression <mlprodict.grammar_sklearn.sklearn_converters_linear_model.sklearn_logistic_regression>`
    and the code which produces the model looks like:

    ::

        model = LogisticRegression()
        model.fit(...)

        coef = model.coef_.ravel()
        bias = numpy.float32(model.intercept_[0])

        gr_coef = MLActionCst(coef)
        gr_var = MLActionVar(coef, input_names)
        gr_bias = MLActionCst(bias)
        gr_dot = MLActionTensorDot(gr_coef, gr_var)
        gr_dist = MLActionAdd(gr_dot, gr_bias)
        gr_sign = MLActionSign(gr_dist)
        gr_conc = MLActionConcat(gr_sign, gr_dist)
        gr_final = MLModel(gr_conc, output_names, name="LogisticRegression")

    Internally, the function represents any kind of function as a graph.
    This graph can easily be exported in any format, :epkg:`Python` or any
    other programming language. The goal is not to evaluate it as it is
    slow due to the extra checks run all along the evaluation to make sure
    types are consistent.
    The current implementation supports conversion into C.

    .. runpython::
        :showcode:

        from sklearn.linear_model import LogisticRegression
        from sklearn.datasets import load_iris
        iris = load_iris()
        X = iris.data[:, :2]
        y = iris.target
        y[y == 2] = 1
        lr = LogisticRegression()
        lr.fit(X, y)

        # A grammar tree is the expected scoring model.
        from mlprodict.grammar_sklearn import sklearn2graph
        gr = sklearn2graph(lr, output_names=['Prediction', 'Score'])

        # We export the model as C code.
        ccode = gr.export(lang='c')
        # We print after a little bit of cleaning.
        print("\\n".join(_ for _ in ccode['code'].split("\\n") if "//" not in _))

    Functions ``adot``, ``sign``, ``concat`` are implemented in module
    :mod:`mlprodict.grammar_sklearn.cc.c_compilation`. Function
    :func:`compile_c_function <mlprodict.grammar_sklearn.cc.c_compilation.compile_c_function>`
    can compile this with :epkg:`cffi`.

    ::

        from mlprodict.grammar_sklearn.cc.c_compilation import compile_c_function
        fct = compile_c_function(code_c, 2)
        e2 = fct(X[0, :])
        print(e2)

    The output is the same as the prediction given by *scikit-learn*.
    """
    # Pick the converter for this model, then let it build the result.
    converter = identify_interpreter(model)
    graph = converter(model, output_names=output_names, **kwargs)
    return graph
|