Files
2020-11-07 13:32:10 -05:00

55 lines
1.8 KiB
Python

# Copyright 2020, OpenTelemetry Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA, TruncatedSVD
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.pipeline import FeatureUnion, Pipeline
from sklearn.preprocessing import Normalizer, StandardScaler
# Load the iris feature matrix (X) and target vector (y) once, at import time;
# both functions below read these module-level names.
X, y = load_iris(return_X_y=True)
# Split into train/test partitions with train_test_split's default settings.
# No random_state is passed, so the split is not reproducible unless NumPy's
# global RNG has been seeded beforehand.
X_train, X_test, y_train, y_test = train_test_split(X, y)
def pipeline():
    """Build and fit a multi-stage scikit-learn pipeline used as a test model.

    The steps are, in order: ``StandardScaler``, ``Normalizer``, a
    ``FeatureUnion`` of ``PCA(n_components=1)`` and
    ``TruncatedSVD(n_components=2)``, and finally a
    ``RandomForestClassifier`` with ``n_estimators=10``.

    A new pipeline is constructed and fitted on the module-level
    ``X_train`` / ``y_train`` on every call.

    Returns:
        The fitted ``Pipeline`` instance.
    """
    scaler = StandardScaler()
    normalizer = Normalizer()

    # n_jobs=1 keeps the union in-process: parallelized components won't
    # generate spans.
    union = FeatureUnion(
        transformer_list=[
            ("pca", PCA(n_components=1)),
            ("svd", TruncatedSVD(n_components=2)),
        ],
        n_jobs=1,
    )
    classifier = RandomForestClassifier(n_estimators=10)

    steps = [
        ("scaler", scaler),
        ("normal", normalizer),
        ("union", union),
        ("class", classifier),
    ]
    fitted = Pipeline(steps=steps)
    fitted.fit(X_train, y_train)
    return fitted
def random_input():
    """Return one randomly selected record from the module-level ``X``.

    The row index is drawn with ``np.random.choice`` using ``size=1``, so the
    index is a length-one array and the result keeps a leading axis of length
    one (a single-row slice of ``X``) instead of being flattened.
    """
    row_index = np.random.choice(X.shape[0], size=1)
    return X[row_index]