# Copyright 2020, OpenTelemetry Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA, TruncatedSVD
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.pipeline import FeatureUnion, Pipeline
from sklearn.preprocessing import Normalizer, StandardScaler

# Shared fixture data: the iris dataset as a (features, labels) pair,
# partitioned into train and test subsets that the helpers below rely on.
# NOTE(review): no random_state is passed to train_test_split, so the
# partition presumably differs between runs -- confirm that no test depends
# on specific rows landing in a specific subset.
X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y)


def pipeline():
    """Build and fit a small multi-component model for use in tests.

    The model chains a standard scaler, a normalizer, a feature union of a
    one-component PCA and a two-component truncated SVD, and a ten-tree
    random forest classifier. It is fitted on the module-level
    ``X_train`` / ``y_train`` split before being returned.

    Returns:
        The fitted ``Pipeline`` instance.
    """
    decompositions = FeatureUnion(
        [
            ("pca", PCA(n_components=1)),
            ("svd", TruncatedSVD(n_components=2)),
        ],
        n_jobs=1,  # parallelized components won't generate spans
    )
    steps = [
        ("scaler", StandardScaler()),
        ("normal", Normalizer()),
        ("union", decompositions),
        ("class", RandomForestClassifier(n_estimators=10)),
    ]
    estimator = Pipeline(steps)
    estimator.fit(X_train, y_train)
    return estimator


def random_input():
    """Pick one row of the module-level feature matrix ``X`` at random.

    The row index is drawn as a length-1 array, so indexing with it keeps
    the leading axis: the result is a one-row slice of ``X`` rather than a
    flattened record.
    """
    picked = np.random.choice(len(X), size=1)
    return X[picked, :]