Remove some print statements within algorithmic functions (#7499)

* Remove commented-out print statements in algorithmic functions
* Encapsulate non-algorithmic code in __main__
* Remove unused print_matrix function
* Remove print statement in __init__
* Remove print statement from doctest
* Encapsulate non-algorithmic code in __main__
* Modify algorithm to return instead of print
* Encapsulate non-algorithmic code in __main__
* Refactor data_safety_checker to return instead of print
* updating DIRECTORY.md
* updating DIRECTORY.md
* Apply suggestions from code review
* [pre-commit.ci] auto fixes from pre-commit.com hooks

  for more information, see https://pre-commit.ci

Co-authored-by: github-actions <${GITHUB_ACTOR}@users.noreply.github.com>
Co-authored-by: Christian Clauss <cclauss@me.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2025-07-05 09:21:13 +08:00 · 2022-10-22 07:33:51 -04:00
parent 717f0e46d9
commit cc10b20beb
10 changed files with 69 additions and 64 deletions
--- a/machine_learning/forecasting/run.py
+++ b/machine_learning/forecasting/run.py
@@ -1,7 +1,7 @@
 """
 this is code for forecasting
 but i modified it and used it for safety checker of data
-for ex: you have a online shop and for some reason some data are
+for ex: you have an online shop and for some reason some data are
 missing (the amount of data that u expected are not supposed to be)
        then we can use it
 *ps : 1. ofc we can use normal statistic method but in this case
@@ -91,14 +91,14 @@ def interquartile_range_checker(train_user: list) -> float:
    return low_lim


-def data_safety_checker(list_vote: list, actual_result: float) -> None:
+def data_safety_checker(list_vote: list, actual_result: float) -> bool:
    """
    Used to review all the votes (list result prediction)
    and compare it to the actual result.
    input : list of predictions
-    output : print whether it's safe or not
+    output : True if the data is judged safe, False otherwise
-    >>> data_safety_checker([2,3,4],5.0)
-    Today's data is not safe.
+    >>> data_safety_checker([2, 3, 4], 5.0)
+    False
    """
    safe = 0
    not_safe = 0
@@ -107,50 +107,54 @@ def data_safety_checker(list_vote: list, actual_result: float) -> None:
            safe = not_safe + 1
        else:
            if abs(abs(i) - abs(actual_result)) <= 0.1:
-                safe = safe + 1
+                safe += 1
            else:
-                not_safe = not_safe + 1
-    print(f"Today's data is {'not ' if safe <= not_safe else ''}safe.")
+                not_safe += 1
+    return safe > not_safe


-# data_input_df = pd.read_csv("ex_data.csv", header=None)
-data_input = [[18231, 0.0, 1], [22621, 1.0, 2], [15675, 0.0, 3], [23583, 1.0, 4]]
-data_input_df = pd.DataFrame(data_input, columns=["total_user", "total_even", "days"])
+if __name__ == "__main__":
+    # data_input_df = pd.read_csv("ex_data.csv", header=None)
+    data_input = [[18231, 0.0, 1], [22621, 1.0, 2], [15675, 0.0, 3], [23583, 1.0, 4]]
+    data_input_df = pd.DataFrame(
+        data_input, columns=["total_user", "total_even", "days"]
+    )

-"""
-data column = total user in a day, how much online event held in one day,
-what day is that(sunday-saturday)
-"""
+    """
+    data column = total user in a day, how much online event held in one day,
+    what day is that(sunday-saturday)
+    """

-# start normalization
-normalize_df = Normalizer().fit_transform(data_input_df.values)
-# split data
-total_date = normalize_df[:, 2].tolist()
-total_user = normalize_df[:, 0].tolist()
-total_match = normalize_df[:, 1].tolist()
+    # start normalization
+    normalize_df = Normalizer().fit_transform(data_input_df.values)
+    # split data
+    total_date = normalize_df[:, 2].tolist()
+    total_user = normalize_df[:, 0].tolist()
+    total_match = normalize_df[:, 1].tolist()

-# for svr (input variable = total date and total match)
-x = normalize_df[:, [1, 2]].tolist()
-x_train = x[: len(x) - 1]
-x_test = x[len(x) - 1 :]
+    # for svr (input variable = total date and total match)
+    x = normalize_df[:, [1, 2]].tolist()
+    x_train = x[: len(x) - 1]
+    x_test = x[len(x) - 1 :]

-# for linear reression & sarimax
-trn_date = total_date[: len(total_date) - 1]
-trn_user = total_user[: len(total_user) - 1]
-trn_match = total_match[: len(total_match) - 1]
+    # for linear regression & sarimax
+    trn_date = total_date[: len(total_date) - 1]
+    trn_user = total_user[: len(total_user) - 1]
+    trn_match = total_match[: len(total_match) - 1]

-tst_date = total_date[len(total_date) - 1 :]
-tst_user = total_user[len(total_user) - 1 :]
-tst_match = total_match[len(total_match) - 1 :]
+    tst_date = total_date[len(total_date) - 1 :]
+    tst_user = total_user[len(total_user) - 1 :]
+    tst_match = total_match[len(total_match) - 1 :]

+    # voting system with forecasting
+    res_vote = [
+        linear_regression_prediction(
+            trn_date, trn_user, trn_match, tst_date, tst_match
+        ),
+        sarimax_predictor(trn_user, trn_match, tst_match),
+        support_vector_regressor(x_train, x_test, trn_user),
+    ]

-# voting system with forecasting
-res_vote = []
-res_vote.append(
-    linear_regression_prediction(trn_date, trn_user, trn_match, tst_date, tst_match)
-)
-res_vote.append(sarimax_predictor(trn_user, trn_match, tst_match))
-res_vote.append(support_vector_regressor(x_train, x_test, trn_user))
-
-# check the safety of todays'data^^
-data_safety_checker(res_vote, tst_user)
+    # check the safety of today's data (f-string so not_str is interpolated)
+    not_str = "" if data_safety_checker(res_vote, tst_user) else "not "
+    print(f"Today's data is {not_str}safe.")