add tests

georgia-tech-db · americast · Sep 30, 2023 · Sep 13, 2023 · Sep 13, 2023 · Sep 13, 2023
commit 52c563e2d81d27e68df9956ccf084a0b9f480d6f
diff --git a/docs/source/reference/ai/model-forecasting.rst b/docs/source/reference/ai/model-forecasting.rst
@@ -53,8 +53,12 @@ EvaDB's default forecast framework is `statsforecast <https://nixtla.github.io/s
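-     - The name of the column that contains the datestamp, wihch should be of a format expected by Pandas, ideally YYYY-MM-DD for a date or YYYY-MM-DD HH:MM:SS for a timestamp. Please visit the `pandas documentation <https://pandas.pydata.org/docs/reference/api/pandas.to_datetime.html>`_ for details. If not provided, an auto increasing ID column will be used.
+     - The name of the column that contains the datestamp, which should be of a format expected by Pandas, ideally YYYY-MM-DD for a date or YYYY-MM-DD HH:MM:SS for a timestamp. Please visit the `pandas documentation <https://pandas.pydata.org/docs/reference/api/pandas.to_datetime.html>`_ for details. If not provided, an auto increasing ID column will be used.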
    * - ID
      - The name of column that represents an identifier for the series. If not provided, the whole table is considered as one series of data.
+   * - LIBRARY
+     - We can select one of ``statsforecast`` (default) or ``neuralforecast``. ``statsforecast`` provides access to statistical forecasting methods, while ``neuralforecast`` gives access to deep-learning-based forecasting methods.
    * - MODEL
-     - We can select one of AutoARIMA, AutoCES, AutoETS, AutoTheta. The default is AutoARIMA. Check `Automatic Forecasting <https://nixtla.github.io/statsforecast/src/core/models_intro.html#automatic-forecasting>`_ to learn details about these models.
+     - If LIBRARY is ``statsforecast``, we can select one of AutoARIMA, AutoCES, AutoETS, AutoTheta. The default is AutoARIMA. Check `Automatic Forecasting <https://nixtla.github.io/statsforecast/src/core/models_intro.html#automatic-forecasting>`_ to learn details about these models. If LIBRARY is ``neuralforecast``, we can select one of NHITS or NBEATS. The default is NBEATS. Check the `NBEATS documentation <https://nixtla.github.io/neuralforecast/models.nbeats.html>`_ for details.
+   * - EXOGENOUS
+     - The names of the columns to be treated as exogenous variables, separated by commas. These columns are considered for forecasting by the backend only when LIBRARY is ``neuralforecast``.
    * - Frequency
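-     - A string indicating the frequency of the data. The common used ones are D, W, M, Y, which repestively represents day-, week-, month- and year- end frequency. The default value is M. Check `pandas available frequencies <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`_ for all available frequencies.
+     - A string indicating the frequency of the data. The commonly used ones are D, W, M, Y, which respectively represent day-, week-, month- and year-end frequency. The default value is M. Check `pandas available frequencies <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`_ for all available frequencies.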
 

diff --git a/evadb/executor/create_function_executor.py b/evadb/executor/create_function_executor.py
@@ -244,7 +244,7 @@ def handle_forecasting_function(self):
         """
             Set or infer data frequency
         """
-        
+
         if "frequency" not in arg_map.keys():
             arg_map["frequency"] = pd.infer_freq(data["ds"])
         frequency = arg_map["frequency"]
@@ -297,19 +297,21 @@ def handle_forecasting_function(self):
                 raise FunctionIODefinitionError(err_msg)
             model_args = {}
             if "exogenous" in arg_map.keys():
-                exogenous_args = [x.strip() for x in arg_map["exogenous"].strip().split(",")]
+                exogenous_args = [
+                    x.strip() for x in arg_map["exogenous"].strip().split(",")
+                ]
                 model_args["hist_exog_list"] = exogenous_args
 
             if "auto" not in arg_map["model"].lower():
-                model_args["input_size"] = 2*horizon
+                model_args["input_size"] = 2 * horizon
                 model_args["max_steps"] = 50
-            
+
             model_args["h"] = horizon
 
             model = NeuralForecast(
-                    [model_here(**model_args)],
-                    freq=new_freq,
-                )
+                [model_here(**model_args)],
+                freq=new_freq,
+            )
 
         # """
         #     Statsforecast implementation
@@ -335,8 +337,6 @@ def handle_forecasting_function(self):
                 logger.error(err_msg)
                 raise FunctionIODefinitionError(err_msg)
 
-
-
             else:
                 model = StatsForecast(
                     [model_here(season_length=season_length)], freq=new_freq
@@ -346,22 +346,18 @@ def handle_forecasting_function(self):
 
         encoding_text = data.to_string()
         if "exogenous" in arg_map.keys():
-            encoding_text += "exogenous_"+str(sorted(exogenous_args))
+            encoding_text += "exogenous_" + str(sorted(exogenous_args))
 
         model_dir = os.path.join(
             self.db.config.get_value("storage", "model_dir"),
             self.node.name,
             library,
             arg_map["model"],
-            str(hashlib.sha256(encoding_text.encode()).hexdigest())
+            str(hashlib.sha256(encoding_text.encode()).hexdigest()),
         )
         Path(model_dir).mkdir(parents=True, exist_ok=True)
 
-        model_save_name = (
-            "horizon"
-            + str(horizon)
-            + ".pkl"
-        )
+        model_save_name = "horizon" + str(horizon) + ".pkl"
 
         model_path = os.path.join(model_dir, model_save_name)
 

diff --git a/setup.py b/setup.py
@@ -120,6 +120,7 @@ def read(path, encoding="utf-8"):
 
 forecasting_libs = [
-    "statsforecast" # MODEL TRAIN AND FINE TUNING
+    "statsforecast",  # MODEL TRAIN AND FINE TUNING
+    "neuralforecast",  # MODEL TRAIN AND FINE TUNING
 ]
 
 ### NEEDED FOR DEVELOPER TESTING ONLY

diff --git a/test/integration_tests/long/test_model_forecasting.py b/test/integration_tests/long/test_model_forecasting.py
@@ -30,7 +30,6 @@ def setUpClass(cls):
         # reset the catalog manager before running each test
         cls.evadb.catalog().reset()
 
-
         create_table_query = """
             CREATE TABLE AirData (\
             unique_id TEXT(30),\
@@ -116,7 +115,8 @@ def test_forecast(self):
         result = execute_query_fetch_all(self.evadb, predict_query)
         self.assertEqual(len(result), 12)
         self.assertEqual(
-            result.columns, ["airpanelforecast.unique_id", "airpanelforecast.ds", "airpanelforecast.y"]
+            result.columns,
+            ["airpanelforecast.unique_id", "airpanelforecast.ds", "airpanelforecast.y"],
         )
 
     @forecast_skip_marker