diff --git a/.dvc/config b/.dvc/config
index 4c726213678f6465ef97b68b715f8999a06eb7bb..43096e3a953538dc31a69d2b2dc3042ae06101a9 100644
--- a/.dvc/config
+++ b/.dvc/config
@@ -1,3 +1,5 @@
+[core]
+    remote = amazonas
 ['remote "amazonas"']
     url = s3://mlflow/
     endpointurl = https://s3.ds.sit-servers.net
diff --git a/.gitignore b/.gitignore
index f163a26b38e7b9c5e6450a40e0c4ebf21f979405..7bbaed8e992350e66a5d59e7e8739a0c48d085cb 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,5 @@
 /MovieLenseSmall
 /MovieLense20M
+__pycache__/
+.ipynb_checkpoints/
+*.ipynb
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
new file mode 100644
index 0000000000000000000000000000000000000000..24388e06c211e3840a511858c0d9ec46f4e8975b
--- /dev/null
+++ b/.gitlab-ci.yml
@@ -0,0 +1,18 @@
+
+stages:
+ - test
+
+
+################################################################################
+# Unit tests: shared job template (hidden job, reused via the YAML anchor).
+.unittest: &unittest_template
+  stage: test
+  script:
+    # pytest is not listed in requirements.txt, so install it explicitly.
+    - pip install -r requirements.txt pytest
+    - pytest
+
+unittest:py3.10:
+  <<: *unittest_template
+  image: python:3.10
+
diff --git a/movies.py b/movies.py
new file mode 100644
index 0000000000000000000000000000000000000000..3212de3eadc7d5f42fcc00e29de6f508bcfc61d2
--- /dev/null
+++ b/movies.py
@@ -0,0 +1,29 @@
+
+import collections
+import pandas as pd
+import tensorflow as tf
+
+def load_movielense():
+    """Read the two MovieLense20M CSV files into dataframes.
+
+    Returns:
+        A ``(ratings_df, movies_df)`` tuple, in that order.
+    """
+    csv_paths = ("MovieLense20M/rating.csv", "MovieLense20M/movie.csv")
+    ratings_df, movies_df = (pd.read_csv(path) for path in csv_paths)
+    return ratings_df, movies_df
+
+def collect_user_context(ratings, min_rating=2.1):
+    """Group the movies each user rated at least ``min_rating``, oldest first.
+
+    Columns are read by name (``userId``, ``movieId``, ``rating``,
+    ``timestamp``), so column order and extra columns do not matter and ids
+    are not upcast to float the way a mixed-dtype ``DataFrame.values`` does.
+    Returns a ``defaultdict(list)`` mapping user id -> list of movie ids.
+    """
+    ordered = ratings.sort_values(by=["userId", "timestamp"])
+    kept = ordered[ordered["rating"] >= min_rating]
+    user_movies = collections.defaultdict(list)
+    for user_id, movie_id in zip(kept["userId"], kept["movieId"]):
+        user_movies[user_id].append(movie_id)
+    return user_movies
diff --git a/movies_test.py b/movies_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..a37ae83eb831cc7b31416311b246ae6b07eb562a
--- /dev/null
+++ b/movies_test.py
@@ -0,0 +1,28 @@
+
+import pandas as pd
+import unittest
+import movies
+
+class LoadTests(unittest.TestCase):
+    """Unit tests for the dataset loading and preparation helpers."""
+
+    @staticmethod
+    def toy_ratings():
+        """Build a five-row ratings dataframe covering two users."""
+        rows = [
+            (1, 1, 3, 1),
+            (1, 2, 1, 2),
+            (2, 1, 4, 7),
+            (2, 3, 3, 5),
+            (1, 4, 5, 4),
+        ]
+        return pd.DataFrame(rows, columns=["userId", "movieId", "rating", "timestamp"])
+
+    def test_collect_user_context(self):
+        """Check per-user grouping, rating filter and time ordering."""
+        user_movies = movies.collect_user_context(self.toy_ratings())
+        expected = {1: [1, 4], 2: [3, 1]}
+        for user_id, movie_ids in expected.items():
+            self.assertEqual(user_movies[user_id], movie_ids)
+        self.assertEqual(set(user_movies.keys()), {1, 2})
+
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..7c4c279778d0160b584c489ca6f5bb65a9efdc74
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,4 @@
+# pandas is imported directly by movies.py and movies_test.py.
+pandas
+tensorflow-recommenders
+tensorflow-datasets