diff --git a/.gitignore b/.gitignore
index 7bbaed8e992350e66a5d59e7e8739a0c48d085cb..9232e76247712f684b300536d4782dc9099e861b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,3 @@
-/MovieLenseSmall
-/MovieLense20M
 __pycache__/
 .ipynb_checkpoints/
 *.ipynb
diff --git a/MovieLense20M.dvc b/MovieLense20M.dvc
deleted file mode 100644
index 8978b83dfcaeaa5dd08f2fa1a38b16ea160685d6..0000000000000000000000000000000000000000
--- a/MovieLense20M.dvc
+++ /dev/null
@@ -1,5 +0,0 @@
-outs:
-- md5: c6a87cfd78593a7f45605e0156cee479.dir
-  size: 691847025
-  nfiles: 2
-  path: MovieLense20M
diff --git a/data/.gitignore b/data/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..5bf17d53176e323452b360116b725beaa208da7d
--- /dev/null
+++ b/data/.gitignore
@@ -0,0 +1,2 @@
+/MovieLense20M
+/MovieLenseSmall
diff --git a/data/MovieLense20M.dvc b/data/MovieLense20M.dvc
new file mode 100644
index 0000000000000000000000000000000000000000..f047369b282c26830e52137ed4e01624c58b28bc
--- /dev/null
+++ b/data/MovieLense20M.dvc
@@ -0,0 +1,5 @@
+outs:
+- md5: 25a20e70e982dc413786f68366518a85.dir
+  size: 697892721
+  nfiles: 3
+  path: MovieLense20M
diff --git a/MovieLenseSmall.dvc b/data/MovieLenseSmall.dvc
similarity index 100%
rename from MovieLenseSmall.dvc
rename to data/MovieLenseSmall.dvc
diff --git a/movies.py b/movies.py
index 858d463ed448c8613ee888db080787aef4b54bf0..fa2218ec8e0a66f8fbf4ef620a8d3ca9ffc09138 100644
--- a/movies.py
+++ b/movies.py
@@ -9,10 +9,10 @@ import pandas as pd
 def load_movielense():
     """Load an return movies and ratings as dataframes"""
 
-    ratings_filename = "MovieLense20M/rating.csv"
+    ratings_filename = "data/MovieLense20M/rating.csv"
     ratings_df = pd.read_csv(ratings_filename)
 
-    movies_filename = "MovieLense20M/movie.csv"
+    movies_filename = "data/MovieLense20M/movie.csv"
     movies_df = pd.read_csv(movies_filename)
 
     return ratings_df, movies_df