diff --git a/qgram.py b/qgram.py index 158b51a49277dc91d9d6ffa781ea3d6294394d9e..24d62fa47fe151d20dcc7eecf1943e312dbda088 100644 --- a/qgram.py +++ b/qgram.py @@ -40,7 +40,7 @@ class QGramIndex: jaccard = count / (n_term + n_query - count) result.append((jaccard, term)) - result.sort(key=lambda x: x[0]) + result.sort(key=lambda x: -x[0]) return result @@ -55,7 +55,7 @@ class QGramIndex: The beginning and end of the string is denoted by a dollar sign ($). If the term is too short, the qgrams might be shorter than q. """ - term = f"${term}$" + term = f"${term}$".lower() qgrams = [] if len(term) < self.q_param: diff --git a/test_qgram.py b/test_qgram.py index 01b4fba0d5d2b3dd22ad0a3ce672a821c15e4f1f..88fcf2dda730aa36fb61df65c61e8bebd0b12e8f 100644 --- a/test_qgram.py +++ b/test_qgram.py @@ -9,6 +9,12 @@ from qgram import QGramIndex class QGramChunkTest(unittest.TestCase): """Test the implementation""" + def test_case(self): + """Test the chunk converts to lower case""" + index = QGramIndex() + qgrams = index._chunk("WeaTHer") + self.assertEqual(qgrams, ["$we", "wea", "eat", "ath", "the", "her", "er$"]) + def test_long(self): """Test the chunk method with a long string""" index = QGramIndex()