16 lines
481 B
Python
16 lines
481 B
Python
import math
|
|
|
|
|
|
def idf(posting, document_count):
|
|
"""A function to calculate the inverse document frequency for a posting.
|
|
This is shared between the builder and the index.
|
|
"""
|
|
documents_with_term = 0
|
|
for field_name in posting:
|
|
if field_name == "_index":
|
|
continue
|
|
documents_with_term += len(posting[field_name].keys())
|
|
|
|
x = (document_count - documents_with_term + 0.5) / (documents_with_term + 0.5)
|
|
return math.log(1 + abs(x))
|