# What is the length of the BERTopic default dataset (20 Newsgroups) from sklearn?
from sklearn.datasets import fetch_20newsgroups
# Load the 20 Newsgroups texts. subset='all' requests every available split,
# and `remove` asks scikit-learn to strip headers, footers and quoted replies
# so that only the message bodies are measured.
docs = fetch_20newsgroups(subset='all', remove=('headers', 'footers', 'quotes'))['data']

# Length of each document as reported by len() on the document itself
# (character count, assuming each entry is a str as scikit-learn documents).
len_docs = [len(doc) for doc in docs]

# Report the corpus size and the mean document length.
print("Num of docs")
print(len(docs))
print("Average Length of doc")
print(sum(len_docs) / len(len_docs))