from flask import Flask, render_template, request
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.tokenize import sent_tokenize
from collections import Counter
# Fetch the NLTK data packages this module relies on: the 'punkt' tokenizer
# models and the 'stopwords' corpus. This runs at import time, in this order.
for _resource in ('punkt', 'stopwords'):
    nltk.download(_resource)

# The Flask application object for this module.
app = Flask(__name__)
def preprocess_text(text):
    """Convert raw text into a list of stemmed tokens with stop words removed.

    The input is lower-cased and split with ``word_tokenize``. Tokens found
    in NLTK's English stop-word list are discarded, and each remaining token
    is reduced with a ``PorterStemmer``. No other filtering is applied, so
    every non-stop-word token the tokenizer emits is stemmed and returned.

    Args:
        text: The string to preprocess.

    Returns:
        A list of stemmed tokens, in the order they appeared in ``text``.
    """
    # Lower-case before tokenizing so the stop-word check is case-insensitive.
    tokens = word_tokenize(text.lower())

    # A set gives constant-time membership tests while filtering.
    english_stops = set(stopwords.words('english'))

    porter = PorterStemmer()
    result = []
    for token in tokens:
        if token in english_stops:
            continue
        result.append(porter.stem(token))
    return result
def calculate_similarity(text1, text2):
# Preprocess texts
words1 = preprocess_text(text1)
words2 = preprocess_text(text2)
# Calculate similarity using cosine similarity
vector1 = Counter(words1)
vector2 = Counter(words2)
intersection = set(vector1.keys()) & set(vector2.keys())
numerator = sum([
0 Comments