from flask import Flask, render_template, request

import nltk

from nltk.tokenize import word_tokenize

from nltk.corpus import stopwords

from nltk.stem import PorterStemmer

from nltk.tokenize import sent_tokenize

from collections import Counter


# Download the 'punkt' and 'stopwords' NLTK resources when this module is
# imported. preprocess_text() below calls word_tokenize() and
# stopwords.words('english'); presumably these downloads supply their data.
nltk.download('punkt')

nltk.download('stopwords')


# Flask application object for this module.
app = Flask(__name__)


def preprocess_text(text):
    """Convert raw text into a list of stemmed, stop-word-free tokens.

    The text is lower-cased and split with ``word_tokenize``; tokens that
    appear in NLTK's English stop-word list are discarded, and every
    remaining token is reduced with the Porter stemmer. Token order is
    preserved. No filtering other than the stop-word check is applied, so
    whatever else the tokenizer emits is stemmed and returned as-is.

    Args:
        text: The input string to normalize.

    Returns:
        A list of stemmed tokens in their original order.
    """
    tokens = word_tokenize(text.lower())
    english_stops = frozenset(stopwords.words('english'))
    stem = PorterStemmer().stem

    stems = []
    for token in tokens:
        # Skip stop words; everything else gets stemmed.
        if token in english_stops:
            continue
        stems.append(stem(token))
    return stems


def calculate_similarity(text1, text2):

    # Preprocess texts

    words1 = preprocess_text(text1)

    words2 = preprocess_text(text2)

    

    # Calculate similarity using cosine similarity

    vector1 = Counter(words1)

    vector2 = Counter(words2)

    

    intersection = set(vector1.keys()) & set(vector2.keys())

    numerator = sum([