47 changes: 47 additions & 0 deletions api.py
@@ -0,0 +1,47 @@
from flask import Flask, jsonify
import psycopg2

app = Flask(__name__)

# Database connection settings (placeholders; replace with real credentials)
DB_CONFIG = {
    "dbname": "your_db",
    "user": "your_user",
    "password": "your_password",
    "host": "your_host",
    "port": "your_port"
}

def connect_db():
    """Open a connection and cursor; return (None, None) on failure."""
    try:
        conn = psycopg2.connect(**DB_CONFIG)
        cursor = conn.cursor()
        return conn, cursor
    except Exception as e:
        print(f"Database connection error: {e}")
        return None, None

@app.route('/conversation-summary', methods=['GET'])
def conversation_summary():
    # Per-user conversation counts
    conn, cursor = connect_db()
    if conn is None:
        return jsonify({"error": "Database connection failed"}), 500

    cursor.execute("SELECT user_id, COUNT(*) FROM conversations GROUP BY user_id")
    data = cursor.fetchall()
    conn.close()
    return jsonify({"summary": data})

@app.route('/data-stats', methods=['GET'])
def data_stats():
    # Total number of stored conversations
    conn, cursor = connect_db()
    if conn is None:
        return jsonify({"error": "Database connection failed"}), 500

    cursor.execute("SELECT COUNT(*) FROM conversations")
    count = cursor.fetchone()
    conn.close()
    return jsonify({"total_conversations": count[0]})

if __name__ == '__main__':
    app.run(debug=True)
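A quick way to exercise these endpoints is a small client script. This is a minimal sketch, assuming the app is running on the Flask development server's default address (http://127.0.0.1:5000); adjust the base URL if the server is bound elsewhere.

import requests

BASE_URL = "http://127.0.0.1:5000"  # assumption: Flask dev server default

# Per-user conversation counts
summary = requests.get(f"{BASE_URL}/conversation-summary")
print(summary.status_code, summary.json())

# Total number of stored conversations
stats = requests.get(f"{BASE_URL}/data-stats")
print(stats.status_code, stats.json())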
70 changes: 70 additions & 0 deletions data_pipeline.py
@@ -0,0 +1,70 @@
# Task 1: Data Pipeline Setup (data_pipeline.py)
import psycopg2
import pandas as pd
import requests

# Database connection settings (placeholders; replace with real credentials)
DB_CONFIG = {
    "dbname": "your_db",
    "user": "your_user",
    "password": "your_password",
    "host": "your_host",
    "port": "your_port"
}

def connect_db():
    """Open a connection and cursor; return (None, None) on failure."""
    try:
        conn = psycopg2.connect(**DB_CONFIG)
        cursor = conn.cursor()
        return conn, cursor
    except Exception as e:
        print(f"Database connection error: {e}")
        return None, None

# Create table
def setup_database():
    conn, cursor = connect_db()
    if conn is None:
        return

    cursor.execute('''
        CREATE TABLE IF NOT EXISTS conversations (
            id SERIAL PRIMARY KEY,
            user_id VARCHAR(50),
            timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
            query TEXT,
            generated_response TEXT
        );
    ''')
    conn.commit()
    conn.close()
    print("Database setup completed.")

# ETL Pipeline
def etl_pipeline():
    url = "https://example.com/amazon_reviews.json"  # Replace with actual dataset link
    try:
        # Extract: download the dataset and load it into a DataFrame
        response = requests.get(url)
        response.raise_for_status()
        data = response.json()
        df = pd.DataFrame(data)
    except Exception as e:
        print(f"Error fetching data: {e}")
        return

    conn, cursor = connect_db()
    if conn is None:
        return

    # Load: insert each record into the conversations table
    for _, row in df.iterrows():
        cursor.execute("""
            INSERT INTO conversations (user_id, query, generated_response)
            VALUES (%s, %s, %s)
        """, (row['user_id'], row['review_text'], row['response_text']))
    conn.commit()
    conn.close()
    print("ETL Process Completed")

if __name__ == "__main__":
    setup_database()
    etl_pipeline()
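Both files hard-code placeholder credentials in DB_CONFIG. One common alternative is to read the settings from environment variables so credentials never land in the repository; the sketch below assumes variable names such as DB_NAME and DB_USER, which are illustrative and not part of this PR.

import os

# Sketch: build DB_CONFIG from environment variables.
# Variable names and defaults here are assumptions, not part of the original code.
DB_CONFIG = {
    "dbname": os.environ.get("DB_NAME", "your_db"),
    "user": os.environ.get("DB_USER", "your_user"),
    "password": os.environ.get("DB_PASSWORD", "your_password"),
    "host": os.environ.get("DB_HOST", "localhost"),
    "port": os.environ.get("DB_PORT", "5432"),
}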
19 changes: 19 additions & 0 deletions web_crawler.py
@@ -0,0 +1,19 @@
import requests
from bs4 import BeautifulSoup

def crawl_wikipedia():
    url = "https://en.wikipedia.org/wiki/Natural_language_processing"
    try:
        response = requests.get(url)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        text = soup.get_text()
        return text[:1000]  # Return only the first 1000 characters for preview
    except Exception as e:
        print(f"Error fetching Wikipedia data: {e}")
        return None

if __name__ == "__main__":
    data = crawl_wikipedia()
    if data:
        print("Extracted Wikipedia Data:", data)