
Q1(1)
import numpy as np
import matplotlib.pyplot as plt
mean=0
std_dev=1
num_samples=1000
r_num=np.random.normal(mean,std_dev,num_samples)
rounded_num=np.round(r_num).astype(int)
unique_values, frequencies=np.unique(rounded_num, return_counts=True)
print("Values|Frequencies")
for value, frequency in zip(unique_values, frequencies):
  print(f"{value:>5}|{frequency:>9}")
plt.bar(unique_values, frequencies, color='red', edgecolor='blue')
plt.title("Frequency Distribution")
plt.xlabel("Values")
plt.ylabel("Frequencies")
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.show()
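The sampling above is random on every run; if reproducible output is wanted, the generator can be seeded before drawing. A minimal sketch (the seed value 42 is an arbitrary assumption):

np.random.seed(42)  # fix the seed so repeated runs produce identical samples
r_num = np.random.normal(mean, std_dev, num_samples)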

Q1(2)
odd=[]
even=[]
range_first = int(input("Enter the first number:"))
range_last = int(input("Enter the last number:"))
numbers = range(range_first, range_last + 1)
for i in numbers:
  if i%2==0:
    even.append(i)
  else:
    odd.append(i)
def is_prime(num):
  if num<2:
    return False
  for n in range(2,int(num**0.5)+1):
    if num%n==0:
      return False
  return True
prime_odd_20=[]
for i in odd:
  if is_prime(i):
    prime_odd_20.append(i)
  if len(prime_odd_20)==20:
    break
print("The first 20 odd prime numbers are:", prime_odd_20)
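The same logic can be checked without interactive input; a sketch with hardcoded bounds (1 and 100 are assumptions, chosen so the range contains at least 20 odd primes):

prime_odd = [i for i in range(1, 101) if i % 2 == 1 and is_prime(i)][:20]
print(prime_odd)  # ends at 73, the 20th odd prime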

Q1(3)
import pandas as pd

# Function to read a CSV file using pandas
def read_csv_with_pandas(file_path):
    try:
        data = pd.read_csv(file_path)
        print(data)
    except FileNotFoundError:
        print("The file was not found.")
    except pd.errors.EmptyDataError:
        print("The file is empty.")
    except Exception as e:
        print(f"An error occurred: {e}")

# Example usage
file_path = 'sample.csv'  # Replace with your actual CSV file path
read_csv_with_pandas(file_path)
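Since 'sample.csv' is only a placeholder, a tiny file can be generated first so the example runs end to end (the column names here are arbitrary assumptions):

pd.DataFrame({'name': ['Alice', 'Bob'], 'score': [85, 90]}).to_csv('sample.csv', index=False)  # run before the call above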

 

Q2(1)
my_tuple = (42, "Python", 3.14, True, [1, 2, 3])
reversed_tuple = my_tuple[::-1]
print("Reversed tuple:", reversed_tuple)
a, b, c, d, e = my_tuple
print("Unpacked values:", a, b, c, d, e)
print("Third element of tuple:", my_tuple[2])
print("Last three elements:", my_tuple[-3:])
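One nuance worth noting: the tuple itself is immutable, but the list stored inside it can still be mutated in place. A short illustration:

my_tuple[4].append(4)   # allowed: mutates the list held by the tuple
print(my_tuple)         # (42, 'Python', 3.14, True, [1, 2, 3, 4])
# my_tuple[0] = 0       # would raise TypeError: tuples do not support item assignment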

 

Q2(2)

d1 = {"a": 1, "b": 2, "c": 3}
d2 = {"d": 4, "e": 5, "f": 6}
print(d1)
print(d2)

merge_d = {**d1, **d2}  # {*d1, *d2} would build a set of keys, not a merged dict
print(merge_d)

New_D = {}
key_to_extract = ["a", "b"]
for key in d1:
    if key in key_to_extract:
        New_D[key]=d1[key]
print(New_D)
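On Python 3.9+, the merge can also be written with the dict union operator, and the key extraction collapses to a comprehension; a brief equivalent sketch:

merge_d = d1 | d2  # Python 3.9+ dict union
New_D = {k: d1[k] for k in key_to_extract if k in d1}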

 

Q3 and Q4(1)

import numpy as np

A = np.array([[1,2],[3,4]])
B = np.array([[5,6],[7,8]])
print(A)
print(B)

Add = np.add(A,B)
print(Add)

Sub = np.subtract(A,B)
print(Sub)

Multi = np.multiply(A,B)
print(Multi)

Div = np.divide(A,B)
print(Div)
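np.multiply above is elementwise (Hadamard) multiplication; the true matrix product uses the @ operator (or np.matmul). A short contrast:

MatMul = A @ B
print(MatMul)  # [[19 22], [43 50]] -- the matrix product, not elementwise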

 

Q4(2)

import numpy as np
n = int(input("Enter the number of equations: "))
A = np.zeros((n, n))
B = np.zeros(n)
for i in range(n):
    for j in range(n):
        A[i, j] = float(input(f"Enter coefficient {j+1} of equation {i+1}: "))
    B[i] = float(input(f"Enter the constant term of equation {i+1}: "))
print("Coefficient Matrix (A):")
print(A)
print("Constant Terms (B):")
print(B)
Solution = np.linalg.solve(A, B)
print(f"Solution of the linear system: {Solution}")
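np.linalg.solve raises a LinAlgError when A is singular; for a well-posed system, the answer can be verified by substituting it back. A quick check:

print(np.allclose(A @ Solution, B))  # True when the solution satisfies A·x = B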

Q5.

import pandas as pd
import numpy as np
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler, StandardScaler, LabelEncoder
# Load dataset
df = sns.load_dataset('titanic')
# Display first few rows
print("Initial data preview:")
print(df.head())
# a. Handling missing values
print("\nMissing values:")
print(df.isnull().sum())
# Fill missing numerical values with median
df.fillna(df.median(numeric_only=True), inplace=True)
# Fill missing categorical values with mode ('category' dtype columns such as deck also need filling)
for col in df.select_dtypes(include=['object', 'category']).columns:
    df[col] = df[col].fillna(df[col].mode()[0])
# b. Data integration and normalization

# Convert categorical columns to numerical using Label Encoding
label_enc = LabelEncoder()
for col in df.select_dtypes(include=['object', 'category']).columns:
    df[col] = label_enc.fit_transform(df[col])
# Normalize numerical features using Min-Max scaling
scaler = MinMaxScaler()
num_cols = df.select_dtypes(include=['int64', 'float64']).columns
df[num_cols] = scaler.fit_transform(df[num_cols])
# Standardize numerical features using StandardScaler
# (this overwrites the min-max scaled values, so the final frame is standardized)
std_scaler = StandardScaler()
df[num_cols] = std_scaler.fit_transform(df[num_cols])
# Show the transformed data
print("\nData preview after preprocessing:")
print(df.head())

Q6.

import pandas as pd
import numpy as np
data = {
    'Student_ID': [1, 2, 3, 4, 5, 6, 7, 8],
    'Name': ['Alice', 'Bob', 'Charlie', 'David', 'Eve', 'Frank', 'Grace', 'Helen'],
    'Age': [15, 16, np.nan, 15, 16, 17, 15, 16],
    'Gender': ['F', 'M', 'M', 'M', 'F', 'M', 'F', np.nan],
    'Math_score': [85, 90, np.nan, 75, 60, 95, 80, 88],
    'Science_score': [78, 82, 70, 65, 55, 99, 77, 60],
    'English_score': [88, 87, 75, 70, np.nan, 96, 89, 92]
}
df = pd.DataFrame(data)
df['Age'] = df['Age'].fillna(df['Age'].mean())
df['Math_score'] = df['Math_score'].fillna(df['Math_score'].mean())
df['English_score'] = df['English_score'].fillna(df['English_score'].mean())
df['Gender'] = df['Gender'].fillna(df['Gender'].mode()[0])
def cap_outliers(col):
    q1 = col.quantile(0.25)
    q3 = col.quantile(0.75)
    iqr = q3 - q1
    lower = q1 - 1.5 * iqr
    upper = q3 + 1.5 * iqr
    return col.clip(lower, upper)
df['Math_score'] = cap_outliers(df['Math_score'])
df['Science_score'] = cap_outliers(df['Science_score'])
df['English_score'] = cap_outliers(df['English_score'])
print(df)
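The IQR rule used in cap_outliers flags values outside [Q1 - 1.5*IQR, Q3 + 1.5*IQR] and clips them to those fences. A tiny standalone check on a hypothetical series with one obvious outlier:

s = pd.Series([10, 12, 11, 13, 12, 95])
print(cap_outliers(s))  # the 95 is clipped down to the upper fence (15.0)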
 

Q7.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
data = {
    'Student_ID': [1, 2, 3, 4, 5, 6, 7, 8],
    'Name': ['Alice', 'Bob', 'John', 'Eva', 'Grace', 'helen', 'Ivana', 'Josh'],
    'Age': [15, 16, np.nan, 15, 16, 17, 15, 16],
    'Gender': ['F', 'M', 'M', 'M', 'F', 'M', 'F', 'M'],
    'Math_score': [85, 90, np.nan, 75, 60, 95, 80, 88],
    'Science_score': [78, 82, 70, 65, 55, 99, 77, 60],
    'English_score': [88, 87, 75, 70, np.nan, 96, 89, 92]
}
df = pd.DataFrame(data)
df['Age'] = df['Age'].fillna(df['Age'].mean())
df['Math_score'] = df['Math_score'].fillna(df['Math_score'].mean())
df['Science_score'] = df['Science_score'].fillna(df['Science_score'].mean())
df['English_score'] = df['English_score'].fillna(df['English_score'].mean())
df['Gender'] = df['Gender'].fillna(df['Gender'].mode()[0])
df['Science_score'] = np.log(df['Science_score'])
df['English_score'] = np.log(df['English_score'])
scaler = MinMaxScaler()
df['Math_score_Scaled'] = scaler.fit_transform(df[['Math_score']])
print(df)
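np.log works here because every score is positive; if zeros were possible, np.log1p (which computes log(1 + x)) is the usual safeguard. A one-line variant under that assumption:

df['Science_score'] = np.log1p(df['Science_score'])  # safe even when a score is 0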
 

Q8.
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.datasets import load_iris
import seaborn as sns
iris = load_iris()
df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
# df.hist creates its own figure, so a separate plt.figure call would open an empty extra window
df.hist(bins=20, figsize=(10, 6), edgecolor='black')
plt.suptitle("Histogram")
plt.show()
plt.figure(figsize=(8,6))
sns.boxplot(data=df)
plt.suptitle("Boxplot")
plt.xlabel("Features")
plt.ylabel("Values")
plt.show()

Q9.
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import pandas as pd
df = pd.read_csv('https://github.com/kb22/Heart-Disease-Prediction/raw/master/dataset.csv')
print(df.head())
# Scatter Plot
plt.figure(figsize=(8, 6))
plt.scatter(df['age'], df['chol'])  # Replace 'age' and 'chol' with desired columns
plt.xlabel('Age')
plt.ylabel('Cholesterol')
plt.title('Scatter Plot of Age vs. Cholesterol')
plt.show()
# Bar Plot
plt.figure(figsize=(8, 6))
sex_counts = df['sex'].value_counts()  # Replace 'sex' with desired column
plt.bar(sex_counts.index.astype(str), sex_counts.values)  # keep labels and heights aligned
plt.xlabel(‘Sex’)
plt.ylabel(‘Count’)
plt.title(‘Bar Plot of Sex Distribution’)
plt.show()
# Density Plot
plt.figure(figsize=(8, 6))
sns.kdeplot(df['chol'])  # Replace 'chol' with desired column
plt.xlabel(‘Cholesterol’)
plt.ylabel(‘Density’)
plt.title(‘Density Plot of Cholesterol’)
plt.show()
# Pie Chart
plt.figure(figsize=(8, 6))
cp_counts = df['cp'].value_counts()  # Replace 'cp' with desired column
plt.pie(cp_counts, labels=cp_counts.index, autopct='%1.1f%%')  # labels taken from the counts' own index
plt.title(‘Pie Chart of Chest Pain Type Distribution’)
plt.show()
# Bubble Plot
plt.figure(figsize=(8, 6))
plt.scatter(df['age'], df['chol'], s=df['trestbps']*5, alpha=0.5)  # Replace with desired columns
plt.xlabel(‘Age’)
plt.ylabel(‘Cholesterol’)
plt.title(‘Bubble Plot of Age vs. Cholesterol (Size: Resting Blood Pressure)’)
plt.show()
# Heatmap
plt.figure(figsize=(10, 8))
sns.heatmap(df.corr(), annot=True, cmap='coolwarm')
plt.title(‘Heatmap of Correlation Matrix’)
plt.show()

Q10.
import pandas as pd
import seaborn as sns
df = sns.load_dataset("iris")
print("First 5 rows of dataset:")
print(df.head())
numeric = df.select_dtypes(include=['number'])
print("\nMedian Values:")
print(numeric.median())
print("\nStandard Deviation:")
print(numeric.std())
print("\nVariance:")
print(numeric.var())
print("\nMinimum Values:")
print(numeric.min())
print("\nMaximum Values:")
print(numeric.max())
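Most of these summaries (plus mean, quartiles, and count) are also available in a single call, which can be handy for a quick overview:

print(df.describe())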

Q11.
import numpy as np
import pandas as pd
import scipy.stats as stats
from statsmodels.stats.weightstats import ztest

# Step 1: Generate Synthetic Dataset
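# Assumption: fixing the seed makes the synthetic data (and all test statistics) reproducible
np.random.seed(0)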
data = {
    'Group_A': np.random.normal(loc=50, scale=10, size=30),
    'Group_B': np.random.normal(loc=55, scale=12, size=30),
    'Category': np.random.choice(['Yes', 'No'], size=30, p=[0.6, 0.4])
}

df = pd.DataFrame(data)
print(df.head())

# Step 2: One-Sample T-test
t_stat, p_value = stats.ttest_1samp(df['Group_A'], 50)
print("One-Sample T-Test: t-value =", t_stat, "p-value =", p_value)

# Step 3: Independent Two-Sample T-test
t_stat, p_value = stats.ttest_ind(df['Group_A'], df['Group_B'])
print("Independent Two-Sample T-Test: t-value =", t_stat, "p-value =", p_value)

# Step 4: Paired T-test
df['Group_A'] = df['Group_A'] + np.random.normal(0.5, size=30)
t_stat, p_value = stats.ttest_rel(df['Group_A'], df['Group_B'])
print("Paired T-Test: t-value =", t_stat, "p-value =", p_value)

# Step 5: Z-test
z_stat, p_value = ztest(df['Group_A'], df['Group_B'])
print("Z-Test: z-value =", z_stat, "p-value =", p_value)

# Step 6: Chi-Square Test
df['Above_Mean'] = df['Group_A'] > df['Group_A'].mean()
crosstab = pd.crosstab(df['Category'], df['Above_Mean'])
chi2_stat, p_value, dof, expected = stats.chi2_contingency(crosstab)
print("Chi-Square Test: chi2-value =", chi2_stat, "p-value =", p_value)

# The decision below uses the p-value from the most recent test (the chi-square test)
if p_value < 0.05:
    print("Reject the null hypothesis")
else:
    print("Fail to reject the null hypothesis")

Q12.

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Sample dataset (for demonstration)
data = {
    "Item_Weight": [9.3, 5.92, 17.5, 19.2, 8.9, 15.3, 10.5, 7.8, 12.4, 20.1],
    "Item_Visibility": [0.016, 0.019, 0.017, 0.000, 0.066, 0.035, 0.045, 0.025, 0.012, 0.030],
    "Item_MRP": [249.8, 48.2, 141.6, 182.1, 53.9, 210.5, 150.3, 200.2, 130.4, 275.0],
    "Outlet_Establishment_Year": [1999, 2009, 1999, 1998, 1987, 2004, 1995, 2002, 1997, 2010],
    "Item_Outlet_Sales": [3735.1, 443.4, 2097.3, 732.4, 994.7, 2500.2, 1750.6, 2100.8, 1950.9, 1400.0]
}

# Convert data to DataFrame
df = pd.DataFrame(data)

# Display basic dataset info
print("\nDataset Overview:\n", df.head())

# Define features (X) and target variable (y)
X = df[["Item_Weight", "Item_Visibility", "Item_MRP", "Outlet_Establishment_Year"]]
y = df["Item_Outlet_Sales"]

# Split data into Training (80%) and Testing (20%) sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and train the Linear Regression model
model = LinearRegression()
model.fit(X_train, y_train)

# Predict sales using the trained model
y_pred = model.predict(X_test)

# Model Performance Evaluation
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("\nModel Performance Evaluation:")
print(f"Mean Squared Error (MSE): {mse:.2f}")
print(f"R-squared (R^2): {r2:.2f}")

# Visualizing Actual vs Predicted Sales
plt.figure(figsize=(8, 5))
sns.scatterplot(x=y_test, y=y_pred, color="blue", label="Actual vs Predicted")
plt.plot([min(y_test), max(y_test)], [min(y_test), max(y_test)], color="red", linestyle="--")
plt.xlabel("Actual Sales")
plt.ylabel("Predicted Sales")
plt.title("Actual vs Predicted Sales")
plt.legend()
plt.show()

# Display model coefficients
feature_importance = pd.DataFrame({'Feature': X.columns, 'Coefficient': model.coef_})
print("\nModel Coefficients:\n", feature_importance)
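With only ten rows, the 20% test split leaves just two points, so the single-split MSE and R^2 above are illustrative at best. A sketch of a slightly more informative check using scikit-learn's cross_val_score (5-fold CV on the same data is an assumption):

from sklearn.model_selection import cross_val_score
scores = cross_val_score(LinearRegression(), X, y, cv=5, scoring="r2")
print("Cross-validated R^2 scores:", scores)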

Q13.

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score

# Gaussian Naive Bayes with Pima Indians Diabetes Dataset
url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv"
column_names = ["Pregnancies", "Glucose", "BloodPressure", "SkinThickness", "Insulin",
                "BMI", "DiabetesPedigreeFunction", "Age", "Outcome"]
df = pd.read_csv(url, names=column_names)

X = df.drop(“Outcome”, axis=1)
y = df[“Outcome”]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

gnb = GaussianNB()
gnb.fit(X_train, y_train)
y_pred = gnb.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy (GaussianNB): {accuracy:.2f}")

# -------------------------------
# Multinomial Naive Bayes for Text Data
from sklearn.naive_bayes import MultinomialNB
from sklearn.feature_extraction.text import CountVectorizer

text_data = [
    "I love programming in python",
    "Python is great for data science",
    "I enjoy coding in python",
    "I am learning machine learning",
    "Data science is fun with python",
    "I like playing football"
]
labels = [1, 1, 1, 0, 1, 0]

vectorizer = CountVectorizer()
X = vectorizer.fit_transform(text_data)
y = np.array(labels)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = MultinomialNB()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy (MultinomialNB): {accuracy:.2f}")

# -------------------------------
# Bernoulli Naive Bayes with Binary Features
from sklearn.naive_bayes import BernoulliNB

X = np.array([[1, 0, 1], [1, 1, 1], [0, 1, 1], [0, 0, 1], [1, 0, 0], [0, 1, 0]])
y = np.array([1, 1, 0, 0, 1, 0])

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

bnb = BernoulliNB()
bnb.fit(X_train, y_train)
y_pred = bnb.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy (BernoulliNB): {accuracy:.2f}")
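Accuracy alone can hide per-class behavior, especially on tiny or imbalanced test splits like the ones above. A quick follow-up sketch using sklearn's confusion_matrix on the last model's predictions:

from sklearn.metrics import confusion_matrix
print(confusion_matrix(y_test, y_pred))  # rows: true classes, columns: predicted classes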