# Codes

# Q1.(1)

import numpy as np

import matplotlib.pyplot as plt

# Draw samples from a normal distribution, round each one to the nearest
# integer and tabulate how often every integer value occurs.
mean = 0
std_dev = 1
num_samples = 1000

r_num = np.random.normal(mean, std_dev, num_samples)
rounded_num = np.round(r_num).astype(int)

# np.unique returns the sorted distinct values together with their counts.
unique_values, frequencies = np.unique(rounded_num, return_counts=True)

print("Values|Frequencies")
for value, frequency in zip(unique_values, frequencies):
    print(f"{value:>5}|{frequency:>9}")

# Bar chart of the rounded-value frequencies computed above.
plt.bar(unique_values, frequencies, color="red", edgecolor="blue")
plt.title("Frequency Distribution")
plt.xlabel("Values")
plt.ylabel("Frequencies")
# "--" is matplotlib's dashed line style (the pasted en dash is not valid).
plt.grid(axis="y", linestyle="--", alpha=0.7)
plt.show()

# Q1.(2)

# Read an inclusive integer range from the user and split it into its even
# and odd members.
range_first = int(input("Enter the first number:"))
range_last = int(input("Enter the last number:"))

# +1 so that the last number entered is part of the range.
Range = range(range_first, range_last + 1)

even = [i for i in Range if i % 2 == 0]
odd = [i for i in Range if i % 2 != 0]

from math import isqrt


def is_prime(num):
    """Return True if the integer ``num`` is prime, otherwise False.

    Uses trial division up to the integer square root of ``num``.
    ``math.isqrt`` is exact for arbitrarily large integers, whereas
    ``int(num ** 0.5)`` goes through floating point and can be off by one
    (or raise OverflowError) for very large values.
    """
    if num < 2:
        return False
    if num < 4:
        # 2 and 3 are prime.
        return True
    if num % 2 == 0:
        return False
    # Only odd divisors remain to be checked once even numbers are excluded.
    for n in range(3, isqrt(num) + 1, 2):
        if num % n == 0:
            return False
    return True

# Collect primes from the odd numbers of the chosen range, stopping as soon as
# 20 of them have been found (fewer if the range does not contain that many).
prime_odd_20 = []
for i in odd:
    if is_prime(i):
        prime_odd_20.append(i)
        if len(prime_odd_20) == 20:
            break

print("The first 20 prime odd number are:", prime_odd_20)

# Q1.(3)

import pandas as pd

# Function to read a CSV file using pandas

def read_csv_with_pandas(file_path):
    """Read a CSV file with pandas, print it and return the DataFrame.

    Args:
        file_path: Path (or URL) of the CSV file to load.

    Returns:
        The loaded ``DataFrame``, or ``None`` when the file could not be
        read. Failures are reported on stdout instead of being raised.
    """
    try:
        data = pd.read_csv(file_path)
    except FileNotFoundError:
        print("The file was not found.")
    except pd.errors.EmptyDataError:
        print("The file is empty.")
    except Exception as e:
        # Last-resort handler so the script reports the problem and continues.
        print(f"An error occurred: {e}")
    else:
        print(data)
        return data
    return None

# Example usage

file_path = "sample.csv"  # Replace with your actual CSV file path
read_csv_with_pandas(file_path)

 

# Q2.(1)

# Basic tuple operations: reversing, unpacking, indexing and slicing.
my_tuple = (42, "Python", 3.14, True, [1, 2, 3])

# A slice with step -1 yields a new tuple in reverse order.
reversed_tuple = my_tuple[::-1]
print("Reversed tuple:", reversed_tuple)

# Unpacking needs exactly one target name per element.
a, b, c, d, e = my_tuple
print("Unpacked values:", a, b, c, d, e)

print("Third element of tuple:", my_tuple[2])
print("Last three elements:", my_tuple[-3:])

 

# Q2.(2)

# Merge two dictionaries and extract a subset of keys from the first one.
d1 = {"a": 1, "b": 2, "c": 3}
d2 = {"d": 4, "e": 5, "f": 6}
print(d1)
print(d2)

# ** unpacks key/value pairs; a single * would unpack only the keys and
# produce a set instead of a merged dictionary.
merge_d = {**d1, **d2}
print(merge_d)

key_to_extract = ["a", "b"]
# Iterate d1 (not key_to_extract) so requested keys missing from d1 are skipped.
New_D = {key: d1[key] for key in d1 if key in key_to_extract}
print(New_D)

 

# Q3 and Q4.(1)

import numpy as np

# Element-wise arithmetic on two 2x2 matrices using NumPy operators.
A = np.array([[1, 2], [3, 4]])
B = np.array([[5, 6], [7, 8]])
print(A, B, sep="\n")

# The operators below are element-wise, exactly like np.add / np.subtract /
# np.multiply / np.divide.
Add = A + B
Sub = A - B
Multi = A * B
Div = A / B
print(Add, Sub, Multi, Div, sep="\n")

 

# Q4.(2)

import numpy as np

# Read an n x n system of linear equations A.x = B from the user and solve it.
n = int(input("Enter the number of equations: "))

A = np.zeros((n, n))
B = np.zeros(n)

for i in range(n):
    for j in range(n):
        # float() accepts integer input as well as non-integer coefficients.
        A[i, j] = float(input(f"Enter the coefficient {j+1} of equation {i+1}: "))
    B[i] = float(input(f"Enter the constant term of equation {i+1}: "))

print("Coefficient Matrix (A):")
print(A)
print("Constant Terms (B):")
print(B)

try:
    Solution = np.linalg.solve(A, B)
except np.linalg.LinAlgError as err:
    # Raised when A is singular, i.e. the system has no unique solution.
    print(f"The system could not be solved: {err}")
else:
    print(f"Solution of LE are: {Solution}")

 

# Q5.

import pandas as pd

import numpy as np

import seaborn as sns

from sklearn.preprocessing import MinMaxScaler, StandardScaler, LabelEncoder

# Load dataset

# Load dataset
df = sns.load_dataset("titanic")

# Display first few rows
print("Initial data preview:")
print(df.head())

# a. Handling missing values
print("\nMissing values:")
print(df.isnull().sum())

# Fill missing numerical values with median
df = df.fillna(df.median(numeric_only=True))

# Fill missing categorical values with mode.
# "category" is included as well as "object" so that pandas categorical
# columns are imputed too. Assigning the result back (instead of a chained
# `df[col].fillna(..., inplace=True)`) guarantees that df itself is updated.
categorical_cols = df.select_dtypes(include=["object", "category"]).columns
for col in categorical_cols:
    df[col] = df[col].fillna(df[col].mode()[0])

# b. Data integration and normalization
# Convert categorical columns to numerical using Label Encoding
label_enc = LabelEncoder()
for col in categorical_cols:
    df[col] = label_enc.fit_transform(df[col])

# The numeric column set is the same for both scaling steps, so compute it once.
numeric_cols = df.select_dtypes(include=["int64", "float64"]).columns

# Normalize numerical features using Min-Max scaling
scaler = MinMaxScaler()
df[numeric_cols] = scaler.fit_transform(df[numeric_cols])

# Standardize numerical features using StandardScaler
# NOTE(review): this overwrites the Min-Max result from the previous step, so
# the final values are standardized (zero mean, unit variance), not in [0, 1].
std_scaler = StandardScaler()
df[numeric_cols] = std_scaler.fit_transform(df[numeric_cols])

# Show the transformed data
print("\nData preview after preprocessing:")
print(df.head())

 

# Q6.

import pandas as pd

import numpy as np

# Build the student data set and impute its missing values.
data = {
    "Student_ID": [1, 2, 3, 4, 5, 6, 7, 8],
    "Name": ["Alice", "Bob", "Charlie", "David", "Eve", "Frank", "Grace", "Helen"],
    "Age": [15, 16, np.nan, 15, 16, 17, 15, 16],
    "Gender": ["F", "M", "M", "M", "F", "M", "F", np.nan],
    "Math_score": [85, 90, np.nan, 75, 60, 95, 80, 88],
    "Science_score": [78, 82, 70, 65, 55, 99, 77, 60],
    "English_score": [88, 87, 75, 70, np.nan, 96, 89, 92],
}

df = pd.DataFrame(data)

# Numeric gaps are filled with the column mean, the categorical gap with the
# most frequent value. A single DataFrame.fillna call with a per-column mapping
# replaces the chained `df[col].fillna(..., inplace=True)` calls, which are
# deprecated and do not update df under pandas copy-on-write.
df = df.fillna(
    {
        "Age": df["Age"].mean(),
        "Math_score": df["Math_score"].mean(),
        "English_score": df["English_score"].mean(),
        "Gender": df["Gender"].mode()[0],
    }
)

def cap_outliers(col):
    """Clip the values of ``col`` to the 1.5 * IQR fences.

    Args:
        col: Object providing ``quantile`` and ``clip`` (e.g. a pandas Series).

    Returns:
        The result of ``col.clip(lower, upper)``, where ``lower`` is
        Q1 - 1.5 * IQR and ``upper`` is Q3 + 1.5 * IQR.
    """
    q1 = col.quantile(0.25)
    q3 = col.quantile(0.75)
    iqr = q3 - q1
    lower = q1 - 1.5 * iqr
    upper = q3 + 1.5 * iqr
    return col.clip(lower, upper)

# Cap outliers in every score column, then show the cleaned data.
df["Math_score"] = cap_outliers(df["Math_score"])
df["Science_score"] = cap_outliers(df["Science_score"])
df["English_score"] = cap_outliers(df["English_score"])

print(df)

 

# Q7.

import numpy as np

import pandas as pd

import matplotlib.pyplot as plt

import seaborn as sns

from sklearn.preprocessing import MinMaxScaler

# Build the student data set and impute its missing values.
data = {
    "Student_ID": [1, 2, 3, 4, 5, 6, 7, 8],
    "Name": ["Alice", "Bob", "John", "Eva", "Grace", "helen", "Ivana", "Josh"],
    "Age": [15, 16, np.nan, 15, 16, 17, 15, 16],
    "Gender": ["F", "M", "M", "M", "F", "M", "F", "M"],
    "Math_score": [85, 90, np.nan, 75, 60, 95, 80, 88],
    "Science_score": [78, 82, 70, 65, 55, 99, 77, 60],
    "English_score": [88, 87, 75, 70, np.nan, 96, 89, 92],
}

df = pd.DataFrame(data)

# Numeric gaps are filled with the column mean, categorical gaps with the most
# frequent value. A single DataFrame.fillna call with a per-column mapping
# replaces the chained `df[col].fillna(..., inplace=True)` calls, which are
# deprecated and do not update df under pandas copy-on-write.
df = df.fillna(
    {
        "Age": df["Age"].mean(),
        "Math_score": df["Math_score"].mean(),
        "Science_score": df["Science_score"].mean(),
        "English_score": df["English_score"].mean(),
        "Gender": df["Gender"].mode()[0],
    }
)

# Natural-log transform of the Science and English scores.
df["Science_score"] = np.log(df["Science_score"])
df["English_score"] = np.log(df["English_score"])

# Min-Max scale the Math score into a new column; the original is kept.
# The double brackets pass a one-column DataFrame, as the scaler expects 2-D input.
scaler = MinMaxScaler()
df["Math_score_Scaled"] = scaler.fit_transform(df[["Math_score"]])

print(df)

 

# Q8.

# The alias is required: the code below refers to the module as `plt`.
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.datasets import load_iris
import seaborn as sns

iris = load_iris()
df = pd.DataFrame(data=iris.data, columns=iris.feature_names)

# Histogram of every feature. DataFrame.hist creates its own figure (sized by
# figsize), so no preceding plt.figure() call is needed; calling one would
# leave an extra, empty figure on screen.
df.hist(bins=20, figsize=(10, 6), edgecolor="black")
plt.suptitle("Histogram")
plt.show()

# Box plot of all features on a shared axis.
plt.figure(figsize=(8, 6))
sns.boxplot(data=df)
plt.suptitle("Boxplot")
plt.xlabel("Features")
plt.ylabel("Values")
plt.show()

# Q9.

import matplotlib.pyplot as plt

import numpy as np

import seaborn as sns

import pandas as pd

df = pd.read_csv("https://github.com/kb22/Heart-Disease-Prediction/raw/master/dataset.csv")
print(df.head())

# Scatter Plot
# The figure must be created before drawing; otherwise the points land on one
# figure and the y-label/title on another, empty one.
plt.figure(figsize=(8, 6))
plt.scatter(df["age"], df["chol"])  # Replace 'age' and 'chol' with desired columns
plt.xlabel("Age")
plt.ylabel("Cholesterol")
plt.title("Scatter Plot of Age vs. Cholesterol")
plt.show()

# Bar Plot
# Take labels and heights from the same value_counts() result so they line up;
# unique() lists values in order of appearance, value_counts() by frequency.
sex_counts = df["sex"].value_counts()  # Replace 'sex' with desired column
plt.figure(figsize=(8, 6))
plt.bar(sex_counts.index, sex_counts.values)
plt.xticks(sex_counts.index)
plt.xlabel("Sex")
plt.ylabel("Count")
plt.title("Bar Plot of Sex Distribution")
plt.show()

# Density Plot
plt.figure(figsize=(8, 6))
sns.kdeplot(df["chol"])  # Replace 'chol' with desired column
plt.xlabel("Cholesterol")
plt.ylabel("Density")
plt.title("Density Plot of Cholesterol")
plt.show()

# Pie Chart
# Same alignment concern as the bar plot: labels come from the counts' index.
cp_counts = df["cp"].value_counts()  # Replace 'cp' with desired column
plt.figure(figsize=(8, 6))
plt.pie(cp_counts, labels=cp_counts.index, autopct="%1.1f%%")
plt.title("Pie Chart of Chest Pain Type Distribution")
plt.show()

# Bubble Plot
plt.figure(figsize=(8, 6))
# Marker area is proportional to resting blood pressure. Replace with desired columns.
plt.scatter(df["age"], df["chol"], s=df["trestbps"] * 5, alpha=0.5)
plt.xlabel("Age")
plt.ylabel("Cholesterol")
plt.title("Bubble Plot of Age vs. Cholesterol (Size: Resting Blood Pressure)")
plt.show()

# Heatmap
plt.figure(figsize=(10, 8))
sns.heatmap(df.corr(), annot=True, cmap="coolwarm")
plt.title("Heatmap of Correlation Matrix")
plt.show()

# Q10.

import pandas as pd

import seaborn as sns

# Descriptive statistics for the numeric columns of the iris data set.
df = sns.load_dataset("iris")

print("First 5 rows of dataset")
# The heading above was previously printed without the rows themselves.
print(df.head())

# Select the numeric columns once instead of once per statistic.
numeric_df = df.select_dtypes(include=["number"])

print("\nMedian Values:")
print(numeric_df.median())

print("\nStandard Deviation:")
print(numeric_df.std())

print("\nVariance:")
print(numeric_df.var())

print("\nMinimum Values:")
print(numeric_df.min())

print("\nMaximum Values:")
print(numeric_df.max())

# Q11.

import numpy as np

import pandas as pd

import scipy.stats as stats

from statsmodels.stats.weightstats import ztest

# Step 1: Generate Synthetic Dataset

# Synthetic data: two normally distributed groups plus a Yes/No category.
data = {
    "Group_A": np.random.normal(loc=50, scale=10, size=30),
    "Group_B": np.random.normal(loc=55, scale=12, size=30),
    "Category": np.random.choice(["Yes", "No"], size=30, p=[0.6, 0.4]),
}
df = pd.DataFrame(data)
print(df.head())

# Step 2: One-Sample T-test (is the mean of Group_A equal to 50?)
t_stat, p_value = stats.ttest_1samp(df["Group_A"], 50)
print("One-Sample T-Test: t-value =", t_stat, "p-value =", p_value)

# Step 3: Independent Two-Sample T-test
t_stat, p_value = stats.ttest_ind(df["Group_A"], df["Group_B"])
print("Independent Two-Sample T-Test: t-value =", t_stat, "p-value =", p_value)

# Step 4: Paired T-test
# NOTE(review): Group_A is overwritten here with a noisy copy of itself
# (noise ~ N(0.5, 1)), so every later step that reads Group_A sees the
# perturbed values - confirm this is intended.
df["Group_A"] = df["Group_A"] + np.random.normal(loc=0.5, size=30)
t_stat, p_value = stats.ttest_rel(df["Group_A"], df["Group_B"])
print("Paired T-Test: t-value =", t_stat, "p-value =", p_value)

# Step 5: Z-test (two-sample comparison of Group_A and Group_B)
z_stat, p_value = ztest(df["Group_A"], df["Group_B"])
print("Z-Test: z-value =", z_stat, "p-value =", p_value)

# Step 6: Chi-Square Test
# Tests independence between Category and whether Group_A is above its mean.
df["Above_Mean"] = df["Group_A"] > df["Group_A"].mean()
crosstab = pd.crosstab(df["Category"], df["Above_Mean"])
chi2_stat, p_value, dof, expected = stats.chi2_contingency(crosstab)
print("Chi-Square Test: chi2-value =", chi2_stat, "p-value =", p_value)

# The decision below refers to the chi-square p-value only (the last one set).
if p_value < 0.05:
    print("Reject the null hypothesis")
else:
    print("Fail to reject the null hypothesis")

# Q12.

import pandas as pd

import numpy as np

import matplotlib.pyplot as plt

import seaborn as sns

from sklearn.model_selection import train_test_split

from sklearn.linear_model import LinearRegression

from sklearn.metrics import mean_squared_error, r2_score

# Sample dataset (for demonstration)

# Sample dataset (for demonstration)
data = {
    "Item_Weight": [9.3, 5.92, 17.5, 19.2, 8.9, 15.3, 10.5, 7.8, 12.4, 20.1],
    "Item_Visibility": [0.016, 0.019, 0.017, 0.000, 0.066, 0.035, 0.045, 0.025, 0.012, 0.030],
    "Item_MRP": [249.8, 48.2, 141.6, 182.1, 53.9, 210.5, 150.3, 200.2, 130.4, 275.0],
    "Outlet_Establishment_Year": [1999, 2009, 1999, 1998, 1987, 2004, 1995, 2002, 1997, 2010],
    "Item_Outlet_Sales": [3735.1, 443.4, 2097.3, 732.4, 994.7, 2500.2, 1750.6, 2100.8, 1950.9, 1400.0],
}

# Convert data to DataFrame
df = pd.DataFrame(data)

# Display basic dataset info
print("\nDataset Overview:\n", df.head())

# Define features (X) and target variable (y)
X = df[["Item_Weight", "Item_Visibility", "Item_MRP", "Outlet_Establishment_Year"]]
y = df["Item_Outlet_Sales"]

# Split data into Training (80%) and Testing (20%) sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and train the Linear Regression model
model = LinearRegression()
model.fit(X_train, y_train)

# Predict sales using the trained model
y_pred = model.predict(X_test)

# Model Performance Evaluation
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print("\nModel Performance Evaluation:")
print(f"Mean Squared Error (MSE): {mse:.2f}")
print(f"R-squared (R^2): {r2:.2f}")

# Visualizing Actual vs Predicted Sales
plt.figure(figsize=(8, 5))
sns.scatterplot(x=y_test, y=y_pred, color="blue", label="Actual vs Predicted")
# Dashed y = x reference line: points on it are perfect predictions.
# "--" is matplotlib's dashed line style (the pasted en dash is not valid).
sales_range = [min(y_test), max(y_test)]
plt.plot(sales_range, sales_range, color="red", linestyle="--")
plt.xlabel("Actual Sales")
plt.ylabel("Predicted Sales")
plt.title("Actual vs Predicted Sales")
plt.legend()
plt.show()

# Display model coefficients
feature_importance = pd.DataFrame({"Feature": X.columns, "Coefficient": model.coef_})
print("\nModel Coefficients:\n", feature_importance)

# Q13.

import pandas as pd

import numpy as np

from sklearn.model_selection import train_test_split

from sklearn.naive_bayes import GaussianNB

from sklearn.metrics import accuracy_score

# Gaussian Naive Bayes with Pima Indians Diabetes Dataset

url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv"

# Column names are passed explicitly via `names=` when reading the CSV.
column_names = ["Pregnancies", "Glucose", "BloodPressure", "SkinThickness", "Insulin",
                "BMI", "DiabetesPedigreeFunction", "Age", "Outcome"]
df = pd.read_csv(url, names=column_names)

# "Outcome" is the target; every other column is a feature.
X = df.drop("Outcome", axis=1)
y = df["Outcome"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

gnb = GaussianNB()
gnb.fit(X_train, y_train)
y_pred = gnb.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy (GaussianNB): {accuracy:.2f}")

# -------------------------------

# Multinomial Naive Bayes for Text Data

from sklearn.naive_bayes import MultinomialNB

from sklearn.feature_extraction.text import CountVectorizer

# Tiny labelled corpus, one label per sentence.
text_data = [
    "I love programming in python",
    "Python is great for data science",
    "I enjoy coding in python",
    "I am learning machine learning",
    "Data science is fun with python",
    "I like playing football",
]
labels = [1, 1, 1, 0, 1, 0]

# Bag-of-words counts are the feature type MultinomialNB is designed for.
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(text_data)
y = np.array(labels)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = MultinomialNB()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy (MultinomialNB): {accuracy:.2f}")

# -------------------------------

# Bernoulli Naive Bayes with Binary Features

from sklearn.naive_bayes import BernoulliNB

# Six samples with three binary (0/1) features each, and their class labels.
X = np.array([[1, 0, 1], [1, 1, 1], [0, 1, 1], [0, 0, 1], [1, 0, 0], [0, 1, 0]])
y = np.array([1, 1, 0, 0, 1, 0])

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

bnb = BernoulliNB()
bnb.fit(X_train, y_train)
y_pred = bnb.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy (BernoulliNB): {accuracy:.2f}")

Paste text,images,html and share with anyone
Scroll to Top