# ex333

import pandas as pd
import numpy as np

# Columns that are irrelevant for the book information and are dropped
# during cleaning.
irrelevant_columns = [
    'Edition Statement',
    'Corporate Author',
    'Corporate Contributors',
    'Former owner',
    'Engraver',
    'Contributors',
    'Issuance type',
    'Shelfmarks',
]


def clean_books(raw, columns_to_drop=None):
    """Return a cleaned copy of the book DataFrame *raw*.

    The input frame is left unmodified. The cleaning steps are:

    1. Drop the columns listed in *columns_to_drop*.
    2. Use the 'Identifier' column as the index.
    3. Reduce 'Date of Publication' to its leading four-digit year
       (kept as a string); values that do not start with four digits
       become NaN.
    4. Normalise 'Place of Publication': any value containing 'London'
       becomes exactly 'London'; in all other values hyphens are replaced
       by spaces. Missing values stay missing.

    Args:
        raw: DataFrame with at least the 'Identifier',
            'Date of Publication' and 'Place of Publication' columns,
            plus every column named in *columns_to_drop*.
        columns_to_drop: Column labels to remove. Defaults to the
            module-level ``irrelevant_columns`` list.

    Returns:
        A new DataFrame indexed by 'Identifier'.

    Raises:
        KeyError: If a column to drop or a required column is absent.
    """
    if columns_to_drop is None:
        columns_to_drop = irrelevant_columns

    # drop() and set_index() both return new objects, so `raw` is untouched.
    cleaned = raw.drop(columns=columns_to_drop).set_index('Identifier')

    # Tidy up the date of publication with a simple regular expression:
    # keep only a leading four-digit year.
    cleaned['Date of Publication'] = cleaned['Date of Publication'].str.extract(
        r'^(\d{4})', expand=False
    )

    # Combine str methods with NumPy to clean the place of publication.
    place = cleaned['Place of Publication']
    # na=False: a missing place must not count as a match. Without it the
    # mask holds NaN, which np.where treats as truthy, and missing places
    # would wrongly be labelled 'London'.
    is_london = place.str.contains('London', na=False, regex=False)
    cleaned['Place of Publication'] = np.where(
        is_london,
        'London',
        place.str.replace('-', ' ', regex=False),
    )
    return cleaned


if __name__ == "__main__":
    # Import the data into a DataFrame
    df = pd.read_csv('BL-Flickr-Images-Book.csv')

    # Display the first few rows of the DataFrame
    print("Original DataFrame:")
    print(df.head())

    # Drop irrelevant columns, re-index and tidy up the fields
    df = clean_books(df, irrelevant_columns)

    # Display the cleaned DataFrame
    print("\nCleaned DataFrame:")
    print(df.head())

# Scroll to Top  (web-page residue; not part of the script)