All about DataSince, DataEngineering and ComputerScience
View the Project on GitHub datainsightat/DataScience_Examples
df_list = []
# Load each object from s3
for file in request_files:
s3_day_reqs = s3.get_object(Bucket='gid-requests',
Key=file['Key'])
# Read the DataFrame into pandas, append it to the list
day_reqs = pd.read_csv(s3_day_reqs['Body'])
df_list.append(day_reqs)
# Concatenate all the DataFrames in the list
all_reqs = pd.concat(df_list)
# Preview the DataFrame
all_reqs.head()
# Write agg_df to a CSV and HTML file with no border
agg_df.to_csv('./feb_final_report.csv')
agg_df.to_html('./feb_final_report.html', border=0)
# Upload the generated CSV to the gid-reports bucket
s3.upload_file(Filename='./feb_final_report.csv',
Key='2019/feb/final_report.html', Bucket='gid-reports',
ExtraArgs = {'ACL': 'public-read'})
# Upload the generated HTML to the gid-reports bucket
s3.upload_file(Filename='./feb_final_report.html',
Key='2019/feb/final_report.html', Bucket='gid-reports',
ExtraArgs = {'ContentType': 'text/html',
'ACL': 'public-read'})
# List the gid-reports bucket objects starting with 2019/
objects_list = s3.list_objects(Bucket='gid-reports', Prefix='2019/')
# Convert the response contents to DataFrame
objects_df = pd.DataFrame(objects_list['Contents'])
# Create a column "Link" that contains Public Object URL
base_url = "http://gid-reports.s3.amazonaws.com/"
objects_df['Link'] = base_url + objects_df['Key']
# Preview the resulting DataFrame
objects_df.head()
# Write objects_df to an HTML file
objects_df.to_html('report_listing.html',
# Set clickable links
render_links=True,
# Isolate the columns
columns=['Link', 'LastModified', 'Size'])
# Overwrite index.html key by uploading the new file
s3.upload_file(
Filename='./report_listing.html', Key='index.html',
Bucket='gid-reports',
ExtraArgs = {
'ContentType': 'text/html',
'ACL': 'public-read'
})
# Initialize boto3 client for SNS
sns = boto3.client('sns',
region_name='us-east-1',
aws_access_key_id=AWS_KEY_ID,
aws_secret_access_key=AWS_SECRET)
# Create the city_alerts topic
response = sns.create_topic(Name="city_alerts")
c_alerts_arn = response['TopicArn']
# Get the current list of topics
topics = sns.list_topics()['Topics']
for topic in topics:
# For each topic, if it is not marked critical, delete it
if "critical" not in topic['TopicArn']:
sns.delete_topic(TopicArn=topic['TopicArn'])
# Print the list of remaining critical topics
print(sns.list_topics()['Topics'])
# Subscribe Elena's phone number to streets_critical topic
resp_sms = sns.subscribe(
TopicArn = str_critical_arn,
Protocol='sms', Endpoint="+16196777733")
# Print the SubscriptionArn
print(resp_sms['SubscriptionArn'])
# Subscribe Elena's email to streets_critical topic.
resp_email = sns.subscribe(
TopicArn = str_critical_arn,
Protocol='email', Endpoint="eblock@sandiegocity.gov")
# Print the SubscriptionArn
print(resp_email['SubscriptionArn'])
# For each email in contacts, create subscription to street_critical
for email in contacts['Email']:
sns.subscribe(TopicArn = str_critical_arn,
# Set channel and recipient
Protocol = 'email',
Endpoint = email)
# List subscriptions for streets_critical topic, convert to DataFrame
response = sns.list_subscriptions_by_topic(
TopicArn = str_critical_arn)
subs = pd.DataFrame(response['Subscriptions'])
# Preview the DataFrame
subs.head()
if vcounts['water'] > 100:
# If over 100 water violations, publish to water_critical
sns.publish(
TopicArn = dept_arns['water_critical'],
Message = "{} water issues".format(vcounts['water']),
Subject = "Help fix water violations NOW!")
comprehend = boto3.client('comprehend'
,
region_name=
'us-east-1'
,
aws_access_key_id=AWS_KEY_ID,
aws_secret_access_key=AWS_SECRET)
# For each dataframe row
for index, row in dumping_df.iterrows():
# Get the public description field
description =dumping_df.loc[index, 'public_description']
if description != '':
# Detect language in the field content
resp = comprehend.detect_dominant_language(Text=description)
# Assign the top choice language to the lang column.
dumping_df.loc[index, 'lang'] = resp['Languages'][0]['LanguageCode']
# Count the total number of spanish posts
spanish_post_ct = len(dumping_df[dumping_df.lang == 'es'])
# Print the result
print("{} posts in Spanish".format(spanish_post_ct))
for index, row in dumping_df.iterrows():
# Get the translated_desc into a variable
description = dumping_df.loc[index, 'public_description']
if description != '':
# Get the detect_sentiment response
response = comprehend.detect_sentiment(
Text=description,
LanguageCode='en')
# Get the sentiment key value into sentiment column
dumping_df.loc[index, 'sentiment'] = response['Sentiment']
# Preview the dataframe
dumping_df.head()
rekog = boto3.client(
'rekognition'
,
region_name=
'us-east-1'
,
aws_access_key_id=AWS_KEY_ID,
aws_secret_access_key=AWS_SECRET)
# Use Rekognition client to detect labels
image1_response = rekog.detect_labels(
# Specify the image as an S3Object; Return one label
Image=image1, MaxLabels=1)
# Print the labels
print(image1_response['Labels'])
# Create an empty counter variable
cats_count = 0
# Iterate over the labels in the response
for label in response['Labels']:
# Find the cat label, look over the detected instances
if label['Name'] == 'Cat':
for instance in label['Instances']:
# Only count instances with confidence > 85
if (instance['Confidence'] > 85):
cats_count += 1
# Print count of cats
print(cats_count)
response = rekog.detect_text(
Image={'S3Object':
{
'Bucket': 'datacamp-img'
,
'Name': 'report.jpg'
}
}
)
# Create empty list of words
words = []
# Iterate over the TextDetections in the response dictionary
for text_detection in response['TextDetections']:
# If TextDetection type is WORD, append it to words list
if text_detection['Type'] == 'WORD':
# Append the detected text
words.append(text_detection['DetectedText'])
# Print out the words list
print(words)
translate = boto3.client('translate'
,
region_name=
'us-east-1'
,
aws_access_key_id=AWS_KEY_ID, aws_secret_access_key=AWS_SECRET)
for index, row in dumping_df.iterrows():
# Get the public_description into a variable
description = dumping_df.loc[index, 'public_description']
if description != '':
# Translate the public description
resp = translate.translate_text(
Text=description,
SourceLanguageCode='auto', TargetLanguageCode='en')
# Store original language in original_lang column
dumping_df.loc[index, 'original_lang'] = resp['SourceLanguageCode']
# Store the translation in the translated_desc column
dumping_df.loc[index, 'translated_desc'] = resp['TranslatedText']
# Preview the resulting DataFrame
dumping_df = dumping_df[['service_request_id', 'original_lang', 'translated_desc']]
dumping_df.head()