# -*- coding: utf-8 -*-
"""
Created on Thu Apr 26 12:26:19 2018
@author: kishlay
"""
import requests # or urllib
import csv
import sys
reload(sys)
import urllib
from datetime import date, timedelta , datetime , time
import rfc3339
sys.setdefaultencoding('utf-8')
# get Youtube Data API Key
API_KEY = "" # insert your API key
# youtube channel ID
channel_id = "" # insert Youtube channel ID
page_token = ""
print('how are u')
videos = []
videosExtendTest = []
test = datetime.now()
delta = timedelta(5*30)
firstDate = test - delta
finalFormat = rfc3339.rfc3339(firstDate)
#print(finalFormat)
#print(test)
formatted = rfc3339.rfc3339(test)
print(rfc3339.rfc3339(test))
next = True
publishedBefore= datetime.now()
period = delta
finaltime = date(2008,1,1)
while True:
publishedAfter = publishedBefore - delta;
print("\n*****outerloop**\n startdate = "+str(publishedAfter)+ "\n before = "+str(publishedBefore))
if publishedAfter.date() < finaltime:
print('\n\n\n breaking'+ str(publishedAfter))
break
publishedAfterFormatted = rfc3339.rfc3339(publishedAfter)
publishedBeforeFormatted = rfc3339.rfc3339(publishedBefore)
page_token =''
while True:
print("inner Loop")
url = ("https://www.googleapis.com/youtube/v3/search?key="
"{}&channelId={}&part=snippet,id"
"&order=date&maxResults=50&pageToken={}&publishedAfter={}&publishedBefore={}"
).format(
API_KEY,
channel_id,
page_token,
urllib.quote(publishedAfterFormatted),
urllib.quote(publishedBeforeFormatted)
)
resp = requests.get(url)
data = resp.json()
#print("data = "+str(data))
print("legnth is data "+str(len(data['items'])))
print("before length" + str( len(videos)))
for i in data['items']:
videos.append(i)
#videosExtendTest.extend(i)
print("length of video array" + str(len(videos)))
# iterate through result pagination
is_next = data.get('nextPageToken')
if is_next:
page_token = is_next
else:
break
publishedBefore = publishedAfter;
# structuring the data
rows = []
count =0
for i in videos:
title = i['snippet'].get('title')
description = i['snippet'].get('description', "")
videoId = "https://www.youtube.com/watch?v={}".format(
i['id'].get('videoId', ""))
# add special formula [=image("url")], so we can view the thumbnail in google docs spreadsheet
count = count + 1
#print(videoId)
#print(count)
thumb = "=image(\"{}\")".format(i['snippet']['thumbnails'].get('default').get('url', ""))
#rows.append(";".join([title, description, videoId, thumb]))
rows.append(";".join([title, videoId]))
#print("#")
#print(rows[count-1])
#print("#")
# data is now ready to write to csv file
# writing to csv file
path = "videos.csv"
with open(path, "w") as csv_file:
writer = csv.writer(csv_file, delimiter=";")
for row in rows:
#print(row)
writer.writerow(row.split(";"))
print(count)
"""
Created on Thu Apr 26 12:26:19 2018
@author: kishlay
"""
import requests # or urllib
import csv
import sys
reload(sys)
import urllib
from datetime import date, timedelta , datetime , time
import rfc3339
sys.setdefaultencoding('utf-8')
# get Youtube Data API Key
API_KEY = "" # insert your API key
# youtube channel ID
channel_id = "" # insert Youtube channel ID
page_token = ""
print('how are u')
videos = []
videosExtendTest = []
test = datetime.now()
delta = timedelta(5*30)
firstDate = test - delta
finalFormat = rfc3339.rfc3339(firstDate)
#print(finalFormat)
#print(test)
formatted = rfc3339.rfc3339(test)
print(rfc3339.rfc3339(test))
next = True
publishedBefore= datetime.now()
period = delta
finaltime = date(2008,1,1)
while True:
publishedAfter = publishedBefore - delta;
print("\n*****outerloop**\n startdate = "+str(publishedAfter)+ "\n before = "+str(publishedBefore))
if publishedAfter.date() < finaltime:
print('\n\n\n breaking'+ str(publishedAfter))
break
publishedAfterFormatted = rfc3339.rfc3339(publishedAfter)
publishedBeforeFormatted = rfc3339.rfc3339(publishedBefore)
page_token =''
while True:
print("inner Loop")
url = ("https://www.googleapis.com/youtube/v3/search?key="
"{}&channelId={}&part=snippet,id"
"&order=date&maxResults=50&pageToken={}&publishedAfter={}&publishedBefore={}"
).format(
API_KEY,
channel_id,
page_token,
urllib.quote(publishedAfterFormatted),
urllib.quote(publishedBeforeFormatted)
)
resp = requests.get(url)
data = resp.json()
#print("data = "+str(data))
print("legnth is data "+str(len(data['items'])))
print("before length" + str( len(videos)))
for i in data['items']:
videos.append(i)
#videosExtendTest.extend(i)
print("length of video array" + str(len(videos)))
# iterate through result pagination
is_next = data.get('nextPageToken')
if is_next:
page_token = is_next
else:
break
publishedBefore = publishedAfter;
# structuring the data
rows = []
count =0
for i in videos:
title = i['snippet'].get('title')
description = i['snippet'].get('description', "")
videoId = "https://www.youtube.com/watch?v={}".format(
i['id'].get('videoId', ""))
# add special formula [=image("url")], so we can view the thumbnail in google docs spreadsheet
count = count + 1
#print(videoId)
#print(count)
thumb = "=image(\"{}\")".format(i['snippet']['thumbnails'].get('default').get('url', ""))
#rows.append(";".join([title, description, videoId, thumb]))
rows.append(";".join([title, videoId]))
#print("#")
#print(rows[count-1])
#print("#")
# data is now ready to write to csv file
# writing to csv file
path = "videos.csv"
with open(path, "w") as csv_file:
writer = csv.writer(csv_file, delimiter=";")
for row in rows:
#print(row)
writer.writerow(row.split(";"))
print(count)
# NOTE(review): the two lines below are blog-page scrape residue, not code;
# they are commented out here and should simply be deleted.
# Comments
# Post a Comment