I found the Attributeerror error: 'Dataframe' Object has no attribute 'media_url' how can I adjust it? thanks


import pandas as pd
import numpy as np
from datetime import datetime, date, timedelta
import time
from google.cloud import storage
import io
import re
import requests
import gcsfs

def bq_date(x):
    if len(str(x.day)) == 1:
        day = "0" + str(x.day)
        day = str(x.day)

    if len(str(x.month)) == 1:
        month = "0" + str(x.month)
        month = str(x.month)

    return "{0}{1}{2}".format(x.year, month, day)

def list_gcs_objs(bucket, prefix):
    storage_client = storage.Client()
    bucket_check = storage_client.get_bucket(bucket)
    blob_list = list(bucket_check.list_blobs(prefix=prefix))
    obj_paths = list()
    if len(blob_list) <= 1:
        print("Folder empty\n")
        return obj_paths
        count = 1
    while count < len(blob_list):
        count += 1
    return obj_paths

def upload_to_gcs(bucket, object_key, data):
    storage_client = storage.Client(bucket)
    bucket_up = storage_client.get_bucket(bucket)
    blob_up = bucket_up.blob(object_key)
    response = blob_up.upload_from_string(data)
    return (response)

def getInstagramStoriesFeed(base_url):
    fields = "?fields=id,caption,media_type,media_url,permalink,timestamp,username"
    return base_url + fields

def getStories_Insights(post_id, access_token):
    base = "https://graph.facebook.com/v3.2/"
    arequest = "{0}/insights?access_token={1}&metric=".format(post_id, access_token) + \
    return base + arequest

def scrapeInstagramStories(page_id, access_token, since_date, until_date):
    global ndf, sf
    from datetime import date
    scrape_starttime = date.today()
    base_url = "https://graph.facebook.com/v3.2"
    node = "/{}/stories?fields=".format(page_id)
    fields = "id,caption,media_type,permalink,timestamp,username"
    parameters = "&limit=100&access_token={0}".format(access_token)
    anchor = since_date
    after = ''

    print("Scraping {} Instagram Page: {}\n".format(page_id, scrape_starttime))
    ctr = 1
    count = 0
    while (anchor >= since_date) & (count < 10):
        after = '' if after is '' else "&after={}".format(after)
        url = base_url + node + fields + parameters + after

        content = requests.get(url).json()
        sf = pd.DataFrame.from_dict(content['data'])

        if len(sf) > 0:
            sf['timestamp'] = pd.to_datetime(sf.timestamp, infer_datetime_format=True)
            sf['data'] = sf.timestamp.apply(lambda x: x.date())
            anchor = sf.data.min()

        # if there is no next page, we're done.
        if 'paging' in content:
            after = content['paging']['cursors']['after']
            if ctr == 1:
                ndf = sf.copy()
                ndf = pd.concat([sf, ndf], sort=False)
            ctr += 1

        count += 1

    if ndf.empty:
        return ndf

    date_index = list(pd.date_range(start=since_date, end=until_date))
    ndf['timestamp'] = pd.to_datetime(ndf.timestamp, infer_datetime_format=True)
    ndf['data'] = ndf.timestamp.apply(lambda x: x.date())

    ndf['data'] = np.where(ndf.data.isin(date_index),

    ndf.dropna(subset=['data'], inplace=True)
    ndf['data'] = pd.to_datetime(ndf.data, infer_datetime_format=True)

    impressions = {}
    reach = {}
    replies = {}
    exits = {}
    tf = {}
    tb = {}

    for post in ndf.id.unique():
        aux_url = getStories_Insights(post, access_token)
        insights = requests.get(aux_url).json()
        if len(insights) > 0:
            impressions.update({post: insights['data'][0]['values'][0]['value']})
            reach.update({post: insights['data'][1]['values'][0]['value']})
            replies.update({post: insights['data'][2]['values'][0]['value']})
            exits.update({post: insights['data'][3]['values'][0]['value']})
            tf.update({post: insights['data'][4]['values'][0]['value']})
            tb.update({post: insights['data'][5]['values'][0]['value']})
            impressions.update({post: 0})
            reach.update({post: 0})
            replies.update({post: 0})
            exits.update({post: 0})
            tf.update({post: 0})
            tb.update({post: 0})

    ndf['impressions'] = ndf.id.map(impressions)
    ndf['reach'] = ndf.id.map(reach)
    ndf['replies'] = ndf.id.map(replies)
    ndf['exits'] = ndf.id.map(exits)
    ndf['taps_forward'] = ndf.id.map(tf)
    ndf['taps_back'] = ndf.id.map(tb)

    ndf['id'] = ndf.id.astype('category')
    # ndf['caption'] = ndf.caption.astype('category')
    # ndf['media_type'] = ndf.media_type.astype('category')
    # ndf['permalink'] = ndf.permalink.astype('category')
    # ndf['username'] = ndf.username.astype('category')

    ndf['impressions'] = ndf.impressions.astype('int64')
    ndf['reach'] = ndf.reach.astype('int64')
    ndf['replies'] = ndf.replies.astype('int64')
    ndf['exits'] = ndf.exits.astype('int64')
    ndf['taps_forward'] = ndf.taps_forward.astype('int64')
    ndf['taps_back'] = ndf.taps_back.astype('int64')

    return ndf

def st_scrapper(request):
    global ndf
    since_date = (date.today() - timedelta(1))
    until_date = date.today()
    today = date.today().strftime("%Y%m%d")

    mybucket = "gdata-dn-gshow-sandbox"
    mainprefix = "AD/INS/"

    # List FB Avaiable Data

    maindata = list_gcs_objs(mybucket, mainprefix)
    ins_dates = [x[-12:-4] for x in maindata]

    # Queries
    query_tags = "SELECT * FROM `globoid.AD_gshow_hashtags`"

    dtags = pd.read_gbq(query_tags, dialect='standard', index_col="Hashtag")
    tags = dtags['Produto'].to_dict()

    user_token = ""

    gshow_pages = {'',

    gshow_pagelist = list(gshow_pages.keys())

    ctr = 1
    for page in gshow_pagelist:
        print("{0} | Date Range {1} - {2}".format(gshow_pages[page],
        if ctr == 1:
            df = scrapeInstagramStories(page_id=page, access_token=user_token,
                                        since_date=since_date, until_date=until_date)

            if df.empty:
                print("{0} is empty!".format(gshow_pages[page]))
                ndf = df.copy()

            ctr += 1
            df = scrapeInstagramStories(page_id=page, access_token=user_token,
                                        since_date=since_date, until_date=until_date)

            if df.empty:
                print("{0} is empty!".format(gshow_pages[page]))
                ndf = pd.concat([ndf, df], sort=False)

            ctr += 1

    ndf['timestamp'] = pd.to_datetime(ndf.timestamp, infer_datetime_format=True)
    ndf['timestamp'] = ndf.timestamp.dt.tz_localize('UTC')
    ndf['timestamp'] = ndf.timestamp.dt.tz_convert('America/Sao_Paulo')

    ndf.caption.fillna("None", inplace=True)
    ndf['completion_rate'] = 1 - (ndf.exits + ndf.taps_forward - ndf.taps_back) / (ndf.impressions)
    ndf.drop('data', axis=1, inplace=True)

    ndf['hashtag'] = ndf.caption.apply(lambda x: "#" + re.findall(r"#(\w+)", x)[0]
    if len(re.findall(r"#(\w+)", x)) > 0
    else "None")

    ndf['hashtag2'] = ndf.caption.apply(lambda x: "#" + re.findall(r"#(\w+)", x)[1]
    if len(re.findall(r"#(\w+)", x)) > 1
    else "None")

    pagen = {'bbb': 'Big Brother Brasil', 'gshow': 'GSHOW',
             'caldeiraodohuck': 'Caldeirão do Huck', 'oficialzorra': 'Zorra',
             'popstar': 'Popstar', 'thevoicebrasil': 'The Voice Brasil',
             'conversacombial': 'Conversa com Bial', 'malhacao': 'Malhação - Toda Forma de Amar'}

    ndf['produto'] = np.where(ndf.username == "gshow", ndf.hashtag.map(tags), np.nan)
    ndf['produto'] = np.where((ndf.username == "gshow") & (ndf.produto.isnull()),
                              ndf.hashtag2.map(tags), ndf.produto)

    ndf['produto'] = np.where((ndf.username != "gshow") & (ndf.produto.isnull()),
                              ndf.username.map(pagen), ndf.produto)

    ndf['produto'] = np.where(ndf.produto.isnull(), "GSHOW", ndf.produto)
    ndf.drop(['hashtag', 'hashtag2'], axis=1, inplace=True)

    ndf['caption'] = ndf.caption.astype('category')
    ndf['id'] = ndf.id.astype('category')
    ndf['media_type'] = ndf.media_type.astype('category')
    ndf['media_url'] = ndf.media_url.astype('category')
    ndf['permalink'] = ndf.permalink.astype('category')
    ndf['username'] = ndf.username.astype('category')
    ndf['produto'] = ndf.produto.astype('category')

    ndf = ndf[['timestamp', 'username', 'id', 'caption', 'permalink', 'media_type', 'impressions',
               'video_views', 'like_count', 'comments_count', 'saved', 'reach', 'interactions', 'produto']]

    ndf['timestamp'] = ndf.timestamp.apply(bq_date)
    ndf['timestamp'] = ndf.timestamp.astype("category")

    df = ndf.copy()

    for dtx in df[df.timestamp != today].timestamp.unique():

        s = io.StringIO()
        aux = df[df.timestamp == dtx].copy()
        aux.to_csv(s, sep=",", encoding="utf-8", index=False)

        response = upload_to_gcs('gdata-dn-gshow-sandbox',

        print("{0} - {1}".format(dtx, response), end="\r", flush=True)


nsertId: "000000-df33f5d7-a462-4357-bd60-15a52f1b66c5" Labels: {...} logname: "Projects/gdata-Dn-Gshow-sandbox/logs/cloudfunctions.googleapis.com%2Fcloud-functions" receiveTimestamp: "2019-08-07T17:39:07.365346300Z" Resource: {...}
Severity: "ERROR" textPayload: "Traceback (Most recent call last): File "/env/local/lib/python3.7/site-Packages/google/cloud/functions/worker.py", line 346, in run_http_function result = _function_handler.invoke_user_function(flask.request) File "/env/local/lib/python3.7/site-Packages/google/cloud/functions/worker.py", line 217, in invoke_user_function Return call_user_function(request_or_event) File "/env/local/lib/python3.7/site-Packages/google/cloud/functions/worker.py", line 210, in call_user_function Return self. _user_function(request_or_event) File "/user_code/main.py", line 262, in st_scrapper ndf['media_url'] = ndf.media_url.astype('Category') File "/env/local/lib/python3.7/site-Packages/pandas/core/Generic.py", line 5180, in getattr Return Object.getattribute(self, name) Attributeerror: 'Dataframe' Object has no attribute 'media_url' " timestamp: "2019-08-07T17:39:01.147Z" trace: "Projects/gdata-Dn-Gshow-sandbox/Traces/a1448d780d796c9f70f4903775329c7


