From 713b469ee3489417ff34675fa8bee67ae73da70c Mon Sep 17 00:00:00 2001
From: microsheep
Date: Wed, 13 Jul 2016 12:22:18 +0800
Subject: [PATCH] INIT of repo dauGA

---
 .gitignore       |   2 +
 README.md        |  55 +++++++++++
 dauGA.py         | 231 +++++++++++++++++++++++++++++++++++++++++++++++
 google_auth.py   |  36 ++++++++
 sample_config.py | 126 ++++++++++++++++++++++++++
 5 files changed, 450 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 README.md
 create mode 100644 dauGA.py
 create mode 100644 google_auth.py
 create mode 100644 sample_config.py

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..ac2113b
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+config.py
+**.swp
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..f70d689
--- /dev/null
+++ b/README.md
@@ -0,0 +1,55 @@
+# Pull GA raw log data and upload it to BigQuery
+
+## How to run
+1. Copy sample_config.py to config.py
+2. Edit PROJECT_ID, DATA_SET, VIEW_ID and DATE_INIT in config.py
+3. Run dauGA.py (python2) and pass the path of a credential that has access to both Google Analytics and Google BigQuery
+
+
+## Important resources
+Google's [demo site](https://ga-dev-tools.appspot.com/dimensions-metrics-explorer/) for dimensions and metrics. It is genuinely important but hard to find, and you will almost certainly need it.
+
+
+## What this does
+This code is based on a blog [post](http://daynebatten.com/2015/07/raw-data-google-analytics/) that outlines how to dump the raw data out of GA and store it in BigQuery. (Buying this capability from Google's official offering costs at least USD 500 a month, so let us use this feature with a grateful heart.)
+
+The key that makes this possible: when you pull data from GA through the [Report API](https://developers.google.com/admin-sdk/reports/v1/reference/), each field of each row corresponds to a single aggregated value. In principle the Report API expects you to ask for statistical summaries, e.g. for each city and each user type (new vs. returning users), some aggregate such as time on page or number of clicks. The former are called Dimensions and the latter Metrics; in other words, the Report API reports the Metrics aggregated under every combination of Dimensions. See [here](https://developers.google.com/analytics/devguides/reporting/core/dimsmets) for the available Dimensions and Metrics.
+
+The blog post, however, describes a clever and simple trick: GA already lets you send your own custom dimensions along with each hit. If the custom dimensions we send never repeat (e.g. unique_user_id and a timestamp), then when we request data from the Report API every hit comes back as its own row, and we get GA's raw log data!
+
+The Report API currently allows 50,000 requests per day with an upper limit of 10,000 rows each, so unless the site grows to the point of logging 500 million rows a day, this fully covers our needs: pull the raw GA log data and upload it to BigQuery, where we can run whatever analysis we like. Even better, you can bring along the useful user data GA already collects (region, operating system, device model, browser, ...) for later use.
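+
+For example, a minimal report request of this kind looks roughly like the sketch below (the full versions used by this project live in sample_config.py; here dimension8 and dimension9 are assumed to be the never-repeating user key and browser timestamp custom dimensions):
+
+```
+request_body = {
+    "reportRequests": [
+        {
+            "viewId": "12345678",  # your GA view ID
+            "dateRanges": [{"startDate": "2016-07-12", "endDate": "2016-07-12"}],
+            # dimension8 (user key) + dimension9 (browser timestamp) never repeat,
+            # so every returned row corresponds to a single hit
+            "dimensions": [{"name": "ga:dimension8"}, {"name": "ga:dimension9"}, {"name": "ga:pagePath"}],
+            "metrics": [{"expression": "ga:timeOnPage"}],
+            "pageSize": "10000",
+            "pageToken": ""
+        }
+    ]
+}
+```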
+
+## Preparation and how the script runs
+Our goal is to automate, as much as possible, dumping the data from GA and uploading it to BigQuery to build the tables we need, updating them on schedule (appending the new data to BigQuery), so that from then on all we have to do is happily analyze data in BigQuery.
+
+
+- Things to prepare before running dauGA.py:
+- First, create your own service account on [Google Cloud Platform](https://console.cloud.google.com/iam-admin/serviceaccounts/project), then update the path of the key file into CREDENTIAL_PATH in the code.
+- Install our patched pandas. The current pandas cannot append a DataFrame to a BigQuery table it did not create itself ([related issue](https://github.com/pydata/pandas/issues/13086)), and we need that capability, so we forked pandas for now; the issue says the problem will be fixed in the next release (0.18.2).
+
+```
+    pip install git+https://github.com/junyiacademy/pandas.git@tmp_solution_to_append_existed_bq_table
+```
+
+- Adjust the parameters in config.py as needed.
+- The most important one is DATA_SET. If you run this locally, be sure to change DATA_SET so you do not pollute or break the tables we actually use on BigQuery.
+- To add a table of your own, add an entry to the `ga_bq_config` variable; it decides what you dump from the GA Report API and, in the end, what gets uploaded. See [here](https://developers.google.com/analytics/devguides/reporting/core/v4/rest/v4/reports/batchGet) for how to write the `report_request`, and also set `destination_table` (the BigQuery table you want the data written to). A commented sketch of such an entry sits at the end of sample_config.py.
+- How it works: every time the script runs, it reads the `{{DATA_SET}}.upload_progress_log` table and, for every table to be updated and every day from 2016-6-4 (DATE_INIT) up to the day before the run, checks whether a "success" record exists. If not, it deletes that day's data from the corresponding table (so the next load cannot create duplicates) and then dumps that day's data again. This keeps the system and the data consistent.
+- Whether or not a dump succeeds, the result is appended to `{{DATA_SET}}.upload_progress_log`, both for later runs to consult and for engineers to review.
+
+## How to test
+#### The pipeline mainly does four things: 1. fetch the data from GA, 2. adjust the data, 3. upload the data to BigQuery, 4. ensure robustness
+
+1. In the function request_df_from_ga you can check that the data fetched from GA matches your settings in `ga_bq_config`.
+2. The data adjustments live in the function columns_conversion; inspect the DataFrame it returns to make sure the table about to be uploaded is what you need.
+3. After a table uploads successfully, check that the corresponding table on BigQuery is identical to your local DataFrame.
+4. Robustness is the part that needs the most careful testing. The code is meant to run automatically every day and talks to many external APIs, so errors are expected. We do not want every problem to wreck all the data and force us to start over; ideally, whenever a run fails, the next run simply repairs the earlier damage. The implementation is described in "How it works" above.
+5. To test robustness, the current approach is to raise errors at different stages of a run and then check: a. does partial data get uploaded to BigQuery? If not, does the next run still backfill that day for that table? If partial data did reach BigQuery, does the next run delete it before re-uploading?
+6. Check that, for each table and each successful day recorded in `{{DATA_SET}}.upload_progress_log`, the uploaded_data_size equals the count of rows with that ga_session_date in the corresponding table, to confirm nothing was loaded twice or missed (a query sketch is given at the end of this README).
+
+## Future Work
+
+1. The browser UTC time we upload carries millisecond precision, but the pd.to_gbq function appears to upload only down to the second. If higher resolution is ever needed there are two options: a. patch pandas, or b. upload the value as a string and handle it later with queries.
+2. ga_session_time & backup_date are not actually UTC, yet the columns still show up as UTC. This does not look easy to fix either, since it very likely requires touching pandas, so we will deal with it when we get the chance.
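+
+The check in step 6 above can be done roughly as follows (a sketch only; the config name, date and credential path below are placeholders to adapt):
+
+```
+import pandas as pd
+import config
+
+config_name = "user_page_view"     # one of the entries in config.ga_bq_config
+day = "2016-06-04"                 # the day to verify
+credential_path = "./credential.json"
+table = config.ga_bq_config[config_name]["destination_table"]
+
+# size recorded in the status log for that (table, day) pair
+logged = pd.read_gbq(
+    "SELECT uploaded_data_size FROM [%s] WHERE config_name = '%s' "
+    "AND ga_session_date = '%s' AND status = 'success'"
+    % (config.GA_BQ_UPLOAD_STATUS_LOG, config_name, day),
+    project_id=config.PROJECT_ID, private_key=credential_path)
+
+# actual number of rows carrying that ga_session_date in the destination table
+actual = pd.read_gbq(
+    'SELECT count(*) FROM [%s] WHERE DATE(ga_session_date) == "%s"' % (table, day),
+    project_id=config.PROJECT_ID, private_key=credential_path)
+
+print logged.iloc[0, 0], actual.iloc[0, 0]  # the two numbers should be equal
+```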
diff --git a/dauGA.py b/dauGA.py
new file mode 100644
index 0000000..ce2613a
--- /dev/null
+++ b/dauGA.py
@@ -0,0 +1,231 @@
+import argparse
+import datetime
+'''
+The pandas package is currently installed from git+https://github.com/junyiacademy/pandas.git@tmp_solution_to_append_existed_bq_table
+It is a fork of pandas that lets us append to an existing bq table.
+Please use the following command to install pandas:
+    pip install git+https://github.com/junyiacademy/pandas.git@tmp_solution_to_append_existed_bq_table
+'''
+import pandas as pd
+# A self-written library to handle google authorization
+from google_auth import google_auth
+# Import our config file
+import config
+
+
+def bq_query_to_table(query, table):  # run a query on bq and save the result into a bq table
+    dataset = table.split('.')[0]
+    table = table.split('.')[1]
+    job = bigquery.jobs().insert(projectId=config.PROJECT_ID,
+                                 body={"projectId": config.PROJECT_ID,
+                                       "configuration": {
+                                           "query": {
+                                               "query": query,
+                                               "destinationTable": {
+                                                   "projectId": config.PROJECT_ID,
+                                                   "datasetId": dataset,
+                                                   "tableId": table
+                                               },
+                                               "writeDisposition": "WRITE_TRUNCATE",
+                                               "createDisposition": "CREATE_IF_NEEDED"
+                                           }
+                                       }
+                                       }).execute()
+    return job['id']
+
+
+def check_table_exist(table):
+    dataset = table.split('.')[0]
+    table = table.split('.')[1]
+    result = bigquery.tables().list(projectId=config.PROJECT_ID, datasetId=dataset).execute()
+    if 'tables' not in result:
+        return False
+    table_list = [i['tableReference']['tableId'] for i in result['tables']]
+    return table in table_list
+
+
+def check_ga_session_date_exist(destination_table, date, credential_path):  # check if the destination table already has data for a certain date
+    if not check_table_exist(destination_table):  # it cannot have that date if the table does not exist
+        return False
+    query = 'SELECT count(*) FROM [%s] WHERE DATE(ga_session_date) == "%s"' % (destination_table, date.strftime("%Y-%m-%d"))
+    return (pd.read_gbq(query, project_id=config.PROJECT_ID, verbose=False, private_key=credential_path).iloc[0, 0] > 0)
+
+
+def remove_certain_ga_session_date_data(destination_table, date):  # remove the data of a certain date by rewriting the table without it
+    query = 'SELECT * FROM [%s] WHERE DATE(ga_session_date) != "%s"' % (destination_table, date.strftime("%Y-%m-%d"))
+    return bq_query_to_table(query, destination_table)
+
+
+def parse_result_to_df(result):  # convert a GA reporting API response into a DataFrame
+    columns_list = []
+    columns_list.extend(result['reports'][0]['columnHeader']['dimensions'])
+    columns_list.extend([i['name'] for i in result['reports'][0]['columnHeader']['metricHeader']['metricHeaderEntries']])
+
+    row_num = len(result['reports'][0]['data']['rows'])
+    df = pd.DataFrame(columns=columns_list, index=range(row_num))
+    for i, row in enumerate(result['reports'][0]['data']['rows']):
+        list_to_append = []
+        list_to_append.extend(row['dimensions'])
+        list_to_append.extend(row['metrics'][0]['values'])
+        for j in range(len(list_to_append)):
+            df.iat[i, j] = list_to_append[j]
+    return df
+
+
+def unix_time_millis(dt):
+    epoch = datetime.datetime.utcfromtimestamp(0)
+    return (dt - epoch).total_seconds() * 1000.0
+
+
+def convert_js_date_format(date_str):  # convert a js datetime string into unix epoch milliseconds
+    if date_str.isdigit():
+        return date_str
+    date_str = date_str.replace(' GMT', '').replace(' UTC', '')
+    if date_str.count(":") == 3:
+        try:
+            return unix_time_millis(datetime.datetime.strptime(date_str, "%a, %d %b %Y %H:%M:%S:%f"))
+        except ValueError:
+            # print "date_str: %s cannot be converted" % date_str
+            return date_str
+    elif date_str.count(":") == 2:
+        try:
+            return unix_time_millis(datetime.datetime.strptime(date_str, "%a, %d %b %Y %H:%M:%S"))
+        except ValueError:
+            # print "date_str: %s cannot be converted" % date_str
+            return date_str
+    else:
+        return date_str
+
+
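+# columns_conversion (below) renames the custom-dimension columns coming back
+# from GA (ga:dimension8 -> user_key_name, ga:dimension9 -> browser_utc_time,
+# ga:dimension10 -> cookie_uuid), casts the numeric metrics, merges
+# ga_dateHour + ga_minute into a ga_session_time timestamp, and stamps every
+# row with the ga_session_date that is being processed.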
+def columns_conversion(df, date):  # change the df we request from ga into the one we upload to bq
+    columns = [c.replace(':', '_') for c in df.columns]
+    for i, c in enumerate(columns):
+        if c == "ga_dimension8":
+            columns[i] = "user_key_name"
+        elif c == "ga_dimension9":
+            columns[i] = "browser_utc_time"
+            df.iloc[:, i] = df.iloc[:, i].apply(convert_js_date_format).astype(str)
+        elif c == "ga_dimension10":
+            columns[i] = "cookie_uuid"
+        elif c == "ga_timeOnPage" or c == "ga_pageviews" or c == "ga_hits":
+            df.iloc[:, i] = df.iloc[:, i].apply(lambda x: int(float(x)))
+        elif c == "ga_exits":
+            df.iloc[:, i] = df.iloc[:, i].astype(bool)
+    df.columns = columns
+    if 'ga_dateHour' in df.columns and 'ga_minute' in df.columns:
+        df.loc[:, 'ga_session_time'] = pd.to_datetime((df.loc[:, 'ga_dateHour'] + df.loc[:, 'ga_minute']), format="%Y%m%d%H%M")
+        df.drop(['ga_dateHour', 'ga_minute'], inplace=True, axis=1)
+    df['ga_session_date'] = pd.to_datetime(date)  # we always add the ga session date to the data
+    return df
+
+
+def request_df_from_ga(request, page_token=""):
+    request["reportRequests"][0]["pageToken"] = page_token
+    result = analytics.reports().batchGet(body=request).execute()
+    if 'rows' not in result['reports'][0]['data']:  # got no data from GA
+        print 'request from GA got no data. Row number is 0'
+        return (0, -1)
+    df = parse_result_to_df(result)
+    if 'nextPageToken' in result['reports'][0]:
+        return (df, result['reports'][0]['nextPageToken'])
+    else:
+        return (df, -1)
+
+
+def daterange(start_date, end_date):  # yield every date from start_date (inclusive) to end_date (exclusive)
+    for n in range(int((end_date - start_date).days)):
+        yield start_date + datetime.timedelta(n)
+
+
+def ga_upload_to_bq_by_day(ga_to_bq_config_name, date, credential_path):
+    if not isinstance(date, datetime.date):
+        print 'the date parameter must be a datetime.date'
+        return None
+
+    request_body = config.ga_bq_config[ga_to_bq_config_name]["request_body"]
+    destination_table = config.ga_bq_config[ga_to_bq_config_name]["destination_table"]
+
+    if len(request_body["reportRequests"]) > 1:
+        print 'only one reportRequest is allowed at this time'
+        return None
+
+    request_body["reportRequests"][0]['dateRanges'] = [{"startDate": date.strftime("%Y-%m-%d"), "endDate": date.strftime("%Y-%m-%d")}]
+
+    total_row = 0
+    finish_flag = False
+    retry_limit_flag = False
+    cont_page_token = ''
+    retry_count = 0
+    print 'Start loading data from GA and uploading to %s, for %s' % (destination_table, date)
+    for i in range(1000):  # at most 1000 requests of 10,000 rows each --> up to 10,000,000 rows per day
+        try:
+            (df, cont_page_token) = request_df_from_ga(request_body, cont_page_token)
+            df = columns_conversion(df, date)
+            df.to_gbq(destination_table=destination_table, project_id=config.PROJECT_ID, if_exists='append', private_key=credential_path)
+            # df.to_csv("%s-%s-data" % (ga_to_bq_config_name, date))
+            row_num = len(df.index)
+            total_row = total_row + row_num
+            if cont_page_token == -1:
+                finish_flag = True
+
+        except Exception as e:
+            print "Failed to download the response from GA or to upload to %s" % destination_table
+            print str(e)
+            retry_count += 1
+            print "already tried %s times" % retry_count
+            if retry_count == 10:
+                retry_limit_flag = True
+
+        if finish_flag:
+            print 'Successfully downloaded the response from GA and uploaded it to %s' % destination_table
+            return {"status": "success", "data_size": total_row}
+        elif retry_limit_flag:
+            print "Reached the retry limit, script closed"
+            return {"status": "failure", "data_size": total_row}
+    print "Downloaded GA data exceeded the row limit!!! Need to increase the GA report API request limit"
+    return {"status": "failure", "data_size": total_row}
+
+
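+# Example call (for reference; "user_page_view" is one of the entries defined in
+# sample_config.py, and the return value is a dict such as
+# {"status": "success", "data_size": <number of uploaded rows>}):
+#   ga_upload_to_bq_by_day("user_page_view", datetime.date(2016, 7, 12), "./credential.json")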
+if __name__ == "__main__":
+    # Parse the arguments to get the credential_path
+    parser = argparse.ArgumentParser(description='Input the secret json path of the service account for the corresponding project')
+    parser.add_argument('--credential_path', type=str, dest='credential_path', required=True, help='path of the service account credential from GCP; use $gcp_service_account in Jenkins')
+    args = vars(parser.parse_args())
+    credential_path = args["credential_path"]
+    # Use the google_auth library to get access to the Google APIs
+    Auth = google_auth(credential_path)
+    bigquery = Auth.get_auth('bigquery_v2')
+    analytics = Auth.get_auth('analytics_v4')
+    # Check whether the GA_BQ_UPLOAD_STATUS_LOG table exists in gbq
+    if check_table_exist(config.GA_BQ_UPLOAD_STATUS_LOG):
+        ga_bq_upload_status_log = pd.read_gbq(query="SELECT * FROM [%s]" % config.GA_BQ_UPLOAD_STATUS_LOG, project_id=config.PROJECT_ID, private_key=credential_path)
+    else:
+        ga_bq_upload_status_log = pd.DataFrame(columns=['config_name', 'ga_session_date', 'status', 'backup_date', 'uploaded_data_size'])
+    # Set the date range to check: from DATE_INIT up to (but not including) today
+    d = config.DATE_INIT.split("-")
+    date_init = datetime.date(int(d[0]), int(d[1]), int(d[2]))
+    date_now = datetime.datetime.now().date()
+
+    for config_name in config.ga_bq_config:
+        for date in daterange(date_init, date_now):
+            destination_table = config.ga_bq_config[config_name]["destination_table"]
+            print "start checking (%s, %s) pair for GA to BQ" % (config_name, date)
+            condition = (ga_bq_upload_status_log["config_name"] == config_name) & (ga_bq_upload_status_log["ga_session_date"] == date.strftime("%Y-%m-%d"))
+            if ga_bq_upload_status_log[condition].empty:  # no record yet, a completely new table-date pair
+                print 'found no record for this pair, trying to upload data for (%s, %s)' % (config_name, date)
+                if check_ga_session_date_exist(destination_table, date, credential_path):
+                    print 'found existing data for this date in the bq table, removing it.'
+                    remove_certain_ga_session_date_data(destination_table, date)
+                upload_result = ga_upload_to_bq_by_day(config_name, date, credential_path)
+                current_result = pd.DataFrame(data={"config_name": config_name, "ga_session_date": date.strftime("%Y-%m-%d"), "status": upload_result['status'], "backup_date": date_now.strftime("%Y-%m-%d"), "uploaded_data_size": upload_result['data_size']}, index=[0])
+                print "update the corresponding result of (%s, %s) to %s" % (config_name, date, config.GA_BQ_UPLOAD_STATUS_LOG)
+                current_result.to_gbq(destination_table=config.GA_BQ_UPLOAD_STATUS_LOG, project_id=config.PROJECT_ID, if_exists='append', private_key=credential_path)
+            elif 'success' in ga_bq_upload_status_log[condition]['status'].values:
+                print "this pair has already succeeded"
+            else:  # a failure record exists: remove that date's data from the table and re-upload it
+                print 'found a pair with failure status, removing the existing data and re-uploading'
+                remove_certain_ga_session_date_data(destination_table, date)
+                upload_result = ga_upload_to_bq_by_day(config_name, date, credential_path)
+                current_result = pd.DataFrame(data={"config_name": config_name, "ga_session_date": date.strftime("%Y-%m-%d"), "status": upload_result['status'], "backup_date": date_now.strftime("%Y-%m-%d"), "uploaded_data_size": upload_result['data_size']}, index=[0])
+                print "update the corresponding result of (%s, %s) to %s" % (config_name, date, config.GA_BQ_UPLOAD_STATUS_LOG)
+                current_result.to_gbq(destination_table=config.GA_BQ_UPLOAD_STATUS_LOG, project_id=config.PROJECT_ID, if_exists='append', private_key=credential_path)
diff --git a/google_auth.py b/google_auth.py
new file mode 100644
index 0000000..b77cb66
--- /dev/null
+++ b/google_auth.py
@@ -0,0 +1,36 @@
+from oauth2client.service_account import ServiceAccountCredentials
+from googleapiclient.discovery import build
+from httplib2 import Http
+
+
+class google_auth:
+    credential_path = "./credential.json"
+    types = {
+        'analytics_v4': {
+            'api_name': 'analyticsreporting',
+            'scope': 'https://www.googleapis.com/auth/analytics',
+            'version': 'v4'
+        },
+        'bigquery_v2': {
+            'api_name': 'bigquery',
+            'scope': 'https://www.googleapis.com/auth/bigquery',
+            'version': 'v2'
+        },
+        'storage_v1': {
+            'api_name': 'storage',
+            'scope': 'https://www.googleapis.com/auth/cloud-platform',
+            'version': 'v1'
+        }
+    }
+
+    def __init__(self, path):
+        self.credential_path = path
+
+    def get_auth(self, name):  # build an authorized service object for the requested API type
+        if name in self.types:
+            credentials = ServiceAccountCredentials.from_json_keyfile_name(self.credential_path, self.types[name]['scope'])
+            http_auth = credentials.authorize(Http())
+            auth_object = build(self.types[name]['api_name'], self.types[name]['version'], http=http_auth)
+            return auth_object
+        else:
+            raise LookupError('There is no such type!')
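+
+# Usage (mirrors how dauGA.py uses this class):
+#   Auth = google_auth("/path/to/service_account_credential.json")
+#   analytics = Auth.get_auth('analytics_v4')
+#   bigquery = Auth.get_auth('bigquery_v2')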
diff --git a/sample_config.py b/sample_config.py
new file mode 100644
index 0000000..b42a6ff
--- /dev/null
+++ b/sample_config.py
@@ -0,0 +1,126 @@
+# Copy this file to config.py and change the settings
+
+# Project ID and dataset to use in Big query
+PROJECT_ID = "my_project"
+# DATA_SET = "GoogleAnalyticsData"
+DATA_SET = "my_ga_data"
+GA_BQ_UPLOAD_STATUS_LOG = (DATA_SET + ".upload_progress_log")
+# View ID of GA. Check your GA view to find it.
+VIEW_ID = "12345678"
+# First day we upload user_key_email and time so we can request every single log from ga
+DATE_INIT = "2016-1-1"
+
+# Tables we want to store from GA
+ga_bq_config = {
+    "user_page_view": {
+        "request_body": {
+            "reportRequests":
+            [
+                {
+                    "viewId": VIEW_ID,
+                    "dateRanges": [{"startDate": "", "endDate": ""}],
+                    "dimensions": [{"name": "ga:dimension8"}, {"name": "ga:dimension9"}, {"name": "ga:pagePath"}, {"name": "ga:dateHour"}, {"name": "ga:minute"}],
+                    "dimensionFilterClauses": {},
+                    "metrics": [{"expression": "ga:timeOnPage"}, {"expression": "ga:exits"}],
+                    "orderBys": [{"fieldName": "ga:dateHour"}, {"fieldName": "ga:minute"}, {"fieldName": "ga:dimension8"}],
+                    "pageSize": "10000",
+                    "pageToken": ""
+                }
+            ]
+        },
+        "destination_table": DATA_SET + ".ga_page_view"
+    },
+    "user_event": {
+        "request_body": {
+            "reportRequests":
+            [
+                {
+                    "viewId": VIEW_ID,
+                    "dateRanges": [{"startDate": "", "endDate": ""}],
+                    "dimensions": [{"name": "ga:dimension8"}, {"name": "ga:dimension9"}, {"name": "ga:dateHour"}, {"name": "ga:minute"}, {"name": "ga:eventCategory"}, {"name": "ga:eventAction"}, {"name": "ga:eventLabel"}],
+                    "dimensionFilterClauses": {},
+                    "metrics": [{"expression": "ga:hits"}],
+                    "orderBys": [{"fieldName": "ga:dateHour"}, {"fieldName": "ga:minute"}, {"fieldName": "ga:dimension8"}],
+                    "pageSize": "10000",
+                    "pageToken": ""
+                }
+            ]
+        },
+        "destination_table": DATA_SET + ".ga_user_event"
+    },
+    "user_device": {
+        "request_body": {
+            "reportRequests":
+            [
+                {
+                    "viewId": VIEW_ID,
+                    "dateRanges": [{"startDate": "", "endDate": ""}],
+                    "dimensions": [{"name": "ga:dimension8"}, {"name": "ga:deviceCategory"}, {"name": "ga:operatingSystem"}, {"name": "ga:operatingSystemVersion"},
+                                   {"name": "ga:browser"}, {"name": "ga:browserVersion"}],
+                    "dimensionFilterClauses": {},
+                    "metrics": [{"expression": "ga:timeOnPage"}, {"expression": "ga:pageviews"}],
+                    "orderBys": [{"fieldName": "ga:dimension8"}],
+                    "pageSize": "10000",
+                    "pageToken": ""
+                }
+            ]
+        },
+        "destination_table": DATA_SET + ".ga_user_device"
+    },
+    "user_region": {
+        "request_body": {
+            "reportRequests":
+            [
+                {
+                    "viewId": VIEW_ID,
+                    "dateRanges": [{"startDate": "", "endDate": ""}],
+                    "dimensions": [{"name": "ga:dimension8"}, {"name": "ga:region"}, {"name": "ga:city"}],
+                    "dimensionFilterClauses": {},
+                    "metrics": [{"expression": "ga:timeOnPage"}, {"expression": "ga:pageviews"}],
+                    "orderBys": [{"fieldName": "ga:dimension8"}],
+                    "pageSize": "10000",
+                    "pageToken": ""
+                }
+            ]
+        },
+        "destination_table": DATA_SET + ".ga_user_region"
+    },
+    "user_cookie_map": {
+        "request_body": {
+            "reportRequests":
+            [
+                {
+                    "viewId": VIEW_ID,
+                    "dateRanges": [{"startDate": "", "endDate": ""}],
+                    "dimensions": [{"name": "ga:dimension8"}, {"name": "ga:dimension10"}],
+                    "dimensionFilterClauses": {},
+                    "metrics": [{"expression": "ga:timeOnPage"}, {"expression": "ga:pageviews"}],
+                    "orderBys": [{"fieldName": "ga:dimension8"}],
+                    "pageSize": "10000",
+                    "pageToken": ""
+                }
+            ]
+        },
+        "destination_table": DATA_SET + ".ga_user_cookie_map"
+    },
+    "user_mobile": {
+        "request_body": {
+            "reportRequests":
+            [
+                {
+                    "viewId": VIEW_ID,
+                    "dateRanges": [{"startDate": "", "endDate": ""}],
+                    "dimensions": [{"name": "ga:dimension8"}, {"name": "ga:mobileDeviceInfo"},
+                                   {"name": "ga:operatingSystem"}, {"name": "ga:operatingSystemVersion"},
+                                   {"name": "ga:browser"}, {"name": "ga:browserVersion"}],
+                    "dimensionFilterClauses": {},
+                    "metrics": [{"expression": "ga:timeOnPage"}, {"expression": "ga:pageviews"}],
+                    "orderBys": [{"fieldName": "ga:dimension8"}],
"10000", + "pageToken": "" + } + ] + }, + "destination_table": DATA_SET+".ga_user_mobile" + }, +}