1- -- - Status: 2018-08-28
1+ -- - Status: 2018-09-23
22-- - Execute this in BigQuery
33
44-- - select all source code lines of text files that contain a link to Stack Overflow
3131)
3232WHERE REGEXP_CONTAINS(line , r' (?i:https?://stackoverflow\. com/[^\s )\.\" ]*)' );
3333
34- => gh_so_references_2018_08_28 .matched_lines
34+ => gh_so_references_2018_09_23 .matched_lines
3535
3636
3737-- - join with table "files" to get information about repos
@@ -44,11 +44,11 @@ SELECT
4444 size,
4545 url,
4646 line
47- FROM ` sotorrent-org.gh_so_references_2018_08_28 .matched_lines` as lines
47+ FROM ` sotorrent-org.gh_so_references_2018_09_23 .matched_lines` as lines
4848LEFT JOIN ` bigquery-public-data.github_repos.files` as files
4949ON lines .file_id = files .id ;
5050
51- => gh_so_references_2018_08_28 .matched_files
51+ => gh_so_references_2018_09_23 .matched_files
5252
5353
5454-- - normalize the SO links to (http://stackoverflow.com/(a/q)/<id>)
7272 ELSE url
7373 END as url,
7474 line
75- FROM ` sotorrent-org.gh_so_references_2018_08_28 .matched_files` ;
75+ FROM ` sotorrent-org.gh_so_references_2018_09_23 .matched_files` ;
7676
77- => gh_so_references_2018_08_28 .matched_files_normalized
77+ => gh_so_references_2018_09_23 .matched_files_normalized
7878
7979
8080-- - extract post id from links, set post type id, and extract file extension from path
@@ -96,19 +96,19 @@ SELECT
9696 END as post_type_id,
9797 url,
9898 line
99- FROM ` sotorrent-org.gh_so_references_2018_08_28 .matched_files_normalized`
99+ FROM ` sotorrent-org.gh_so_references_2018_09_23 .matched_files_normalized`
100100WHERE
101101 REGEXP_CONTAINS(url, r' (http:\/\/ stackoverflow\. com\/ (?:a|q)\/ [\d ]+)' );
102102
103- => gh_so_references_2018_08_28 .matched_files_aq
103+ => gh_so_references_2018_09_23 .matched_files_aq
104104
105105
106106-- - use camel case for column names, add number of copies, and remove line content for export to MySQL database
107107# standardSQL
108108WITH
109109 copies AS (
110110 SELECT file_id, count (* ) as copies
111- FROM ` sotorrent-org.gh_so_references_2018_08_28 .matched_files_aq`
111+ FROM ` sotorrent-org.gh_so_references_2018_09_23 .matched_files_aq`
112112 GROUP BY file_id
113113 )
114114SELECT
@@ -123,16 +123,16 @@ SELECT
123123 post_type_id as PostTypeId,
124124 url as SOUrl,
125125 CONCAT(' https://raw.githubusercontent.com/' , repo_name, " /" , branch, " /" , path ) as GHUrl
126- FROM ` sotorrent-org.gh_so_references_2018_08_28 .matched_files_aq` files
126+ FROM ` sotorrent-org.gh_so_references_2018_09_23 .matched_files_aq` files
127127JOIN copies
128128ON files .file_id = copies .file_id ;
129129
130- => gh_so_references_2018_08_28 .PostReferenceGH
130+ => gh_so_references_2018_09_23 .PostReferenceGH
131131
132132
133133
134134# ##################################################################
135- # the following tables are not present in gh_so_references_2018_08_28
135+ # the following tables are not present in gh_so_references_2018_09_23
136136# will only be created on demand
137137# ##################################################################
138138
155155 parent_id as ParentId,
156156 SOUrl,
157157 GHUrl
158- FROM ` sotorrent-org.gh_so_references_2018_08_28 .PostReferenceGH` ref
158+ FROM ` sotorrent-org.gh_so_references_2018_09_23 .PostReferenceGH` ref
159159 LEFT JOIN ` bigquery-public-data.stackoverflow.posts_answers` a
160160 ON ref .PostId = a .id
161161 WHERE PostTypeId= 2
@@ -180,7 +180,7 @@ FROM answers
180180LEFT JOIN ` bigquery-public-data.stackoverflow.posts_questions` q
181181ON answers .ParentId = q .id ;
182182
183- => gh_so_references_2018_08_28 .PostReferenceGH_Answers
183+ => gh_so_references_2018_09_23 .PostReferenceGH_Answers
184184
185185
186186# standardSQL
@@ -192,9 +192,9 @@ SELECT
192192 CommentCount,
193193 Score,
194194 ParentViewCount
195- FROM ` sotorrent-org.gh_so_references_2018_08_28 .PostReferenceGH_Answers` ;
195+ FROM ` sotorrent-org.gh_so_references_2018_09_23 .PostReferenceGH_Answers` ;
196196
197- => gh_so_references_2018_08_28 .PostReferenceGH_Answers_R
197+ => gh_so_references_2018_09_23 .PostReferenceGH_Answers_R
198198
199199
200200-- - retrieve info about referenced SO questions
@@ -214,12 +214,12 @@ SELECT
214214 view_count as ViewCount,
215215 SOUrl,
216216 GHUrl
217- FROM ` sotorrent-org.gh_so_references_2018_08_28 .PostReferenceGH` ref
217+ FROM ` sotorrent-org.gh_so_references_2018_09_23 .PostReferenceGH` ref
218218LEFT JOIN ` bigquery-public-data.stackoverflow.posts_questions` q
219219ON ref .PostId = q .id
220220WHERE PostTypeId= 1 ;
221221
222- => gh_so_references_2018_08_28 .PostReferenceGH_Questions
222+ => gh_so_references_2018_09_23 .PostReferenceGH_Questions
223223
224224
225225# standardSQL
@@ -231,6 +231,6 @@ SELECT
231231 CommentCount,
232232 Score,
233233 ViewCount
234- FROM ` sotorrent-org.gh_so_references_2018_08_28 .PostReferenceGH_Questions` ;
234+ FROM ` sotorrent-org.gh_so_references_2018_09_23 .PostReferenceGH_Questions` ;
235235
236- => gh_so_references_2018_08_28 .PostReferenceGH_Questions_R
236+ => gh_so_references_2018_09_23 .PostReferenceGH_Questions_R
0 commit comments