-- Queries over the `ghdata` table, reading sub-fields of column `v` through
-- bracket paths such as v["repo"]["name"] and casting them to concrete types.
--
-- Layout note: each statement lives on its own lines. A `--` comment extends
-- to the end of the physical line, so the disabled query further down must
-- stay on a line of its own or it would swallow every statement after it.

-- Session settings applied before the queries run.
-- NOTE(review): exact effect of the two top-n / two-phase-read switches is
-- not visible from this file; confirm against the engine documentation.
SET exec_mem_limit = 8G;
SET enable_two_phase_read_opt = true;
SET topn_opt_limit_threshold = 1024;

-- Total number of rows.
SELECT count()
FROM ghdata;

-- Five repositories with the most 'WatchEvent' rows; ties broken by name.
SELECT
    cast(v["repo"]["name"] AS string) AS repo_name,
    count() AS stars
FROM ghdata
WHERE cast(v["type"] AS string) = 'WatchEvent'
GROUP BY repo_name
ORDER BY stars DESC, repo_name
LIMIT 5;

-- Largest v["id"], read as string first and then converted to bigint.
SELECT max(cast(cast(v["id"] AS string) AS bigint))
FROM ghdata;

-- Sum of v["id"], using the same string -> bigint conversion.
SELECT sum(cast(cast(v["id"] AS string) AS bigint))
FROM ghdata;

-- Sum over a two-level nested path cast directly to bigint.
SELECT sum(cast(v["payload"]["member"]["id"] AS bigint))
FROM ghdata;

-- Sum over a deeply nested path (five levels) cast to bigint.
SELECT sum(cast(v["payload"]["pull_request"]["milestone"]["creator"]["site_admin"] AS bigint))
FROM ghdata;

-- Sum of length() applied to a nested path without an explicit cast.
SELECT sum(length(v["payload"]["pull_request"]["base"]["repo"]["html_url"]))
FROM ghdata;

-- First ten rows (by k) whose member id is non-null when cast to string;
-- the selected value itself is returned uncast.
SELECT v["payload"]["member"]["id"] AS member_id
FROM ghdata
WHERE cast(v["payload"]["member"]["id"] AS string) IS NOT NULL
ORDER BY k
LIMIT 10;

-- Disabled query, kept verbatim. It uses the `v:path` accessor form rather
-- than the bracket form used everywhere else in this file.
-- NOTE(review): reason it was disabled is not recorded here; confirm before re-enabling.
-- select k, v:payload.commits.author.name AS name, e FROM ghdata as t lateral view explode(cast(v:payload.commits.author.name as array)) tm1 as e order by k limit 5;

-- json_extract over the whole value for the first ten 'WatchEvent' rows by k.
SELECT
    k,
    json_extract(v, '$.repo')
FROM ghdata
WHERE cast(v["type"] AS string) = 'WatchEvent'
ORDER BY k
LIMIT 10;

-- Row count per non-null member id; ordered by the first output column,
-- then by the second output column descending.
SELECT
    cast(v["payload"]["member"]["id"] AS bigint) AS member_id,
    count()
FROM ghdata
WHERE cast(v["payload"]["member"]["id"] AS bigint) IS NOT NULL
GROUP BY member_id
ORDER BY 1, 2 DESC
LIMIT 10;

-- Id, type and repository name for the first ten 'WatchEvent' rows by k.
SELECT
    k,
    cast(v["id"] AS string),
    cast(v["type"] AS string) AS type,
    cast(v["repo"]["name"] AS string)
FROM ghdata
WHERE cast(v["type"] AS string) = 'WatchEvent'
ORDER BY k
LIMIT 10;

-- First ten non-null pusher_type values by k, cast with the `text` type name.
SELECT cast(v["payload"]["pusher_type"] AS text) AS pusher_type
FROM ghdata
WHERE cast(v["payload"]["pusher_type"] AS text) IS NOT NULL
ORDER BY k
LIMIT 10;