You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
Currently updated the dashboard to use INFORMATION_SCHEMA.PARTITIONS tables, but it doesn't have details on client level.
Good - bytes available, bad - there is no client breakdown.
Potentially we could pre-calculate this data after each crawl.
#standardSQL
WITH metadata AS (
SELECT * FROM `httparchive.pages.__TABLES__` UNION ALL
SELECT * FROM `httparchive.requests.__TABLES__` UNION ALL
SELECT * FROM `httparchive.response_bodies.__TABLES__` UNION ALL
SELECT * FROM `httparchive.summary_pages.__TABLES__` UNION ALL
SELECT * FROM `httparchive.summary_requests.__TABLES__` UNION ALL
SELECT * FROM `httparchive.lighthouse.__TABLES__` UNION ALL
SELECT * FROM `httparchive.technologies.__TABLES__` UNION ALL
SELECT
'httparchive' AS project_id,
'blink_features' AS dataset_id,
CONCAT(REGEXP_REPLACE(yyyymmdd, r'(\d{4})(\d{2})(\d{2})', r'\1_\2_\3_'), client) AS table_id,
NULL AS creation_time,
NULL AS last_modified_time,
COUNT(0) AS row_count,
SUM(LENGTH(CONCAT(yyyymmdd, client, id, feature, type, CAST(num_urls AS STRING), CAST(total_urls AS STRING), CAST(pct_urls AS STRING), ARRAY_TO_STRING(sample_urls, ' ')))) AS size_bytes,
1 AS type
FROM
`httparchive.blink_features.usage`
GROUP BY
table_id,
client
UNION ALL
SELECT
'httparchive' AS project_id,
'all.pages' AS dataset_id,
CONCAT(FORMAT_DATE('%Y_%m_%d_', date), client) AS table_id,
NULL AS creation_time,
NULL AS last_modified_time,
COUNT(0) AS row_count,
NULL AS size_bytes,
1 AS type
FROM
`httparchive.all.pages`
WHERE
date < CURRENT_DATE()
GROUP BY
table_id
UNION ALL
SELECT
'httparchive' AS project_id,
'all.requests' AS dataset_id,
CONCAT(FORMAT_DATE('%Y_%m_%d_', date), client) AS table_id,
NULL AS creation_time,
NULL AS last_modified_time,
COUNT(0) AS row_count,
NULL AS size_bytes,
1 AS type
FROM
`httparchive.all.requests`
WHERE
date < CURRENT_DATE()
GROUP BY
table_id)
SELECT
REPLACE(SUBSTR(table_id, 0, 10), '_', '') AS yyyymmdd,
IF(ENDS_WITH(table_id, 'desktop'), 'desktop', 'mobile') AS client,
*
FROM
metadata
The text was updated successfully, but these errors were encountered:
HTTP Archive BigQuery Meta Dashboard dashboard doesn't show the information for new tables because of the query timeout
Currently updated the dashboard to use
INFORMATION_SCHEMA.PARTITIONS
tables, but it doesn't have details on client level.Good - bytes available, bad - there is no client breakdown.
Potentially we could pre-calculate this data after each crawl.
The text was updated successfully, but these errors were encountered: