Thanks.
import datetime as dt  # FIX: the snippet uses dt.time(...) but never imported datetime

import pandas as pd

# Reproduction DataFrame: a single column of datetime.time values
# (mostly half-hour slots plus a few odd minutes). Order is preserved
# exactly as reported in the issue (lexicographic, not chronological).
df = pd.DataFrame(
    dict(
        start_time=[
            dt.time(0, 0),
            dt.time(0, 15),
            dt.time(0, 30),
            dt.time(0, 45),
            dt.time(1, 0),
            dt.time(1, 30),
            dt.time(10, 0),
            dt.time(10, 30),
            dt.time(11, 0),
            dt.time(11, 30),
            dt.time(12, 0),
            dt.time(12, 30),
            dt.time(12, 5),
            dt.time(13, 0),
            dt.time(13, 15),
            dt.time(13, 30),
            dt.time(14, 0),
            dt.time(14, 30),
            dt.time(14, 5),
            dt.time(15, 0),
            dt.time(15, 30),
            dt.time(16, 0),
            dt.time(16, 30),
            dt.time(17, 0),
            dt.time(17, 30),
            dt.time(18, 0),
            dt.time(18, 30),
            dt.time(19, 0),
            dt.time(19, 30),
            dt.time(2, 0),
            dt.time(2, 30),
            dt.time(20, 0),
            dt.time(20, 30),
            dt.time(21, 0),
            dt.time(21, 30),
            dt.time(22, 0),
            dt.time(22, 30),
            dt.time(23, 0),
            dt.time(23, 30),
            dt.time(3, 0),
            dt.time(3, 30),
            dt.time(3, 50),
            dt.time(4, 0),
            dt.time(4, 30),
            dt.time(5, 0),
            dt.time(5, 30),
            dt.time(6, 0),
            dt.time(6, 30),
            dt.time(7, 0),
            dt.time(7, 15),
            dt.time(7, 30),
            dt.time(7, 45),
            dt.time(8, 0),
            dt.time(8, 30),
            dt.time(9, 0),
            dt.time(9, 30),
        ]
    )
)
# Sense check that conversion to parquet and back works...
# Round-trip the DataFrame through a Parquet file and confirm it is unchanged.
import pyarrow.parquet as pq
import pyarrow as pa

arrow_table = pa.Table.from_pandas(df)
arrow_table
pq.write_table(arrow_table, "test.parquet")
back_to_pandas = pq.read_table("test.parquet").to_pandas()
back_to_pandas["start_time"].apply(type)  # values come back as datetime.time
back_to_pandas

from pandas.testing import assert_frame_equal

# FIX: the original line ended with a stray "." before the comment
# ("assert_frame_equal(back_to_pandas, df)."), which is a SyntaxError.
assert_frame_equal(back_to_pandas, df)  # <-- this is fine
# Attempt the same upload through the BigQuery client — this is where the bug
# manifests (the Parquet round-trip above succeeds, but this load fails).
from google.cloud import bigquery
bq = bigquery.Client()
# Serializes df (via Parquet) and loads it into the table wb_dev_us.time_test.
job = bq.load_table_from_dataframe(df, "wb_dev_us.time_test")
# Blocks until the load job finishes; raises BadRequest with
# "Invalid time value ... microseconds relative to the Unix epoch" (see trace below).
job.result() #<-- This errors
---------------------------------------------------------------------------
BadRequest Traceback (most recent call last)
~/projects/demand-model2/uk_demand_model/extract/mdd_extract.py in
466 bq = bigquery.Client()
467 job = bq.load_table_from_dataframe(df, "wb_dev_us.time_test_2")
----> 468 job.result()
~/.local/share/virtualenvs/demand-model2-OBCa31Zf/lib/python3.6/site-packages/google/cloud/bigquery/job/base.py in result(self, retry, timeout)
629
630 kwargs = {} if retry is DEFAULT_RETRY else {"retry": retry}
--> 631 return super(_AsyncJob, self).result(timeout=timeout, **kwargs)
632
633 def cancelled(self):
~/.local/share/virtualenvs/demand-model2-OBCa31Zf/lib/python3.6/site-packages/google/api_core/future/polling.py in result(self, timeout, retry)
132 # pylint: disable=raising-bad-type
133 # Pylint doesn't recognize that this is valid in this case.
--> 134 raise self._exception
135
136 return self._result
BadRequest: 400 Error while reading data, error message: Invalid time value 30600000000 for column 'start_time': generic::out_of_range: Cannot return an invalid time value of 30600000000 microseconds relative to the Unix epoch. The range of valid time values is [00:00:00, 23:59:59.999999]
Uploading `datetime.time` objects via `bigquery.Client().load_table_from_dataframe` fails or generates incorrect times. Testing a single time (`dt.time(12, 0)`) will upload, but the result in BigQuery is incorrect (10:03:46.765952), while the example below fails with an out-of-expected-range error. However, conversion to Parquet and back works fine. My guess is that the timestamp generated for Parquet is in microseconds (`us`) but BigQuery expects nanoseconds (`ns`)? Thanks.
Environment details
`google-cloud-bigquery` version: 2.3.1

Code example
Stack trace