Source code for kerchunk.tiff

import io
import fsspec
import enum
import ujson

try:
    import tifffile
except ModuleNotFoundError:  # pragma: no cover
    raise ImportError(
        "tifffile is required for kerchunking TIFF files. Please install with "
        "`pip/conda install tifffile`."
    )

import kerchunk.utils


def tiff_to_zarr(urlpath, remote_options=None, target=None, target_options=None):
    """Wraps TIFFFile's fsspec writer to extract metadata as attributes

    The first series of the TIFF is exported as a reference set, and every
    ``*metadata`` attribute of the opened ``TiffFile`` is merged into the
    root ``.zattrs`` of that reference set.

    Parameters
    ----------
    urlpath: str
        Location of input TIFF
    remote_options: dict
        pass these to fsspec when opening urlpath
    target: str
        Write JSON to this location. If not given, no file is output
    target_options: dict
        pass these to fsspec when opening target. The target is opened with
        ``mode="w"`` unless a ``mode`` is given here.

    Returns
    -------
    references dict
    """
    from xml.etree import ElementTree

    with fsspec.open(urlpath, **(remote_options or {})) as of:
        # rpartition, unlike unpacking rsplit("/", 1), does not raise for a
        # path without "/": the base URL is then the empty string.
        url, _, name = urlpath.rpartition("/")

        with tifffile.TiffFile(of, name=name) as tif:
            with tif.series[0].aszarr() as store:
                of2 = io.StringIO()
                store.write_fsspec(of2, url=url)
                out = ujson.loads(of2.getvalue())

                meta = ujson.loads(out[".zattrs"])
                for k in dir(tif):
                    if not k.endswith("metadata"):
                        continue
                    try:
                        met = getattr(tif, k, None)
                    except Exception:
                        # best effort: skip metadata attributes that fail to load
                        continue
                    try:
                        d = dict(met or {})
                    except ValueError:
                        # newer tifffile exposes xml structured tags
                        e = ElementTree.fromstring(met)
                        d = {i.get("name"): i.text for i in e}
                    meta.update(d)
                for k, v in meta.copy().items():
                    # deref enums
                    # NOTE(review): enum.EnumMeta matches enum *classes*, not
                    # enum members - confirm whether enum.Enum was intended.
                    if isinstance(v, enum.EnumMeta):
                        meta[k] = v._name_
                out[".zattrs"] = ujson.dumps(meta)
    if "GTRasterTypeGeoKey" in meta:
        # TODO: make dataset and assign coords for geoTIFF
        # import zarr
        # fs = fsspec.filesystem("reference", fo=out
        # z = zarr.open(out.get_mapper())
        # coords = generate_coords(meta, z.shape)
        # rasterio.crs.CRS.from_epsg(attrs['ProjectedCSTypeGeoKey']).to_wkt("WKT1_GDAL") ??
        pass
    if target is not None:
        # fsspec.open defaults to mode="rb", but ujson.dump writes text, so
        # default to text-write mode; an explicit "mode" from the caller wins.
        write_options = {"mode": "w", **(target_options or {})}
        with fsspec.open(target, **write_options) as target_file:
            ujson.dump(out, target_file)
    return out
# http://geotiff.maptools.org/spec/geotiff6.html#6.3.1.3 units = { 9001: "metre", 9002: "foot", 9003: "US survey foot", 9015: "mile international nautical", # ... and many more } TiffToZarr = kerchunk.utils.class_factory(tiff_to_zarr)
def generate_coords(attrs, shape):
    """Build coordinate vectors for an array from GeoTIFF tags

    Specific to GeoTIFF input attributes

    Parameters
    ----------
    attrs: dict
        Containing the geoTIFF tags, probably the root group of the dataset;
        ``ModelPixelScale`` and ``ModelTiepoint`` are read
    shape: tuple[int]
        The array size in numpy (C) order; the last two entries are taken as
        (height, width)

    Returns
    -------
    dict with "x" and "y" arrays, plus "z" when shape has more than two
    dimensions
    """
    import numpy as np

    def axis(count, step, origin):
        # positions are shifted by half a step away from the origin
        return np.arange(count) * step + origin + step / 2

    n_rows, n_cols = shape[-2:]
    scale_x, scale_y, scale_z = attrs["ModelPixelScale"][:3]
    origin_x, origin_y, origin_z = attrs["ModelTiepoint"][3:6]

    coords = {
        "x": axis(n_cols, scale_x, origin_x),
        # y runs in the negative direction from its origin
        "y": axis(n_rows, -scale_y, origin_y),
    }
    if len(shape) > 2:
        coords["z"] = axis(shape[-3], scale_z, origin_z)
    return coords