@comment{Medous, Goga, Ruiz-Gazen, Beaumont, Dessertaine and Puech (2023), Survey Methodology 49(2), pp. 385--410. Volume and number are stored as plain integers and month as the standard macro, so the bibliography style supplies any prefixes and localisation.}
@article{publications48531,
  author    = {Medous, Estelle and Goga, Camelia and Ruiz-Gazen, Anne and Beaumont, Jean-Fran{\c{c}}ois and Dessertaine, Alain and Puech, Pauline},
  title     = {{QR} prediction for statistical data integration},
  journal   = {Survey Methodology},
  volume    = {49},
  number    = {2},
  pages     = {385--410},
  month     = dec,
  year      = {2023},
  publisher = {Statistics Canada},
  address   = {Ottawa},
  keywords  = {Cosmetic estimator, Dual frame, GREG estimator, Non-probability sample, Probability sample, Variance estimator},
  url       = {https://publications.ut-capitole.fr/id/eprint/48531/},
  abstract  = {In this paper, we investigate how a big non-probability database can be used to improve estimates of finite population totals from a small probability sample through data integration techniques. In the situation where the study variable is observed in both data sources, Kim and Tam (2021) proposed two design-consistent estimators that can be justified through dual frame survey theory. First, we provide conditions ensuring that these estimators are more efficient than the Horvitz-Thompson estimator when the probability sample is selected using either Poisson sampling or simple random sampling without replacement. Then, we study the class of QR predictors, introduced by S{\"a}rndal and Wright (1984), to handle the less common case where the non-probability database contains no study variable but auxiliary variables. We also require that the non-probability database is large and can be linked to the probability sample. We provide conditions ensuring that the QR predictor is asymptotically design-unbiased. We derive its asymptotic design variance and provide a consistent design-based variance estimator. We compare the design properties of different predictors, in the class of QR predictors, through a simulation study. This class includes a model-based predictor, a model-assisted estimator and a cosmetic estimator. In our simulation setups, the cosmetic estimator performed slightly better than the model-assisted estimator. These findings are confirmed by an application to La Poste data, which also illustrates that the properties of the cosmetic estimator are preserved irrespective of the observed non-probability sample.},
}