Skip to content Skip to sidebar Skip to footer

How To Deserialise Big Json File (~300mb)

I want to parse a large JSON file (about 300 MB). I am using the Jackson library with ObjectMapper. Is it normal to run into memory problems? My first attempt used a BufferedReader, and it crashed the application.

Solution 1:

Jackson

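You can mix the Streaming API with a regular ObjectMapper. Using both, we can implement a simple Iterator class that deserialises one element at a time instead of loading the whole document into memory. We can open a stream from a URL and pass it to our implementation. Example code could look like this: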

import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.core.JsonToken;
import com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.ObjectMapper;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.math.BigDecimal;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.Iterator;
import java.util.NoSuchElementException;

publicclassJsonPathApp {

    publicstaticvoidmain(String[] args)throws Exception {
        //Just to make it work. Probably you should not do that!
        SSLUtilities.trustAllHostnames();
        SSLUtilities.trustAllHttpsCertificates();

        URLurl=newURL("https://data.opendatasoft.com/explore/dataset/vehicules-commercialises@public/download/?format=json&timezone=Europe/Berlin");
        try (BufferedReaderreader=newBufferedReader(newInputStreamReader(url.openConnection().getInputStream()))) {
            FieldsJsonIteratorfieldsJsonIterator=newFieldsJsonIterator(reader);
            while (fieldsJsonIterator.hasNext()) {
                Fieldsfields= fieldsJsonIterator.next();
                System.out.println(fields);
                // Save object to DB
            }
        }
    }
}

classFieldsJsonIteratorimplementsIterator<Fields> {

    privatefinal ObjectMapper mapper;
    privatefinal JsonParser parser;

    publicFieldsJsonIterator(Reader reader)throws IOException {
        mapper = newObjectMapper();
        mapper.disable(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES);

        parser = mapper.getFactory().createParser(reader);
        skipStart();
    }

    privatevoidskipStart()throws IOException {
        while (parser.currentToken() != JsonToken.START_OBJECT) {
            parser.nextToken();
        }
    }

    @OverridepublicbooleanhasNext() {
        try {
            while (parser.currentToken() == null) {
                parser.nextToken();
            }
        } catch (IOException e) {
            thrownewIllegalStateException(e);
        }

        return parser.currentToken() == JsonToken.START_OBJECT;
    }

    @Overridepublic Fields next() {
        try {
            return mapper.readValue(parser, FieldsWrapper.class).fields;
        } catch (IOException e) {
            thrownewIllegalStateException(e);
        }
    }

    privatestaticfinalclassFieldsWrapper {
        public Fields fields;
    }
}

classFields {

    private String cnit;

    @JsonProperty("puissance_maximale")private BigDecimal maximumPower;

    @JsonProperty("champ_v9")private String fieldV9;

    @JsonProperty("boite_de_vitesse")private String gearbox;

    // add other required properties// getters, setters, toString
}

Above code prints:

Fields{cnit='MMB76K3BQJ41', maximumPower=110.0, fieldV9='70/220*2006/96EURO4', gearbox='A 5'}
Fields{cnit='M10MCDVPF15Z219', maximumPower=95.0, fieldV9='"715/2007*566/2011EURO5', gearbox='A 7'}
Fields{cnit='M10MCDVP027V654', maximumPower=150.0, fieldV9='715/2007*692/2008EURO5', gearbox='A 7'}
Fields{cnit='M10MCDVPG137264', maximumPower=120.0, fieldV9='715/2007*692/2008EURO5', gearbox='M 6'}
Fields{cnit='MVV4912QN718', maximumPower=210.0, fieldV9='null', gearbox='A 6'}
Fields{cnit='MMB76K3B2K88', maximumPower=110.0, fieldV9='null', gearbox='A 5'}
Fields{cnit='M10MCDVP012N140', maximumPower=80.0, fieldV9='70/220*2006/96EURO4', gearbox='M 6'}
Fields{cnit='MJN5423PU123', maximumPower=88.0, fieldV9='null', gearbox='M 6'}
Fields{cnit='M10MCDVP376T303', maximumPower=120.0, fieldV9='"715/2007*692/2008EURO5', gearbox='M 6'}
Fields{cnit='MMB53H3B5Z93', maximumPower=80.0, fieldV9='70/220*2006/96EURO4', gearbox='M 6'}
Fields{cnit='MPE1403E4834', maximumPower=81.0, fieldV9='null', gearbox='M 5'}
Fields{cnit='M10MCDVP018J905', maximumPower=110.0, fieldV9='70/220*2006/96EURO4', gearbox='M 6'}
Fields{cnit='M10MCDVPG112904', maximumPower=100.0, fieldV9='"715/2007*692/2008EURO5', gearbox='M 6'}
Fields{cnit='M10MCDVP015R723', maximumPower=110.0, fieldV9='70/220*2006/96EURO4', gearbox='A 5'}
...

Gson

We can do the same using Gson. An example implementation could look like this:

classFieldsJsonIteratorimplementsIterator<Fields> {

    privatefinal Gson mapper;
    privatefinal JsonReader parser;

    publicFieldsJsonIterator(Reader reader)throws IOException {
        mapper = newGsonBuilder().create();

        parser = mapper.newJsonReader(reader);
        skipStart();
    }

    privatevoidskipStart()throws IOException {
        parser.beginArray();
    }

    @OverridepublicbooleanhasNext() {
        try {
            return parser.hasNext();
        } catch (IOException e) {
            thrownewIllegalStateException(e);
        }
    }

    @Overridepublic Fields next() {
        return ((FieldsWrapper) mapper.fromJson(parser, FieldsWrapper.class)).fields;
    }

    privatestaticfinalclassFieldsWrapper {
        public Fields fields;
    }
}

classFields {

    private String cnit;

    @SerializedName("puissance_maximale")private BigDecimal maximumPower;

    @SerializedName("champ_v9")private String fieldV9;

    @SerializedName("boite_de_vitesse")private String gearbox;

    // getters, setters, toString
}

Usage and output should be the same as for the Jackson version.

See also:

Post a Comment for "How To Deserialise Big Json File (~300mb)"